1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
55 #include "df.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
61 /* Forward definitions of types. */
62 typedef struct minipool_node Mnode;
63 typedef struct minipool_fixup Mfix;
65 void (*arm_lang_output_object_attributes_hook)(void);
67 /* Forward function declarations. */
68 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
69 static int arm_compute_static_chain_stack_bytes (void);
70 static arm_stack_offsets *arm_get_frame_offsets (void);
71 static void arm_add_gc_roots (void);
72 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
73 HOST_WIDE_INT, rtx, rtx, int, int);
74 static unsigned bit_count (unsigned long);
75 static int arm_address_register_rtx_p (rtx, int);
76 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
77 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
78 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
79 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
80 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
81 inline static int thumb1_index_register_rtx_p (rtx, int);
82 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
83 static int thumb_far_jump_used_p (void);
84 static bool thumb_force_lr_save (void);
85 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
86 static rtx emit_sfm (int, int);
87 static unsigned arm_size_return_regs (void);
88 static bool arm_assemble_integer (rtx, unsigned int, int);
89 static void arm_print_operand (FILE *, rtx, int);
90 static void arm_print_operand_address (FILE *, rtx);
91 static bool arm_print_operand_punct_valid_p (unsigned char code);
92 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
93 static arm_cc get_arm_condition_code (rtx);
94 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
95 static rtx is_jump_table (rtx);
96 static const char *output_multi_immediate (rtx *, const char *, const char *,
97 int, HOST_WIDE_INT);
98 static const char *shift_op (rtx, HOST_WIDE_INT *);
99 static struct machine_function *arm_init_machine_status (void);
100 static void thumb_exit (FILE *, int);
101 static rtx is_jump_table (rtx);
102 static HOST_WIDE_INT get_jump_table_size (rtx);
103 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
104 static Mnode *add_minipool_forward_ref (Mfix *);
105 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
106 static Mnode *add_minipool_backward_ref (Mfix *);
107 static void assign_minipool_offsets (Mfix *);
108 static void arm_print_value (FILE *, rtx);
109 static void dump_minipool (rtx);
110 static int arm_barrier_cost (rtx);
111 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
112 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
113 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
114 rtx);
115 static void arm_reorg (void);
116 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
117 static unsigned long arm_compute_save_reg0_reg12_mask (void);
118 static unsigned long arm_compute_save_reg_mask (void);
119 static unsigned long arm_isr_value (tree);
120 static unsigned long arm_compute_func_type (void);
121 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
122 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
123 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
124 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
125 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
126 #endif
127 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
128 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
129 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
130 static int arm_comp_type_attributes (const_tree, const_tree);
131 static void arm_set_default_type_attributes (tree);
132 static int arm_adjust_cost (rtx, rtx, rtx, int);
133 static int count_insns_for_constant (HOST_WIDE_INT, int);
134 static int arm_get_strip_length (int);
135 static bool arm_function_ok_for_sibcall (tree, tree);
136 static enum machine_mode arm_promote_function_mode (const_tree,
137 enum machine_mode, int *,
138 const_tree, int);
139 static bool arm_return_in_memory (const_tree, const_tree);
140 static rtx arm_function_value (const_tree, const_tree, bool);
141 static rtx arm_libcall_value (enum machine_mode, const_rtx);
143 static void arm_internal_label (FILE *, const char *, unsigned long);
144 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
145 tree);
146 static bool arm_have_conditional_execution (void);
147 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
148 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
149 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
150 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
151 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
152 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
153 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
154 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
155 static bool arm_rtx_costs (rtx, int, int, int *, bool);
156 static int arm_address_cost (rtx, bool);
157 static bool arm_memory_load_p (rtx);
158 static bool arm_cirrus_insn_p (rtx);
159 static void cirrus_reorg (rtx);
160 static void arm_init_builtins (void);
161 static void arm_init_iwmmxt_builtins (void);
162 static rtx safe_vector_operand (rtx, enum machine_mode);
163 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
164 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
165 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
166 static tree arm_builtin_decl (unsigned, bool);
167 static void emit_constant_insn (rtx cond, rtx pattern);
168 static rtx emit_set_insn (rtx, rtx);
169 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
170 tree, bool);
171 static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
172 const_tree, bool);
173 static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
174 const_tree, bool);
175 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
176 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
177 const_tree);
178 static int aapcs_select_return_coproc (const_tree, const_tree);
180 #ifdef OBJECT_FORMAT_ELF
181 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
182 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
183 #endif
184 #ifndef ARM_PE
185 static void arm_encode_section_info (tree, rtx, int);
186 #endif
188 static void arm_file_end (void);
189 static void arm_file_start (void);
191 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
192 tree, int *, int);
193 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
194 enum machine_mode, const_tree, bool);
195 static bool arm_promote_prototypes (const_tree);
196 static bool arm_default_short_enums (void);
197 static bool arm_align_anon_bitfield (void);
198 static bool arm_return_in_msb (const_tree);
199 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
200 static bool arm_return_in_memory (const_tree, const_tree);
201 #if ARM_UNWIND_INFO
202 static void arm_unwind_emit (FILE *, rtx);
203 static bool arm_output_ttype (rtx);
204 static void arm_asm_emit_except_personality (rtx);
205 static void arm_asm_init_sections (void);
206 #endif
207 static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
208 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
209 static rtx arm_dwarf_register_span (rtx);
211 static tree arm_cxx_guard_type (void);
212 static bool arm_cxx_guard_mask_bit (void);
213 static tree arm_get_cookie_size (tree);
214 static bool arm_cookie_has_size (void);
215 static bool arm_cxx_cdtor_returns_this (void);
216 static bool arm_cxx_key_method_may_be_inline (void);
217 static void arm_cxx_determine_class_data_visibility (tree);
218 static bool arm_cxx_class_data_always_comdat (void);
219 static bool arm_cxx_use_aeabi_atexit (void);
220 static void arm_init_libfuncs (void);
221 static tree arm_build_builtin_va_list (void);
222 static void arm_expand_builtin_va_start (tree, rtx);
223 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
224 static void arm_option_override (void);
225 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
226 static bool arm_cannot_copy_insn_p (rtx);
227 static bool arm_tls_symbol_p (rtx x);
228 static int arm_issue_rate (void);
229 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
230 static bool arm_output_addr_const_extra (FILE *, rtx);
231 static bool arm_allocate_stack_slots_for_args (void);
232 static const char *arm_invalid_parameter_type (const_tree t);
233 static const char *arm_invalid_return_type (const_tree t);
234 static tree arm_promoted_type (const_tree t);
235 static tree arm_convert_to_type (tree type, tree expr);
236 static bool arm_scalar_mode_supported_p (enum machine_mode);
237 static bool arm_frame_pointer_required (void);
238 static bool arm_can_eliminate (const int, const int);
239 static void arm_asm_trampoline_template (FILE *);
240 static void arm_trampoline_init (rtx, tree, rtx);
241 static rtx arm_trampoline_adjust_address (rtx);
242 static rtx arm_pic_static_addr (rtx orig, rtx reg);
243 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
244 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
245 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
246 static bool arm_array_mode_supported_p (enum machine_mode,
247 unsigned HOST_WIDE_INT);
248 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
249 static bool arm_class_likely_spilled_p (reg_class_t);
250 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
251 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
252 const_tree type,
253 int misalignment,
254 bool is_packed);
255 static void arm_conditional_register_usage (void);
256 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
257 static unsigned int arm_autovectorize_vector_sizes (void);
258 static int arm_default_branch_cost (bool, bool);
261 /* Table of machine attributes. */
262 static const struct attribute_spec arm_attribute_table[] =
264 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
265 affects_type_identity } */
266 /* Function calls made to this symbol must be done indirectly, because
267 it may lie outside of the 26 bit addressing range of a normal function
268 call. */
269 { "long_call", 0, 0, false, true, true, NULL, false },
270 /* Whereas these functions are always known to reside within the 26 bit
271 addressing range. */
272 { "short_call", 0, 0, false, true, true, NULL, false },
273 /* Specify the procedure call conventions for a function. */
274 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
275 false },
276 /* Interrupt Service Routines have special prologue and epilogue requirements. */
277 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
278 false },
279 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
280 false },
281 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
282 false },
283 #ifdef ARM_PE
284 /* ARM/PE has three new attributes:
285 interfacearm - ?
286 dllexport - for exporting a function/variable that will live in a dll
287 dllimport - for importing a function/variable from a dll
289 Microsoft allows multiple declspecs in one __declspec, separating
290 them with spaces. We do NOT support this. Instead, use __declspec
   multiple times.  */
293 { "dllimport", 0, 0, true, false, false, NULL, false },
294 { "dllexport", 0, 0, true, false, false, NULL, false },
295 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
296 false },
297 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
298 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
299 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
300 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
301 false },
302 #endif
303 { NULL, 0, 0, false, false, false, NULL, false }
306 /* Set default optimization options. */
307 static const struct default_options arm_option_optimization_table[] =
309 /* Enable section anchors by default at -O1 or higher. */
310 { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
311 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
312 { OPT_LEVELS_NONE, 0, NULL, 0 }
315 /* Initialize the GCC target structure. */
316 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
317 #undef TARGET_MERGE_DECL_ATTRIBUTES
318 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
319 #endif
321 #undef TARGET_LEGITIMIZE_ADDRESS
322 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
324 #undef TARGET_ATTRIBUTE_TABLE
325 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
327 #undef TARGET_ASM_FILE_START
328 #define TARGET_ASM_FILE_START arm_file_start
329 #undef TARGET_ASM_FILE_END
330 #define TARGET_ASM_FILE_END arm_file_end
332 #undef TARGET_ASM_ALIGNED_SI_OP
333 #define TARGET_ASM_ALIGNED_SI_OP NULL
334 #undef TARGET_ASM_INTEGER
335 #define TARGET_ASM_INTEGER arm_assemble_integer
337 #undef TARGET_PRINT_OPERAND
338 #define TARGET_PRINT_OPERAND arm_print_operand
339 #undef TARGET_PRINT_OPERAND_ADDRESS
340 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
341 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
342 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
344 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
345 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
347 #undef TARGET_ASM_FUNCTION_PROLOGUE
348 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
350 #undef TARGET_ASM_FUNCTION_EPILOGUE
351 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
353 #undef TARGET_DEFAULT_TARGET_FLAGS
354 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
355 #undef TARGET_OPTION_OVERRIDE
356 #define TARGET_OPTION_OVERRIDE arm_option_override
357 #undef TARGET_OPTION_OPTIMIZATION_TABLE
358 #define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table
360 #undef TARGET_COMP_TYPE_ATTRIBUTES
361 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
363 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
364 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
366 #undef TARGET_SCHED_ADJUST_COST
367 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
369 #undef TARGET_ENCODE_SECTION_INFO
370 #ifdef ARM_PE
371 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
372 #else
373 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
374 #endif
376 #undef TARGET_STRIP_NAME_ENCODING
377 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
379 #undef TARGET_ASM_INTERNAL_LABEL
380 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
382 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
383 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
385 #undef TARGET_FUNCTION_VALUE
386 #define TARGET_FUNCTION_VALUE arm_function_value
388 #undef TARGET_LIBCALL_VALUE
389 #define TARGET_LIBCALL_VALUE arm_libcall_value
391 #undef TARGET_ASM_OUTPUT_MI_THUNK
392 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
393 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
394 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
396 #undef TARGET_RTX_COSTS
397 #define TARGET_RTX_COSTS arm_rtx_costs
398 #undef TARGET_ADDRESS_COST
399 #define TARGET_ADDRESS_COST arm_address_cost
401 #undef TARGET_SHIFT_TRUNCATION_MASK
402 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
403 #undef TARGET_VECTOR_MODE_SUPPORTED_P
404 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
405 #undef TARGET_ARRAY_MODE_SUPPORTED_P
406 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
407 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
408 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
409 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
410 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
411 arm_autovectorize_vector_sizes
413 #undef TARGET_MACHINE_DEPENDENT_REORG
414 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
416 #undef TARGET_INIT_BUILTINS
417 #define TARGET_INIT_BUILTINS arm_init_builtins
418 #undef TARGET_EXPAND_BUILTIN
419 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
420 #undef TARGET_BUILTIN_DECL
421 #define TARGET_BUILTIN_DECL arm_builtin_decl
423 #undef TARGET_INIT_LIBFUNCS
424 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
426 #undef TARGET_PROMOTE_FUNCTION_MODE
427 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
428 #undef TARGET_PROMOTE_PROTOTYPES
429 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
430 #undef TARGET_PASS_BY_REFERENCE
431 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
432 #undef TARGET_ARG_PARTIAL_BYTES
433 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
434 #undef TARGET_FUNCTION_ARG
435 #define TARGET_FUNCTION_ARG arm_function_arg
436 #undef TARGET_FUNCTION_ARG_ADVANCE
437 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
438 #undef TARGET_FUNCTION_ARG_BOUNDARY
439 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
441 #undef TARGET_SETUP_INCOMING_VARARGS
442 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
444 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
445 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
447 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
448 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
449 #undef TARGET_TRAMPOLINE_INIT
450 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
451 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
452 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
454 #undef TARGET_DEFAULT_SHORT_ENUMS
455 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
457 #undef TARGET_ALIGN_ANON_BITFIELD
458 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
460 #undef TARGET_NARROW_VOLATILE_BITFIELD
461 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
463 #undef TARGET_CXX_GUARD_TYPE
464 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
466 #undef TARGET_CXX_GUARD_MASK_BIT
467 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
469 #undef TARGET_CXX_GET_COOKIE_SIZE
470 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
472 #undef TARGET_CXX_COOKIE_HAS_SIZE
473 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
475 #undef TARGET_CXX_CDTOR_RETURNS_THIS
476 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
478 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
479 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
481 #undef TARGET_CXX_USE_AEABI_ATEXIT
482 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
484 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
485 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
486 arm_cxx_determine_class_data_visibility
488 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
489 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
491 #undef TARGET_RETURN_IN_MSB
492 #define TARGET_RETURN_IN_MSB arm_return_in_msb
494 #undef TARGET_RETURN_IN_MEMORY
495 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
497 #undef TARGET_MUST_PASS_IN_STACK
498 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
500 #if ARM_UNWIND_INFO
501 #undef TARGET_ASM_UNWIND_EMIT
502 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
504 /* EABI unwinding tables use a different format for the typeinfo tables. */
505 #undef TARGET_ASM_TTYPE
506 #define TARGET_ASM_TTYPE arm_output_ttype
508 #undef TARGET_ARM_EABI_UNWINDER
509 #define TARGET_ARM_EABI_UNWINDER true
511 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
512 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
514 #undef TARGET_ASM_INIT_SECTIONS
515 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
516 #endif /* ARM_UNWIND_INFO */
518 #undef TARGET_EXCEPT_UNWIND_INFO
519 #define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
521 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
522 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
524 #undef TARGET_DWARF_REGISTER_SPAN
525 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
527 #undef TARGET_CANNOT_COPY_INSN_P
528 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
530 #ifdef HAVE_AS_TLS
531 #undef TARGET_HAVE_TLS
532 #define TARGET_HAVE_TLS true
533 #endif
535 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
536 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
538 #undef TARGET_LEGITIMATE_CONSTANT_P
539 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
541 #undef TARGET_CANNOT_FORCE_CONST_MEM
542 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
544 #undef TARGET_MAX_ANCHOR_OFFSET
545 #define TARGET_MAX_ANCHOR_OFFSET 4095
/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
550 #undef TARGET_MIN_ANCHOR_OFFSET
551 #define TARGET_MIN_ANCHOR_OFFSET -4088
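/* Put differently (a rough illustration of the arithmetic above): the
   addressable block around an anchor runs from offset -4088 up to +4095,
   i.e. 4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, which is what
   gives the natural eight-byte spacing of anchors.  */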
553 #undef TARGET_SCHED_ISSUE_RATE
554 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
556 #undef TARGET_MANGLE_TYPE
557 #define TARGET_MANGLE_TYPE arm_mangle_type
559 #undef TARGET_BUILD_BUILTIN_VA_LIST
560 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
561 #undef TARGET_EXPAND_BUILTIN_VA_START
562 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
563 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
564 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
566 #ifdef HAVE_AS_TLS
567 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
568 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
569 #endif
571 #undef TARGET_LEGITIMATE_ADDRESS_P
572 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
574 #undef TARGET_INVALID_PARAMETER_TYPE
575 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
577 #undef TARGET_INVALID_RETURN_TYPE
578 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
580 #undef TARGET_PROMOTED_TYPE
581 #define TARGET_PROMOTED_TYPE arm_promoted_type
583 #undef TARGET_CONVERT_TO_TYPE
584 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
586 #undef TARGET_SCALAR_MODE_SUPPORTED_P
587 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
589 #undef TARGET_FRAME_POINTER_REQUIRED
590 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
592 #undef TARGET_CAN_ELIMINATE
593 #define TARGET_CAN_ELIMINATE arm_can_eliminate
595 #undef TARGET_CONDITIONAL_REGISTER_USAGE
596 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
598 #undef TARGET_CLASS_LIKELY_SPILLED_P
599 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
601 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
602 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
603 arm_vector_alignment_reachable
605 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
606 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
607 arm_builtin_support_vector_misalignment
609 #undef TARGET_PREFERRED_RENAME_CLASS
610 #define TARGET_PREFERRED_RENAME_CLASS \
611 arm_preferred_rename_class
613 struct gcc_target targetm = TARGET_INITIALIZER;
615 /* Obstack for minipool constant handling. */
616 static struct obstack minipool_obstack;
617 static char * minipool_startobj;
619 /* The maximum number of insns skipped which
620 will be conditionalised if possible. */
621 static int max_insns_skipped = 5;
623 extern FILE * asm_out_file;
625 /* True if we are currently building a constant table. */
626 int making_const_table;
628 /* The processor for which instructions should be scheduled. */
629 enum processor_type arm_tune = arm_none;
631 /* The current tuning set. */
632 const struct tune_params *current_tune;
634 /* Which floating point hardware to schedule for. */
635 int arm_fpu_attr;
/* Which floating point hardware to use.  */
638 const struct arm_fpu_desc *arm_fpu_desc;
640 /* Used for Thumb call_via trampolines. */
641 rtx thumb_call_via_label[14];
642 static int thumb_call_reg_needed;
644 /* Bit values used to identify processor capabilities. */
645 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
646 #define FL_ARCH3M (1 << 1) /* Extended multiply */
647 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
648 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
649 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
650 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
651 #define FL_THUMB (1 << 6) /* Thumb aware */
652 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
653 #define FL_STRONG (1 << 8) /* StrongARM */
654 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
655 #define FL_XSCALE (1 << 10) /* XScale */
656 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
657 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
658 media instructions. */
659 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
660 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
661 Note: ARM6 & 7 derivatives only. */
662 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
663 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
664 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
665 profile. */
666 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
667 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
668 #define FL_NEON (1 << 20) /* Neon instructions. */
669 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
670 architecture. */
671 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
672 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
674 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
/* Flags that only affect tuning, not the available instructions.  */
677 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
678 | FL_CO_PROC)
680 #define FL_FOR_ARCH2 FL_NOTM
681 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
682 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
683 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
684 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
685 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
686 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
687 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
688 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
689 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
690 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
691 #define FL_FOR_ARCH6J FL_FOR_ARCH6
692 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
693 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
694 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
695 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
696 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
697 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
698 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
699 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
700 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
701 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
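/* As an illustration of how these masks accumulate (derived purely from the
   definitions above): FL_FOR_ARCH7A is FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K,
   and FL_FOR_ARCH7 in turn folds in everything from FL_FOR_ARCH6T2 (minus
   FL_NOTM) plus FL_ARCH7, so an ARMv7-A target ends up with all the
   v5TE/v6/Thumb-2 capability bits as well as the v6K and non-M-profile
   ones.  */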
703 /* The bits in this mask specify which
704 instructions we are allowed to generate. */
705 static unsigned long insn_flags = 0;
707 /* The bits in this mask specify which instruction scheduling options should
708 be used. */
709 static unsigned long tune_flags = 0;
711 /* The following are used in the arm.md file as equivalents to bits
712 in the above two flag variables. */
714 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
715 int arm_arch3m = 0;
717 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
718 int arm_arch4 = 0;
720 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
721 int arm_arch4t = 0;
723 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
724 int arm_arch5 = 0;
726 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
727 int arm_arch5e = 0;
729 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
730 int arm_arch6 = 0;
732 /* Nonzero if this chip supports the ARM 6K extensions. */
733 int arm_arch6k = 0;
735 /* Nonzero if this chip supports the ARM 7 extensions. */
736 int arm_arch7 = 0;
738 /* Nonzero if instructions not present in the 'M' profile can be used. */
739 int arm_arch_notm = 0;
741 /* Nonzero if instructions present in ARMv7E-M can be used. */
742 int arm_arch7em = 0;
744 /* Nonzero if this chip can benefit from load scheduling. */
745 int arm_ld_sched = 0;
747 /* Nonzero if this chip is a StrongARM. */
748 int arm_tune_strongarm = 0;
750 /* Nonzero if this chip is a Cirrus variant. */
751 int arm_arch_cirrus = 0;
753 /* Nonzero if this chip supports Intel Wireless MMX technology. */
754 int arm_arch_iwmmxt = 0;
756 /* Nonzero if this chip is an XScale. */
757 int arm_arch_xscale = 0;
759 /* Nonzero if tuning for XScale */
760 int arm_tune_xscale = 0;
762 /* Nonzero if we want to tune for stores that access the write-buffer.
763 This typically means an ARM6 or ARM7 with MMU or MPU. */
764 int arm_tune_wbuf = 0;
766 /* Nonzero if tuning for Cortex-A9. */
767 int arm_tune_cortex_a9 = 0;
769 /* Nonzero if generating Thumb instructions. */
770 int thumb_code = 0;
772 /* Nonzero if generating Thumb-1 instructions. */
773 int thumb1_code = 0;
775 /* Nonzero if we should define __THUMB_INTERWORK__ in the
776 preprocessor.
777 XXX This is a bit of a hack, it's intended to help work around
778 problems in GLD which doesn't understand that armv5t code is
779 interworking clean. */
780 int arm_cpp_interwork = 0;
782 /* Nonzero if chip supports Thumb 2. */
783 int arm_arch_thumb2;
785 /* Nonzero if chip supports integer division instruction. */
786 int arm_arch_arm_hwdiv;
787 int arm_arch_thumb_hwdiv;
789 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
790 we must report the mode of the memory reference from
791 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
792 enum machine_mode output_memory_reference_mode;
794 /* The register number to be used for the PIC offset register. */
795 unsigned arm_pic_register = INVALID_REGNUM;
/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
799 static int after_arm_reorg = 0;
801 enum arm_pcs arm_pcs_default;
803 /* For an explanation of these variables, see final_prescan_insn below. */
804 int arm_ccfsm_state;
805 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
806 enum arm_cond_code arm_current_cc;
808 rtx arm_target_insn;
809 int arm_target_label;
810 /* The number of conditionally executed insns, including the current insn. */
811 int arm_condexec_count = 0;
812 /* A bitmask specifying the patterns for the IT block.
813 Zero means do not output an IT block before this insn. */
814 int arm_condexec_mask = 0;
815 /* The number of bits used in arm_condexec_mask. */
816 int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
831 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
832 #define streq(string1, string2) (strcmp (string1, string2) == 0)
834 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
835 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
836 | (1 << PIC_OFFSET_TABLE_REGNUM)))
838 /* Initialization code. */
struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
850 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
851 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
852 prefetch_slots, \
853 l1_size, \
854 l1_line_size
856 const struct tune_params arm_slowmul_tune =
858 arm_slowmul_rtx_costs,
859 NULL,
860 3, /* Constant limit. */
861 ARM_PREFETCH_NOT_BENEFICIAL,
862 true, /* Prefer constant pool. */
863 arm_default_branch_cost
866 const struct tune_params arm_fastmul_tune =
868 arm_fastmul_rtx_costs,
869 NULL,
870 1, /* Constant limit. */
871 ARM_PREFETCH_NOT_BENEFICIAL,
872 true, /* Prefer constant pool. */
873 arm_default_branch_cost
876 const struct tune_params arm_xscale_tune =
878 arm_xscale_rtx_costs,
879 xscale_sched_adjust_cost,
880 2, /* Constant limit. */
881 ARM_PREFETCH_NOT_BENEFICIAL,
882 true, /* Prefer constant pool. */
883 arm_default_branch_cost
886 const struct tune_params arm_9e_tune =
888 arm_9e_rtx_costs,
889 NULL,
890 1, /* Constant limit. */
891 ARM_PREFETCH_NOT_BENEFICIAL,
892 true, /* Prefer constant pool. */
893 arm_default_branch_cost
896 const struct tune_params arm_v6t2_tune =
898 arm_9e_rtx_costs,
899 NULL,
900 1, /* Constant limit. */
901 ARM_PREFETCH_NOT_BENEFICIAL,
902 false, /* Prefer constant pool. */
903 arm_default_branch_cost
906 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
907 const struct tune_params arm_cortex_tune =
909 arm_9e_rtx_costs,
910 NULL,
911 1, /* Constant limit. */
912 ARM_PREFETCH_NOT_BENEFICIAL,
913 false, /* Prefer constant pool. */
914 arm_default_branch_cost
917 const struct tune_params arm_cortex_a9_tune =
919 arm_9e_rtx_costs,
920 cortex_a9_sched_adjust_cost,
921 1, /* Constant limit. */
922 ARM_PREFETCH_BENEFICIAL(4,32,32),
923 false, /* Prefer constant pool. */
924 arm_default_branch_cost
927 const struct tune_params arm_fa726te_tune =
929 arm_9e_rtx_costs,
930 fa726te_sched_adjust_cost,
931 1, /* Constant limit. */
932 ARM_PREFETCH_NOT_BENEFICIAL,
933 true, /* Prefer constant pool. */
934 arm_default_branch_cost
938 /* Not all of these give usefully different compilation alternatives,
939 but there is no simple way of generalizing them. */
940 static const struct processors all_cores[] =
942 /* ARM Cores */
943 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
944 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
945 #include "arm-cores.def"
946 #undef ARM_CORE
947 {NULL, arm_none, NULL, 0, NULL}
950 static const struct processors all_architectures[] =
952 /* ARM Architectures */
953 /* We don't specify tuning costs here as it will be figured out
954 from the core. */
956 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
957 {NAME, CORE, #ARCH, FLAGS, NULL},
958 #include "arm-arches.def"
959 #undef ARM_ARCH
960 {NULL, arm_none, NULL, 0 , NULL}
964 /* These are populated as commandline arguments are processed, or NULL
965 if not specified. */
966 static const struct processors *arm_selected_arch;
967 static const struct processors *arm_selected_cpu;
968 static const struct processors *arm_selected_tune;
970 /* The name of the preprocessor macro to define for this architecture. */
972 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
974 /* Available values for -mfpu=. */
976 static const struct arm_fpu_desc all_fpus[] =
978 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
979 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
980 #include "arm-fpus.def"
981 #undef ARM_FPU
985 /* Supported TLS relocations. */
enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
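/* A rough illustration of the clear-lowest-set-bit step above: for
   value = 0x2c (binary 101100) the loop sees 101100 -> 101000 -> 100000 -> 0,
   so count ends at 3, one iteration per set bit rather than one per bit
   position.  */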
1025 /* Set up library functions unique to ARM. */
1027 static void
1028 arm_init_libfuncs (void)
1030 /* There are no special library functions unless we are using the
1031 ARM BPABI. */
1032 if (!TARGET_BPABI)
1033 return;
1035 /* The functions below are described in Section 4 of the "Run-Time
1036 ABI for the ARM architecture", Version 1.0. */
1038 /* Double-precision floating-point arithmetic. Table 2. */
1039 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1040 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1041 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1042 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1043 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1045 /* Double-precision comparisons. Table 3. */
1046 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1047 set_optab_libfunc (ne_optab, DFmode, NULL);
1048 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1049 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1050 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1051 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1052 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1054 /* Single-precision floating-point arithmetic. Table 4. */
1055 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1056 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1057 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1058 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1059 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1061 /* Single-precision comparisons. Table 5. */
1062 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1063 set_optab_libfunc (ne_optab, SFmode, NULL);
1064 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1065 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1066 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1067 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1068 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1070 /* Floating-point to integer conversions. Table 6. */
1071 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1072 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1073 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1074 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1075 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1076 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1077 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1078 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1080 /* Conversions between floating types. Table 7. */
1081 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1082 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1084 /* Integer to floating-point conversions. Table 8. */
1085 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1086 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1087 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1088 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1089 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1090 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1091 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1092 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1094 /* Long long. Table 9. */
1095 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1096 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1097 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1098 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1099 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1100 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1101 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1102 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1104 /* Integer (32/32->32) division. \S 4.3.1. */
1105 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1106 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
/* The divmod functions are designed so that they can be used for
   plain division, even though they return both the quotient and the
   remainder.  The quotient is returned in the usual location (i.e.,
   r0 for SImode, {r0, r1} for DImode), just as would be expected
   for an ordinary division routine.  Because the AAPCS calling
   conventions specify that all of { r0, r1, r2, r3 } are
   call-clobbered registers, there is no need to tell the compiler
   explicitly that those registers are clobbered by these
   routines.  */
1117 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1118 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1120 /* For SImode division the ABI provides div-without-mod routines,
1121 which are faster. */
1122 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1123 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1125 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1126 divmod libcalls instead. */
1127 set_optab_libfunc (smod_optab, DImode, NULL);
1128 set_optab_libfunc (umod_optab, DImode, NULL);
1129 set_optab_libfunc (smod_optab, SImode, NULL);
1130 set_optab_libfunc (umod_optab, SImode, NULL);
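/* For illustration (assuming the AEABI run-time helpers named above): a
   SImode "a / b" is emitted as a call to __aeabi_idiv, "a % b" goes
   through __aeabi_idivmod with the remainder taken from r1, and the
   DImode cases are handled by __aeabi_ldivmod / __aeabi_uldivmod in the
   same fashion.  */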
/* Half-precision float operations.  The compiler handles all operations
   with NULL libfuncs by converting to SFmode.  */
1134 switch (arm_fp16_format)
1136 case ARM_FP16_FORMAT_IEEE:
1137 case ARM_FP16_FORMAT_ALTERNATIVE:
1139 /* Conversions. */
1140 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1141 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1142 ? "__gnu_f2h_ieee"
1143 : "__gnu_f2h_alternative"));
1144 set_conv_libfunc (sext_optab, SFmode, HFmode,
1145 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1146 ? "__gnu_h2f_ieee"
1147 : "__gnu_h2f_alternative"));
1149 /* Arithmetic. */
1150 set_optab_libfunc (add_optab, HFmode, NULL);
1151 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1152 set_optab_libfunc (smul_optab, HFmode, NULL);
1153 set_optab_libfunc (neg_optab, HFmode, NULL);
1154 set_optab_libfunc (sub_optab, HFmode, NULL);
1156 /* Comparisons. */
1157 set_optab_libfunc (eq_optab, HFmode, NULL);
1158 set_optab_libfunc (ne_optab, HFmode, NULL);
1159 set_optab_libfunc (lt_optab, HFmode, NULL);
1160 set_optab_libfunc (le_optab, HFmode, NULL);
1161 set_optab_libfunc (ge_optab, HFmode, NULL);
1162 set_optab_libfunc (gt_optab, HFmode, NULL);
1163 set_optab_libfunc (unord_optab, HFmode, NULL);
1164 break;
1166 default:
1167 break;
1170 if (TARGET_AAPCS_BASED)
1171 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1174 /* On AAPCS systems, this is the "struct __va_list". */
1175 static GTY(()) tree va_list_type;
1177 /* Return the type to use as __builtin_va_list. */
1178 static tree
1179 arm_build_builtin_va_list (void)
1181 tree va_list_name;
1182 tree ap_field;
1184 if (!TARGET_AAPCS_BASED)
1185 return std_build_builtin_va_list ();
/* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
   defined as:

       struct __va_list
       {
         void *__ap;
       };

   The C Library ABI further reinforces this definition in \S
   4.1.

   We must follow this definition exactly.  The structure tag
   name is visible in C++ mangled names, and thus forms a part
   of the ABI.  The field name may be used by people who
   #include <stdarg.h>.  */
1202 /* Create the type. */
1203 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1204 /* Give it the required name. */
1205 va_list_name = build_decl (BUILTINS_LOCATION,
1206 TYPE_DECL,
1207 get_identifier ("__va_list"),
1208 va_list_type);
1209 DECL_ARTIFICIAL (va_list_name) = 1;
1210 TYPE_NAME (va_list_type) = va_list_name;
1211 TYPE_STUB_DECL (va_list_type) = va_list_name;
1212 /* Create the __ap field. */
1213 ap_field = build_decl (BUILTINS_LOCATION,
1214 FIELD_DECL,
1215 get_identifier ("__ap"),
1216 ptr_type_node);
1217 DECL_ARTIFICIAL (ap_field) = 1;
1218 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1219 TYPE_FIELDS (va_list_type) = ap_field;
1220 /* Compute its layout. */
1221 layout_type (va_list_type);
1223 return va_list_type;
1226 /* Return an expression of type "void *" pointing to the next
1227 available argument in a variable-argument list. VALIST is the
1228 user-level va_list object, of type __builtin_va_list. */
1229 static tree
1230 arm_extract_valist_ptr (tree valist)
1232 if (TREE_TYPE (valist) == error_mark_node)
1233 return error_mark_node;
1235 /* On an AAPCS target, the pointer is stored within "struct
1236 va_list". */
1237 if (TARGET_AAPCS_BASED)
1239 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1240 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1241 valist, ap_field, NULL_TREE);
1244 return valist;
1247 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1248 static void
1249 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1251 valist = arm_extract_valist_ptr (valist);
1252 std_expand_builtin_va_start (valist, nextarg);
1255 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1256 static tree
1257 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1258 gimple_seq *post_p)
1260 valist = arm_extract_valist_ptr (valist);
1261 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1264 /* Fix up any incompatible options that the user has specified. */
1265 static void
1266 arm_option_override (void)
1268 if (global_options_set.x_arm_arch_option)
1269 arm_selected_arch = &all_architectures[arm_arch_option];
1271 if (global_options_set.x_arm_cpu_option)
1272 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1274 if (global_options_set.x_arm_tune_option)
1275 arm_selected_tune = &all_cores[(int) arm_tune_option];
1277 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1278 SUBTARGET_OVERRIDE_OPTIONS;
1279 #endif
1281 if (arm_selected_arch)
1283 if (arm_selected_cpu)
1285 /* Check for conflict between mcpu and march. */
1286 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1288 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1289 arm_selected_cpu->name, arm_selected_arch->name);
1290 /* -march wins for code generation.
1291 -mcpu wins for default tuning. */
1292 if (!arm_selected_tune)
1293 arm_selected_tune = arm_selected_cpu;
1295 arm_selected_cpu = arm_selected_arch;
1297 else
1298 /* -mcpu wins. */
1299 arm_selected_arch = NULL;
1301 else
1302 /* Pick a CPU based on the architecture. */
1303 arm_selected_cpu = arm_selected_arch;
1306 /* If the user did not specify a processor, choose one for them. */
1307 if (!arm_selected_cpu)
1309 const struct processors * sel;
1310 unsigned int sought;
1312 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1313 if (!arm_selected_cpu->name)
1315 #ifdef SUBTARGET_CPU_DEFAULT
1316 /* Use the subtarget default CPU if none was specified by
1317 configure. */
1318 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1319 #endif
1320 /* Default to ARM6. */
1321 if (!arm_selected_cpu->name)
1322 arm_selected_cpu = &all_cores[arm6];
1325 sel = arm_selected_cpu;
1326 insn_flags = sel->flags;
/* Now check to see if the user has specified some command line
   switches that require certain abilities from the cpu.  */
1330 sought = 0;
1332 if (TARGET_INTERWORK || TARGET_THUMB)
1334 sought |= (FL_THUMB | FL_MODE32);
1336 /* There are no ARM processors that support both APCS-26 and
1337 interworking. Therefore we force FL_MODE26 to be removed
1338 from insn_flags here (if it was set), so that the search
1339 below will always be able to find a compatible processor. */
1340 insn_flags &= ~FL_MODE26;
1343 if (sought != 0 && ((sought & insn_flags) != sought))
1345 /* Try to locate a CPU type that supports all of the abilities
1346 of the default CPU, plus the extra abilities requested by
1347 the user. */
1348 for (sel = all_cores; sel->name != NULL; sel++)
1349 if ((sel->flags & sought) == (sought | insn_flags))
1350 break;
1352 if (sel->name == NULL)
1354 unsigned current_bit_count = 0;
1355 const struct processors * best_fit = NULL;
1357 /* Ideally we would like to issue an error message here
1358 saying that it was not possible to find a CPU compatible
1359 with the default CPU, but which also supports the command
1360 line options specified by the programmer, and so they
1361 ought to use the -mcpu=<name> command line option to
1362 override the default CPU type.
1364 If we cannot find a cpu that has both the
1365 characteristics of the default cpu and the given
1366 command line options we scan the array again looking
1367 for a best match. */
1368 for (sel = all_cores; sel->name != NULL; sel++)
1369 if ((sel->flags & sought) == sought)
1371 unsigned count;
1373 count = bit_count (sel->flags & insn_flags);
1375 if (count >= current_bit_count)
1377 best_fit = sel;
1378 current_bit_count = count;
1382 gcc_assert (best_fit);
1383 sel = best_fit;
1386 arm_selected_cpu = sel;
1390 gcc_assert (arm_selected_cpu);
1391 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1392 if (!arm_selected_tune)
1393 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1395 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1396 insn_flags = arm_selected_cpu->flags;
1398 arm_tune = arm_selected_tune->core;
1399 tune_flags = arm_selected_tune->flags;
1400 current_tune = arm_selected_tune->tune;
1402 /* Make sure that the processor choice does not conflict with any of the
1403 other command line choices. */
1404 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1405 error ("target CPU does not support ARM mode");
1407 /* BPABI targets use linker tricks to allow interworking on cores
1408 without thumb support. */
1409 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1411 warning (0, "target CPU does not support interworking" );
1412 target_flags &= ~MASK_INTERWORK;
1415 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1417 warning (0, "target CPU does not support THUMB instructions");
1418 target_flags &= ~MASK_THUMB;
1421 if (TARGET_APCS_FRAME && TARGET_THUMB)
1423 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1424 target_flags &= ~MASK_APCS_FRAME;
1427 /* Callee super interworking implies thumb interworking. Adding
1428 this to the flags here simplifies the logic elsewhere. */
1429 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1430 target_flags |= MASK_INTERWORK;
1432 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1433 from here where no function is being compiled currently. */
1434 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1435 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1437 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1438 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1440 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1442 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1443 target_flags |= MASK_APCS_FRAME;
1446 if (TARGET_POKE_FUNCTION_NAME)
1447 target_flags |= MASK_APCS_FRAME;
1449 if (TARGET_APCS_REENT && flag_pic)
1450 error ("-fpic and -mapcs-reent are incompatible");
1452 if (TARGET_APCS_REENT)
1453 warning (0, "APCS reentrant code not supported. Ignored");
1455 /* If this target is normally configured to use APCS frames, warn if they
1456 are turned off and debugging is turned on. */
1457 if (TARGET_ARM
1458 && write_symbols != NO_DEBUG
1459 && !TARGET_APCS_FRAME
1460 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1461 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1463 if (TARGET_APCS_FLOAT)
1464 warning (0, "passing floating point arguments in fp regs not yet supported");
1466 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1467 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1468 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1469 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1470 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1471 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1472 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1473 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1474 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1475 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1476 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1477 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1478 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1479 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1481 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1482 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1483 thumb_code = TARGET_ARM == 0;
1484 thumb1_code = TARGET_THUMB1 != 0;
1485 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1486 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1487 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1488 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1489 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1490 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1492 /* If we are not using the default (ARM mode) section anchor offset
1493 ranges, then set the correct ranges now. */
1494 if (TARGET_THUMB1)
1496 /* Thumb-1 LDR instructions cannot have negative offsets.
1497 Permissible positive offset ranges are 5-bit (for byte loads),
1498 6-bit (for halfword loads), or 7-bit (for word loads).
1499 Empirical results suggest a 7-bit anchor range gives the best
1500 overall code size. */
1501 targetm.min_anchor_offset = 0;
1502 targetm.max_anchor_offset = 127;
1504 else if (TARGET_THUMB2)
1506 /* The minimum is set such that the total size of the block
1507 for a particular anchor is 248 + 1 + 4095 bytes, which is
1508 divisible by eight, ensuring natural spacing of anchors. */
1509 targetm.min_anchor_offset = -248;
1510 targetm.max_anchor_offset = 4095;
1513 /* V5 code we generate is completely interworking capable, so we turn off
1514 TARGET_INTERWORK here to avoid many tests later on. */
1516 /* XXX However, we must pass the right pre-processor defines to CPP
1517 or GLD can get confused. This is a hack. */
1518 if (TARGET_INTERWORK)
1519 arm_cpp_interwork = 1;
1521 if (arm_arch5)
1522 target_flags &= ~MASK_INTERWORK;
1524 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1525 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1527 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1528 error ("iwmmxt abi requires an iwmmxt capable cpu");
1530 if (!global_options_set.x_arm_fpu_index)
1532 const char *target_fpu_name;
1533 bool ok;
1535 #ifdef FPUTYPE_DEFAULT
1536 target_fpu_name = FPUTYPE_DEFAULT;
1537 #else
1538 if (arm_arch_cirrus)
1539 target_fpu_name = "maverick";
1540 else
1541 target_fpu_name = "fpe2";
1542 #endif
1544 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1545 CL_TARGET);
1546 gcc_assert (ok);
1549 arm_fpu_desc = &all_fpus[arm_fpu_index];
1551 switch (arm_fpu_desc->model)
1553 case ARM_FP_MODEL_FPA:
1554 if (arm_fpu_desc->rev == 2)
1555 arm_fpu_attr = FPU_FPE2;
1556 else if (arm_fpu_desc->rev == 3)
1557 arm_fpu_attr = FPU_FPE3;
1558 else
1559 arm_fpu_attr = FPU_FPA;
1560 break;
1562 case ARM_FP_MODEL_MAVERICK:
1563 arm_fpu_attr = FPU_MAVERICK;
1564 break;
1566 case ARM_FP_MODEL_VFP:
1567 arm_fpu_attr = FPU_VFP;
1568 break;
1570 default:
1571 gcc_unreachable();
1574 if (TARGET_AAPCS_BASED
1575 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1576 error ("FPA is unsupported in the AAPCS");
1578 if (TARGET_AAPCS_BASED)
1580 if (TARGET_CALLER_INTERWORKING)
1581 error ("AAPCS does not support -mcaller-super-interworking");
1582 else
1583 if (TARGET_CALLEE_INTERWORKING)
1584 error ("AAPCS does not support -mcallee-super-interworking");
1587 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1588 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1589 will ever exist. GCC makes no attempt to support this combination. */
1590 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1591 sorry ("iWMMXt and hardware floating point");
1593 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1594 if (TARGET_THUMB2 && TARGET_IWMMXT)
1595 sorry ("Thumb-2 iWMMXt");
1597 /* __fp16 support currently assumes the core has ldrh. */
1598 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1599 sorry ("__fp16 and no ldrh");
1601 /* If soft-float is specified then don't use FPU. */
1602 if (TARGET_SOFT_FLOAT)
1603 arm_fpu_attr = FPU_NONE;
1605 if (TARGET_AAPCS_BASED)
1607 if (arm_abi == ARM_ABI_IWMMXT)
1608 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1609 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1610 && TARGET_HARD_FLOAT
1611 && TARGET_VFP)
1612 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1613 else
1614 arm_pcs_default = ARM_PCS_AAPCS;
1616 else
1618 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1619 sorry ("-mfloat-abi=hard and VFP");
1621 if (arm_abi == ARM_ABI_APCS)
1622 arm_pcs_default = ARM_PCS_APCS;
1623 else
1624 arm_pcs_default = ARM_PCS_ATPCS;
1627 /* For arm2/3 there is no need to do any scheduling if there is only
1628 a floating point emulator, or we are doing software floating-point. */
1629 if ((TARGET_SOFT_FLOAT
1630 || (TARGET_FPA && arm_fpu_desc->rev))
1631 && (tune_flags & FL_MODE32) == 0)
1632 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1634 /* Use the cp15 method if it is available. */
1635 if (target_thread_pointer == TP_AUTO)
1637 if (arm_arch6k && !TARGET_THUMB1)
1638 target_thread_pointer = TP_CP15;
1639 else
1640 target_thread_pointer = TP_SOFT;
1643 if (TARGET_HARD_TP && TARGET_THUMB1)
1644 error ("can not use -mtp=cp15 with 16-bit Thumb");
1646 /* Override the default structure alignment for AAPCS ABI. */
1647 if (!global_options_set.x_arm_structure_size_boundary)
1649 if (TARGET_AAPCS_BASED)
1650 arm_structure_size_boundary = 8;
1652 else
1654 if (arm_structure_size_boundary != 8
1655 && arm_structure_size_boundary != 32
1656 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1658 if (ARM_DOUBLEWORD_ALIGN)
1659 warning (0,
1660 "structure size boundary can only be set to 8, 32 or 64");
1661 else
1662 warning (0, "structure size boundary can only be set to 8 or 32");
1663 arm_structure_size_boundary
1664 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1668 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1670 error ("RTP PIC is incompatible with Thumb");
1671 flag_pic = 0;
1674 /* If stack checking is disabled, we can use r10 as the PIC register,
1675 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1676 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1678 if (TARGET_VXWORKS_RTP)
1679 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1680 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1683 if (flag_pic && TARGET_VXWORKS_RTP)
1684 arm_pic_register = 9;
1686 if (arm_pic_register_string != NULL)
1688 int pic_register = decode_reg_name (arm_pic_register_string);
1690 if (!flag_pic)
1691 warning (0, "-mpic-register= is useless without -fpic");
1693 /* Prevent the user from choosing an obviously stupid PIC register. */
1694 else if (pic_register < 0 || call_used_regs[pic_register]
1695 || pic_register == HARD_FRAME_POINTER_REGNUM
1696 || pic_register == STACK_POINTER_REGNUM
1697 || pic_register >= PC_REGNUM
1698 || (TARGET_VXWORKS_RTP
1699 && (unsigned int) pic_register != arm_pic_register))
1700 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1701 else
1702 arm_pic_register = pic_register;
1705 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1706 if (fix_cm3_ldrd == 2)
1708 if (arm_selected_cpu->core == cortexm3)
1709 fix_cm3_ldrd = 1;
1710 else
1711 fix_cm3_ldrd = 0;
1714 if (TARGET_THUMB1 && flag_schedule_insns)
1716 /* Don't warn since it's on by default in -O2. */
1717 flag_schedule_insns = 0;
1720 if (optimize_size)
1722 /* If optimizing for size, bump the number of instructions that we
1723 are prepared to conditionally execute (even on a StrongARM). */
1724 max_insns_skipped = 6;
1726 else
1728 /* StrongARM has early execution of branches, so a sequence
1729 that is worth skipping is shorter. */
1730 if (arm_tune_strongarm)
1731 max_insns_skipped = 3;
1734 /* Hot/Cold partitioning is not currently supported, since we can't
1735 handle literal pool placement in that case. */
1736 if (flag_reorder_blocks_and_partition)
1738 inform (input_location,
1739 "-freorder-blocks-and-partition not supported on this architecture");
1740 flag_reorder_blocks_and_partition = 0;
1741 flag_reorder_blocks = 1;
1744 if (flag_pic)
1745 /* Hoisting PIC address calculations more aggressively provides a small,
1746 but measurable, size reduction for PIC code. Therefore, we decrease
1747 the bar for unrestricted expression hoisting to the cost of PIC address
1748 calculation, which is 2 instructions. */
1749 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1750 global_options.x_param_values,
1751 global_options_set.x_param_values);
1753 /* ARM EABI defaults to strict volatile bitfields. */
1754 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
1755 flag_strict_volatile_bitfields = 1;
1757 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
1758 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
1759 if (flag_prefetch_loop_arrays < 0
1760 && HAVE_prefetch
1761 && optimize >= 3
1762 && current_tune->num_prefetch_slots > 0)
1763 flag_prefetch_loop_arrays = 1;
1765 /* Set up parameters to be used in prefetching algorithm. Do not override the
1766 defaults unless we are tuning for a core we have researched values for. */
1767 if (current_tune->num_prefetch_slots > 0)
1768 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1769 current_tune->num_prefetch_slots,
1770 global_options.x_param_values,
1771 global_options_set.x_param_values);
1772 if (current_tune->l1_cache_line_size >= 0)
1773 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1774 current_tune->l1_cache_line_size,
1775 global_options.x_param_values,
1776 global_options_set.x_param_values);
1777 if (current_tune->l1_cache_size >= 0)
1778 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1779 current_tune->l1_cache_size,
1780 global_options.x_param_values,
1781 global_options_set.x_param_values);
1783 /* Register global variables with the garbage collector. */
1784 arm_add_gc_roots ();
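/* Editorial illustration, not part of arm.c: a few command lines that
   exercise the diagnostics issued by the option override code above.
   The driver name and exact flag spellings are examples only.

     arm-eabi-gcc -fpic -mapcs-reent foo.c
         -> error: -fpic and -mapcs-reent are incompatible
     arm-eabi-gcc -mthumb -march=armv5te -mtp=cp15 foo.c
         -> error: can not use -mtp=cp15 with 16-bit Thumb
     arm-eabi-gcc -g -mno-apcs-frame foo.c      (on an APCS-frame default target)
         -> warning: -g with -mno-apcs-frame may not give sensible debugging  */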
1787 static void
1788 arm_add_gc_roots (void)
1790 gcc_obstack_init(&minipool_obstack);
1791 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1794 /* A table of known ARM exception types.
1795 For use with the interrupt function attribute. */
1797 typedef struct
1799 const char *const arg;
1800 const unsigned long return_value;
1802 isr_attribute_arg;
1804 static const isr_attribute_arg isr_attribute_args [] =
1806 { "IRQ", ARM_FT_ISR },
1807 { "irq", ARM_FT_ISR },
1808 { "FIQ", ARM_FT_FIQ },
1809 { "fiq", ARM_FT_FIQ },
1810 { "ABORT", ARM_FT_ISR },
1811 { "abort", ARM_FT_ISR },
1812 { "ABORT", ARM_FT_ISR },
1813 { "abort", ARM_FT_ISR },
1814 { "UNDEF", ARM_FT_EXCEPTION },
1815 { "undef", ARM_FT_EXCEPTION },
1816 { "SWI", ARM_FT_EXCEPTION },
1817 { "swi", ARM_FT_EXCEPTION },
1818 { NULL, ARM_FT_NORMAL }
1821 /* Returns the (interrupt) function type of the current
1822 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1824 static unsigned long
1825 arm_isr_value (tree argument)
1827 const isr_attribute_arg * ptr;
1828 const char * arg;
1830 if (!arm_arch_notm)
1831 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1833 /* No argument - default to IRQ. */
1834 if (argument == NULL_TREE)
1835 return ARM_FT_ISR;
1837 /* Get the value of the argument. */
1838 if (TREE_VALUE (argument) == NULL_TREE
1839 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1840 return ARM_FT_UNKNOWN;
1842 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1844 /* Check it against the list of known arguments. */
1845 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1846 if (streq (arg, ptr->arg))
1847 return ptr->return_value;
1849 /* An unrecognized interrupt type. */
1850 return ARM_FT_UNKNOWN;
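/* Editorial illustration, not part of arm.c: the strings matched by
   arm_isr_value come from user code via the "interrupt"/"isr" function
   attributes.  The handler names below are hypothetical.  */
#if 0
void irq_handler (void) __attribute__ ((interrupt ("IRQ")));    /* ARM_FT_ISR */
void fiq_handler (void) __attribute__ ((isr ("FIQ")));          /* ARM_FT_FIQ */
void und_handler (void) __attribute__ ((interrupt ("UNDEF")));  /* ARM_FT_EXCEPTION */
void any_handler (void) __attribute__ ((interrupt));            /* no argument: ARM_FT_ISR */
#endif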
1853 /* Computes the type of the current function. */
1855 static unsigned long
1856 arm_compute_func_type (void)
1858 unsigned long type = ARM_FT_UNKNOWN;
1859 tree a;
1860 tree attr;
1862 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1864 /* Decide if the current function is volatile. Such functions
1865 never return, and many memory cycles can be saved by not storing
1866 register values that will never be needed again. This optimization
1867 was added to speed up context switching in a kernel application. */
1868 if (optimize > 0
1869 && (TREE_NOTHROW (current_function_decl)
1870 || !(flag_unwind_tables
1871 || (flag_exceptions
1872 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
1873 && TREE_THIS_VOLATILE (current_function_decl))
1874 type |= ARM_FT_VOLATILE;
1876 if (cfun->static_chain_decl != NULL)
1877 type |= ARM_FT_NESTED;
1879 attr = DECL_ATTRIBUTES (current_function_decl);
1881 a = lookup_attribute ("naked", attr);
1882 if (a != NULL_TREE)
1883 type |= ARM_FT_NAKED;
1885 a = lookup_attribute ("isr", attr);
1886 if (a == NULL_TREE)
1887 a = lookup_attribute ("interrupt", attr);
1889 if (a == NULL_TREE)
1890 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1891 else
1892 type |= arm_isr_value (TREE_VALUE (a));
1894 return type;
1897 /* Returns the type of the current function. */
1899 unsigned long
1900 arm_current_func_type (void)
1902 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1903 cfun->machine->func_type = arm_compute_func_type ();
1905 return cfun->machine->func_type;
1908 bool
1909 arm_allocate_stack_slots_for_args (void)
1911 /* Naked functions should not allocate stack slots for arguments. */
1912 return !IS_NAKED (arm_current_func_type ());
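/* Editorial illustration, not part of arm.c: a "naked" function supplies
   its own prologue and epilogue, so arm_allocate_stack_slots_for_args
   returns false for it and incoming arguments keep their registers.
   The function name and its assembly body are hypothetical.  */
#if 0
void __attribute__ ((naked))
context_switch (void)
{
  __asm__ volatile ("stmfd\tsp!, {r4-r11, lr}\n\t"
                    /* ...hand-written body... */
                    "ldmfd\tsp!, {r4-r11, pc}");
}
#endif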
1916 /* Output assembler code for a block containing the constant parts
1917 of a trampoline, leaving space for the variable parts.
1919 On the ARM, (if r8 is the static chain regnum, and remembering that
1920 referencing pc adds an offset of 8) the trampoline looks like:
1921 ldr r8, [pc, #0]
1922 ldr pc, [pc]
1923 .word static chain value
1924 .word function's address
1925 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
1927 static void
1928 arm_asm_trampoline_template (FILE *f)
1930 if (TARGET_ARM)
1932 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
1933 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
1935 else if (TARGET_THUMB2)
1937 /* The Thumb-2 trampoline is similar to the arm implementation.
1938 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
1939 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
1940 STATIC_CHAIN_REGNUM, PC_REGNUM);
1941 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
1943 else
1945 ASM_OUTPUT_ALIGN (f, 2);
1946 fprintf (f, "\t.code\t16\n");
1947 fprintf (f, ".Ltrampoline_start:\n");
1948 asm_fprintf (f, "\tpush\t{r0, r1}\n");
1949 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
1950 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
1951 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
1952 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
1953 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
1955 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
1956 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
1959 /* Emit RTL insns to initialize the variable parts of a trampoline. */
1961 static void
1962 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
1964 rtx fnaddr, mem, a_tramp;
1966 emit_block_move (m_tramp, assemble_trampoline_template (),
1967 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
1969 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
1970 emit_move_insn (mem, chain_value);
1972 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
1973 fnaddr = XEXP (DECL_RTL (fndecl), 0);
1974 emit_move_insn (mem, fnaddr);
1976 a_tramp = XEXP (m_tramp, 0);
1977 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
1978 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
1979 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
1982 /* Thumb trampolines should be entered in thumb mode, so set
1983 the bottom bit of the address. */
1985 static rtx
1986 arm_trampoline_adjust_address (rtx addr)
1988 if (TARGET_THUMB)
1989 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
1990 NULL, 0, OPTAB_LIB_WIDEN);
1991 return addr;
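/* Editorial illustration, not part of arm.c: taking the address of a
   nested function is what makes GCC materialize the trampoline built by
   the routines above; the static chain register is loaded from the first
   variable word of the template.  Names are hypothetical.  */
#if 0
int
outer (int bias)
{
  int inner (int x) { return x + bias; }   /* needs the static chain */
  int (*fp) (int) = inner;                 /* address taken: trampoline on the stack */
  return fp (41);
}
#endif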
1994 /* Return 1 if it is possible to return using a single instruction.
1995 If SIBLING is non-null, this is a test for a return before a sibling
1996 call. SIBLING is the call insn, so we can examine its register usage. */
1999 use_return_insn (int iscond, rtx sibling)
2001 int regno;
2002 unsigned int func_type;
2003 unsigned long saved_int_regs;
2004 unsigned HOST_WIDE_INT stack_adjust;
2005 arm_stack_offsets *offsets;
2007 /* Never use a return instruction before reload has run. */
2008 if (!reload_completed)
2009 return 0;
2011 func_type = arm_current_func_type ();
2013 /* Naked, volatile and stack alignment functions need special
2014 consideration. */
2015 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2016 return 0;
2018 /* So do interrupt functions that use the frame pointer and Thumb
2019 interrupt functions. */
2020 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2021 return 0;
2023 offsets = arm_get_frame_offsets ();
2024 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2026 /* As do variadic functions. */
2027 if (crtl->args.pretend_args_size
2028 || cfun->machine->uses_anonymous_args
2029 /* Or if the function calls __builtin_eh_return () */
2030 || crtl->calls_eh_return
2031 /* Or if the function calls alloca */
2032 || cfun->calls_alloca
2033 /* Or if there is a stack adjustment. However, if the stack pointer
2034 is saved on the stack, we can use a pre-incrementing stack load. */
2035 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2036 && stack_adjust == 4)))
2037 return 0;
2039 saved_int_regs = offsets->saved_regs_mask;
2041 /* Unfortunately, the insn
2043 ldmib sp, {..., sp, ...}
2045 triggers a bug on most SA-110 based devices, such that the stack
2046 pointer won't be correctly restored if the instruction takes a
2047 page fault. We work around this problem by popping r3 along with
2048 the other registers, since that is never slower than executing
2049 another instruction.
2051 We test for !arm_arch5 here, because code for any architecture
2052 less than this could potentially be run on one of the buggy
2053 chips. */
2054 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2056 /* Validate that r3 is a call-clobbered register (always true in
2057 the default abi) ... */
2058 if (!call_used_regs[3])
2059 return 0;
2061 /* ... that it isn't being used for a return value ... */
2062 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2063 return 0;
2065 /* ... or for a tail-call argument ... */
2066 if (sibling)
2068 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2070 if (find_regno_fusage (sibling, USE, 3))
2071 return 0;
2074 /* ... and that there are no call-saved registers in r0-r2
2075 (always true in the default ABI). */
2076 if (saved_int_regs & 0x7)
2077 return 0;
2080 /* Can't be done if interworking with Thumb, and any registers have been
2081 stacked. */
2082 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2083 return 0;
2085 /* On StrongARM, conditional returns are expensive if they aren't
2086 taken and multiple registers have been stacked. */
2087 if (iscond && arm_tune_strongarm)
2089 /* Conditional return when just the LR is stored is a simple
2090 conditional-load instruction; that's not expensive. */
2091 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2092 return 0;
2094 if (flag_pic
2095 && arm_pic_register != INVALID_REGNUM
2096 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2097 return 0;
2100 /* If there are saved registers but the LR isn't saved, then we need
2101 two instructions for the return. */
2102 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2103 return 0;
2105 /* Can't be done if any of the FPA regs are pushed,
2106 since this also requires an insn. */
2107 if (TARGET_HARD_FLOAT && TARGET_FPA)
2108 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2109 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2110 return 0;
2112 /* Likewise VFP regs. */
2113 if (TARGET_HARD_FLOAT && TARGET_VFP)
2114 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2115 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2116 return 0;
2118 if (TARGET_REALLY_IWMMXT)
2119 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2120 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2121 return 0;
2123 return 1;
2126 /* Return TRUE if int I is a valid immediate ARM constant. */
2129 const_ok_for_arm (HOST_WIDE_INT i)
2131 int lowbit;
2133 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2134 be all zero, or all one. */
2135 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2136 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2137 != ((~(unsigned HOST_WIDE_INT) 0)
2138 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2139 return FALSE;
2141 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2143 /* Fast return for 0 and small values. We must do this for zero, since
2144 the code below can't handle that one case. */
2145 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2146 return TRUE;
2148 /* Get the number of trailing zeros. */
2149 lowbit = ffs((int) i) - 1;
2151 /* Only even shifts are allowed in ARM mode so round down to the
2152 nearest even number. */
2153 if (TARGET_ARM)
2154 lowbit &= ~1;
2156 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2157 return TRUE;
2159 if (TARGET_ARM)
2161 /* Allow rotated constants in ARM mode. */
2162 if (lowbit <= 4
2163 && ((i & ~0xc000003f) == 0
2164 || (i & ~0xf000000f) == 0
2165 || (i & ~0xfc000003) == 0))
2166 return TRUE;
2168 else
2170 HOST_WIDE_INT v;
2172 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2173 v = i & 0xff;
2174 v |= v << 16;
2175 if (i == v || i == (v | (v << 8)))
2176 return TRUE;
2178 /* Allow repeated pattern 0xXY00XY00. */
2179 v = i & 0xff00;
2180 v |= v << 16;
2181 if (i == v)
2182 return TRUE;
2185 return FALSE;
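/* Editorial illustration, not part of arm.c: in ARM (not Thumb) mode a
   valid immediate is an 8-bit value rotated right by an even amount, so
   const_ok_for_arm behaves roughly as sketched below.  */
#if 0
static void
const_ok_for_arm_examples (void)
{
  gcc_assert (const_ok_for_arm (0x000000ff));    /* 0xff, no rotation */
  gcc_assert (const_ok_for_arm (0xff000000));    /* 0xff rotated right by 8 */
  gcc_assert (const_ok_for_arm (0x00000100));    /* 0x01 rotated right by 24 */
  gcc_assert (const_ok_for_arm (0xf000000f));    /* 0xff rotated right by 4 */
  gcc_assert (! const_ok_for_arm (0x00000101));  /* set bits span more than 8
                                                    bits for every even rotation */
}
#endif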
2188 /* Return true if I is a valid constant for the operation CODE. */
2189 static int
2190 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2192 if (const_ok_for_arm (i))
2193 return 1;
2195 switch (code)
2197 case SET:
2198 /* See if we can use movw. */
2199 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2200 return 1;
2201 else
2202 return 0;
2204 case PLUS:
2205 case COMPARE:
2206 case EQ:
2207 case NE:
2208 case GT:
2209 case LE:
2210 case LT:
2211 case GE:
2212 case GEU:
2213 case LTU:
2214 case GTU:
2215 case LEU:
2216 case UNORDERED:
2217 case ORDERED:
2218 case UNEQ:
2219 case UNGE:
2220 case UNLT:
2221 case UNGT:
2222 case UNLE:
2223 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2225 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2226 case XOR:
2227 return 0;
2229 case IOR:
2230 if (TARGET_THUMB2)
2231 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2232 return 0;
2234 case AND:
2235 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2237 default:
2238 gcc_unreachable ();
2242 /* Emit a sequence of insns to handle a large constant.
2243 CODE is the code of the operation required, it can be any of SET, PLUS,
2244 IOR, AND, XOR, MINUS;
2245 MODE is the mode in which the operation is being performed;
2246 VAL is the integer to operate on;
2247 SOURCE is the other operand (a register, or a null-pointer for SET);
2248 SUBTARGETS means it is safe to create scratch registers if that will
2249 either produce a simpler sequence, or we will want to cse the values.
2250 Return value is the number of insns emitted. */
2252 /* ??? Tweak this for thumb2. */
2254 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2255 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2257 rtx cond;
2259 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2260 cond = COND_EXEC_TEST (PATTERN (insn));
2261 else
2262 cond = NULL_RTX;
2264 if (subtargets || code == SET
2265 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2266 && REGNO (target) != REGNO (source)))
2268 /* After arm_reorg has been called, we can't fix up expensive
2269 constants by pushing them into memory so we must synthesize
2270 them in-line, regardless of the cost. This is only likely to
2271 be more costly on chips that have load delay slots and we are
2272 compiling without running the scheduler (so no splitting
2273 occurred before the final instruction emission).
2275 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2277 if (!after_arm_reorg
2278 && !cond
2279 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2280 1, 0)
2281 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2282 + (code != SET))))
2284 if (code == SET)
2286 /* Currently SET is the only monadic value for CODE; all
2287 the rest are dyadic. */
2288 if (TARGET_USE_MOVT)
2289 arm_emit_movpair (target, GEN_INT (val));
2290 else
2291 emit_set_insn (target, GEN_INT (val));
2293 return 1;
2295 else
2297 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2299 if (TARGET_USE_MOVT)
2300 arm_emit_movpair (temp, GEN_INT (val));
2301 else
2302 emit_set_insn (temp, GEN_INT (val));
2304 /* For MINUS, the value is subtracted from, since we never
2305 have subtraction of a constant. */
2306 if (code == MINUS)
2307 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2308 else
2309 emit_set_insn (target,
2310 gen_rtx_fmt_ee (code, mode, source, temp));
2311 return 2;
2316 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2320 /* Return the number of instructions required to synthesize the given
2321 constant, if we start emitting them from bit-position I. */
2322 static int
2323 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2325 HOST_WIDE_INT temp1;
2326 int step_size = TARGET_ARM ? 2 : 1;
2327 int num_insns = 0;
2329 gcc_assert (TARGET_ARM || i == 0);
2333 int end;
2335 if (i <= 0)
2336 i += 32;
2337 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2339 end = i - 8;
2340 if (end < 0)
2341 end += 32;
2342 temp1 = remainder & ((0x0ff << end)
2343 | ((i < end) ? (0xff >> (32 - end)) : 0));
2344 remainder &= ~temp1;
2345 num_insns++;
2346 i -= 8 - step_size;
2348 i -= step_size;
2349 } while (remainder);
2350 return num_insns;
2353 static int
2354 find_best_start (unsigned HOST_WIDE_INT remainder)
2356 int best_consecutive_zeros = 0;
2357 int i;
2358 int best_start = 0;
2360 /* If we aren't targeting ARM, the best place to start is always at
2361 the bottom. */
2362 if (! TARGET_ARM)
2363 return 0;
2365 for (i = 0; i < 32; i += 2)
2367 int consecutive_zeros = 0;
2369 if (!(remainder & (3 << i)))
2371 while ((i < 32) && !(remainder & (3 << i)))
2373 consecutive_zeros += 2;
2374 i += 2;
2376 if (consecutive_zeros > best_consecutive_zeros)
2378 best_consecutive_zeros = consecutive_zeros;
2379 best_start = i - consecutive_zeros;
2381 i -= 2;
2385 /* So long as it won't require any more insns to do so, it's
2386 desirable to emit a small constant (in bits 0...9) in the last
2387 insn. This way there is more chance that it can be combined with
2388 a later addressing insn to form a pre-indexed load or store
2389 operation. Consider:
2391 *((volatile int *)0xe0000100) = 1;
2392 *((volatile int *)0xe0000110) = 2;
2394 We want this to wind up as:
2396 mov rA, #0xe0000000
2397 mov rB, #1
2398 str rB, [rA, #0x100]
2399 mov rB, #2
2400 str rB, [rA, #0x110]
2402 rather than having to synthesize both large constants from scratch.
2404 Therefore, we calculate how many insns would be required to emit
2405 the constant starting from `best_start', and also starting from
2406 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2407 yield a shorter sequence, we may as well use zero. */
2408 if (best_start != 0
2409 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2410 && (count_insns_for_constant (remainder, 0) <=
2411 count_insns_for_constant (remainder, best_start)))
2412 best_start = 0;
2414 return best_start;
2417 /* Emit an instruction with the indicated PATTERN. If COND is
2418 non-NULL, conditionalize the execution of the instruction on COND
2419 being true. */
2421 static void
2422 emit_constant_insn (rtx cond, rtx pattern)
2424 if (cond)
2425 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2426 emit_insn (pattern);
2429 /* As above, but extra parameter GENERATE which, if clear, suppresses
2430 RTL generation. */
2431 /* ??? This needs more work for thumb2. */
2433 static int
2434 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2435 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2436 int generate)
2438 int can_invert = 0;
2439 int can_negate = 0;
2440 int final_invert = 0;
2441 int i;
2442 int num_bits_set = 0;
2443 int set_sign_bit_copies = 0;
2444 int clear_sign_bit_copies = 0;
2445 int clear_zero_bit_copies = 0;
2446 int set_zero_bit_copies = 0;
2447 int insns = 0;
2448 unsigned HOST_WIDE_INT temp1, temp2;
2449 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2450 int step_size = TARGET_ARM ? 2 : 1;
2452 /* Find out which operations are safe for a given CODE. Also do a quick
2453 check for degenerate cases; these can occur when DImode operations
2454 are split. */
2455 switch (code)
2457 case SET:
2458 can_invert = 1;
2459 can_negate = 1;
2460 break;
2462 case PLUS:
2463 can_negate = 1;
2464 break;
2466 case IOR:
2467 if (remainder == 0xffffffff)
2469 if (generate)
2470 emit_constant_insn (cond,
2471 gen_rtx_SET (VOIDmode, target,
2472 GEN_INT (ARM_SIGN_EXTEND (val))));
2473 return 1;
2476 if (remainder == 0)
2478 if (reload_completed && rtx_equal_p (target, source))
2479 return 0;
2481 if (generate)
2482 emit_constant_insn (cond,
2483 gen_rtx_SET (VOIDmode, target, source));
2484 return 1;
2486 break;
2488 case AND:
2489 if (remainder == 0)
2491 if (generate)
2492 emit_constant_insn (cond,
2493 gen_rtx_SET (VOIDmode, target, const0_rtx));
2494 return 1;
2496 if (remainder == 0xffffffff)
2498 if (reload_completed && rtx_equal_p (target, source))
2499 return 0;
2500 if (generate)
2501 emit_constant_insn (cond,
2502 gen_rtx_SET (VOIDmode, target, source));
2503 return 1;
2505 can_invert = 1;
2506 break;
2508 case XOR:
2509 if (remainder == 0)
2511 if (reload_completed && rtx_equal_p (target, source))
2512 return 0;
2513 if (generate)
2514 emit_constant_insn (cond,
2515 gen_rtx_SET (VOIDmode, target, source));
2516 return 1;
2519 if (remainder == 0xffffffff)
2521 if (generate)
2522 emit_constant_insn (cond,
2523 gen_rtx_SET (VOIDmode, target,
2524 gen_rtx_NOT (mode, source)));
2525 return 1;
2527 break;
2529 case MINUS:
2530 /* We treat MINUS as (val - source), since (source - val) is always
2531 passed as (source + (-val)). */
2532 if (remainder == 0)
2534 if (generate)
2535 emit_constant_insn (cond,
2536 gen_rtx_SET (VOIDmode, target,
2537 gen_rtx_NEG (mode, source)));
2538 return 1;
2540 if (const_ok_for_arm (val))
2542 if (generate)
2543 emit_constant_insn (cond,
2544 gen_rtx_SET (VOIDmode, target,
2545 gen_rtx_MINUS (mode, GEN_INT (val),
2546 source)));
2547 return 1;
2549 can_negate = 1;
2551 break;
2553 default:
2554 gcc_unreachable ();
2557 /* If we can do it in one insn get out quickly. */
2558 if (const_ok_for_op (val, code))
2560 if (generate)
2561 emit_constant_insn (cond,
2562 gen_rtx_SET (VOIDmode, target,
2563 (source
2564 ? gen_rtx_fmt_ee (code, mode, source,
2565 GEN_INT (val))
2566 : GEN_INT (val))));
2567 return 1;
2570 /* Calculate a few attributes that may be useful for specific
2571 optimizations. */
2572 /* Count number of leading zeros. */
2573 for (i = 31; i >= 0; i--)
2575 if ((remainder & (1 << i)) == 0)
2576 clear_sign_bit_copies++;
2577 else
2578 break;
2581 /* Count number of leading 1's. */
2582 for (i = 31; i >= 0; i--)
2584 if ((remainder & (1 << i)) != 0)
2585 set_sign_bit_copies++;
2586 else
2587 break;
2590 /* Count number of trailing zeros. */
2591 for (i = 0; i <= 31; i++)
2593 if ((remainder & (1 << i)) == 0)
2594 clear_zero_bit_copies++;
2595 else
2596 break;
2599 /* Count number of trailing 1's. */
2600 for (i = 0; i <= 31; i++)
2602 if ((remainder & (1 << i)) != 0)
2603 set_zero_bit_copies++;
2604 else
2605 break;
2608 switch (code)
2610 case SET:
2611 /* See if we can do this by sign_extending a constant that is known
2612 to be negative. This is a good way of doing it, since the shift
2613 may well merge into a subsequent insn. */
2614 if (set_sign_bit_copies > 1)
2616 if (const_ok_for_arm
2617 (temp1 = ARM_SIGN_EXTEND (remainder
2618 << (set_sign_bit_copies - 1))))
2620 if (generate)
2622 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2623 emit_constant_insn (cond,
2624 gen_rtx_SET (VOIDmode, new_src,
2625 GEN_INT (temp1)));
2626 emit_constant_insn (cond,
2627 gen_ashrsi3 (target, new_src,
2628 GEN_INT (set_sign_bit_copies - 1)));
2630 return 2;
2632 /* For an inverted constant, we will need to set the low bits,
2633 these will be shifted out of harm's way. */
2634 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2635 if (const_ok_for_arm (~temp1))
2637 if (generate)
2639 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2640 emit_constant_insn (cond,
2641 gen_rtx_SET (VOIDmode, new_src,
2642 GEN_INT (temp1)));
2643 emit_constant_insn (cond,
2644 gen_ashrsi3 (target, new_src,
2645 GEN_INT (set_sign_bit_copies - 1)));
2647 return 2;
2651 /* See if we can calculate the value as the difference between two
2652 valid immediates. */
2653 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2655 int topshift = clear_sign_bit_copies & ~1;
2657 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2658 & (0xff000000 >> topshift));
2660 /* If temp1 is zero, then that means the 9 most significant
2661 bits of remainder were 1 and we've caused it to overflow.
2662 When topshift is 0 we don't need to do anything since we
2663 can borrow from 'bit 32'. */
2664 if (temp1 == 0 && topshift != 0)
2665 temp1 = 0x80000000 >> (topshift - 1);
2667 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2669 if (const_ok_for_arm (temp2))
2671 if (generate)
2673 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2674 emit_constant_insn (cond,
2675 gen_rtx_SET (VOIDmode, new_src,
2676 GEN_INT (temp1)));
2677 emit_constant_insn (cond,
2678 gen_addsi3 (target, new_src,
2679 GEN_INT (-temp2)));
2682 return 2;
2686 /* See if we can generate this by setting the bottom (or the top)
2687 16 bits, and then shifting these into the other half of the
2688 word. We only look for the simplest cases, to do more would cost
2689 too much. Be careful, however, not to generate this when the
2690 alternative would take fewer insns. */
2691 if (val & 0xffff0000)
2693 temp1 = remainder & 0xffff0000;
2694 temp2 = remainder & 0x0000ffff;
2696 /* Overlaps outside this range are best done using other methods. */
2697 for (i = 9; i < 24; i++)
2699 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2700 && !const_ok_for_arm (temp2))
2702 rtx new_src = (subtargets
2703 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2704 : target);
2705 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2706 source, subtargets, generate);
2707 source = new_src;
2708 if (generate)
2709 emit_constant_insn
2710 (cond,
2711 gen_rtx_SET
2712 (VOIDmode, target,
2713 gen_rtx_IOR (mode,
2714 gen_rtx_ASHIFT (mode, source,
2715 GEN_INT (i)),
2716 source)));
2717 return insns + 1;
2721 /* Don't duplicate cases already considered. */
2722 for (i = 17; i < 24; i++)
2724 if (((temp1 | (temp1 >> i)) == remainder)
2725 && !const_ok_for_arm (temp1))
2727 rtx new_src = (subtargets
2728 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2729 : target);
2730 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2731 source, subtargets, generate);
2732 source = new_src;
2733 if (generate)
2734 emit_constant_insn
2735 (cond,
2736 gen_rtx_SET (VOIDmode, target,
2737 gen_rtx_IOR
2738 (mode,
2739 gen_rtx_LSHIFTRT (mode, source,
2740 GEN_INT (i)),
2741 source)));
2742 return insns + 1;
2746 break;
2748 case IOR:
2749 case XOR:
2750 /* If we have IOR or XOR, and the constant can be loaded in a
2751 single instruction, and we can find a temporary to put it in,
2752 then this can be done in two instructions instead of 3-4. */
2753 if (subtargets
2754 /* TARGET can't be NULL if SUBTARGETS is 0 */
2755 || (reload_completed && !reg_mentioned_p (target, source)))
2757 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2759 if (generate)
2761 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2763 emit_constant_insn (cond,
2764 gen_rtx_SET (VOIDmode, sub,
2765 GEN_INT (val)));
2766 emit_constant_insn (cond,
2767 gen_rtx_SET (VOIDmode, target,
2768 gen_rtx_fmt_ee (code, mode,
2769 source, sub)));
2771 return 2;
2775 if (code == XOR)
2776 break;
2778 /* Convert.
2779 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
2780 and the remainder 0s, e.g. 0xfff00000)
2781 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2783 This can be done in 2 instructions by using shifts with mov or mvn.
2784 e.g. for
2785 x = x | 0xfff00000;
2786 we generate:
2787 mvn r0, r0, asl #12
2788 mvn r0, r0, lsr #12 */
2789 if (set_sign_bit_copies > 8
2790 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2792 if (generate)
2794 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2795 rtx shift = GEN_INT (set_sign_bit_copies);
2797 emit_constant_insn
2798 (cond,
2799 gen_rtx_SET (VOIDmode, sub,
2800 gen_rtx_NOT (mode,
2801 gen_rtx_ASHIFT (mode,
2802 source,
2803 shift))));
2804 emit_constant_insn
2805 (cond,
2806 gen_rtx_SET (VOIDmode, target,
2807 gen_rtx_NOT (mode,
2808 gen_rtx_LSHIFTRT (mode, sub,
2809 shift))));
2811 return 2;
2814 /* Convert
2815 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2817 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2819 E.g. for r0 = r0 | 0xfff
2820 mvn r0, r0, lsr #12
2821 mvn r0, r0, asl #12
2824 if (set_zero_bit_copies > 8
2825 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2827 if (generate)
2829 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2830 rtx shift = GEN_INT (set_zero_bit_copies);
2832 emit_constant_insn
2833 (cond,
2834 gen_rtx_SET (VOIDmode, sub,
2835 gen_rtx_NOT (mode,
2836 gen_rtx_LSHIFTRT (mode,
2837 source,
2838 shift))));
2839 emit_constant_insn
2840 (cond,
2841 gen_rtx_SET (VOIDmode, target,
2842 gen_rtx_NOT (mode,
2843 gen_rtx_ASHIFT (mode, sub,
2844 shift))));
2846 return 2;
2849 /* This will never be reached for Thumb2 because orn is a valid
2850 instruction. This is for Thumb1 and the ARM 32 bit cases.
2852 x = y | constant (such that ~constant is a valid constant)
2853 Transform this to
2854 x = ~(~y & ~constant).
2856 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2858 if (generate)
2860 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2861 emit_constant_insn (cond,
2862 gen_rtx_SET (VOIDmode, sub,
2863 gen_rtx_NOT (mode, source)));
2864 source = sub;
2865 if (subtargets)
2866 sub = gen_reg_rtx (mode);
2867 emit_constant_insn (cond,
2868 gen_rtx_SET (VOIDmode, sub,
2869 gen_rtx_AND (mode, source,
2870 GEN_INT (temp1))));
2871 emit_constant_insn (cond,
2872 gen_rtx_SET (VOIDmode, target,
2873 gen_rtx_NOT (mode, sub)));
2875 return 3;
2877 break;
2879 case AND:
2880 /* See if two shifts will do two or more insns' worth of work. */
2881 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2883 HOST_WIDE_INT shift_mask = ((0xffffffff
2884 << (32 - clear_sign_bit_copies))
2885 & 0xffffffff);
2887 if ((remainder | shift_mask) != 0xffffffff)
2889 if (generate)
2891 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2892 insns = arm_gen_constant (AND, mode, cond,
2893 remainder | shift_mask,
2894 new_src, source, subtargets, 1);
2895 source = new_src;
2897 else
2899 rtx targ = subtargets ? NULL_RTX : target;
2900 insns = arm_gen_constant (AND, mode, cond,
2901 remainder | shift_mask,
2902 targ, source, subtargets, 0);
2906 if (generate)
2908 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2909 rtx shift = GEN_INT (clear_sign_bit_copies);
2911 emit_insn (gen_ashlsi3 (new_src, source, shift));
2912 emit_insn (gen_lshrsi3 (target, new_src, shift));
2915 return insns + 2;
2918 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2920 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2922 if ((remainder | shift_mask) != 0xffffffff)
2924 if (generate)
2926 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2928 insns = arm_gen_constant (AND, mode, cond,
2929 remainder | shift_mask,
2930 new_src, source, subtargets, 1);
2931 source = new_src;
2933 else
2935 rtx targ = subtargets ? NULL_RTX : target;
2937 insns = arm_gen_constant (AND, mode, cond,
2938 remainder | shift_mask,
2939 targ, source, subtargets, 0);
2943 if (generate)
2945 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2946 rtx shift = GEN_INT (clear_zero_bit_copies);
2948 emit_insn (gen_lshrsi3 (new_src, source, shift));
2949 emit_insn (gen_ashlsi3 (target, new_src, shift));
2952 return insns + 2;
2955 break;
2957 default:
2958 break;
2961 for (i = 0; i < 32; i++)
2962 if (remainder & (1 << i))
2963 num_bits_set++;
2965 if ((code == AND) || (can_invert && num_bits_set > 16))
2966 remainder ^= 0xffffffff;
2967 else if (code == PLUS && num_bits_set > 16)
2968 remainder = (-remainder) & 0xffffffff;
2970 /* For XOR, if more than half the bits are set and there's a sequence
2971 of more than 8 consecutive ones in the pattern then we can XOR by the
2972 inverted constant and then invert the final result; this may save an
2973 instruction and might also lead to the final mvn being merged with
2974 some other operation. */
2975 else if (code == XOR && num_bits_set > 16
2976 && (count_insns_for_constant (remainder ^ 0xffffffff,
2977 find_best_start
2978 (remainder ^ 0xffffffff))
2979 < count_insns_for_constant (remainder,
2980 find_best_start (remainder))))
2982 remainder ^= 0xffffffff;
2983 final_invert = 1;
2985 else
2987 can_invert = 0;
2988 can_negate = 0;
2991 /* Now try and find a way of doing the job in either two or three
2992 instructions.
2993 We start by looking for the largest block of zeros that are aligned on
2994 a 2-bit boundary; we then fill up the temps, wrapping around to the
2995 top of the word when we drop off the bottom.
2996 In the worst case this code should produce no more than four insns.
2997 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2998 best place to start. */
3000 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3001 the same. */
3003 /* Now start emitting the insns. */
3004 i = find_best_start (remainder);
3007 int end;
3009 if (i <= 0)
3010 i += 32;
3011 if (remainder & (3 << (i - 2)))
3013 end = i - 8;
3014 if (end < 0)
3015 end += 32;
3016 temp1 = remainder & ((0x0ff << end)
3017 | ((i < end) ? (0xff >> (32 - end)) : 0));
3018 remainder &= ~temp1;
3020 if (generate)
3022 rtx new_src, temp1_rtx;
3024 if (code == SET || code == MINUS)
3026 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3027 if (can_invert && code != MINUS)
3028 temp1 = ~temp1;
3030 else
3032 if ((final_invert || remainder) && subtargets)
3033 new_src = gen_reg_rtx (mode);
3034 else
3035 new_src = target;
3036 if (can_invert)
3037 temp1 = ~temp1;
3038 else if (can_negate)
3039 temp1 = -temp1;
3042 temp1 = trunc_int_for_mode (temp1, mode);
3043 temp1_rtx = GEN_INT (temp1);
3045 if (code == SET)
3047 else if (code == MINUS)
3048 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3049 else
3050 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3052 emit_constant_insn (cond,
3053 gen_rtx_SET (VOIDmode, new_src,
3054 temp1_rtx));
3055 source = new_src;
3058 if (code == SET)
3060 can_invert = 0;
3061 code = PLUS;
3063 else if (code == MINUS)
3064 code = PLUS;
3066 insns++;
3067 i -= 8 - step_size;
3069 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary
3070 shifts. */
3071 i -= step_size;
3073 while (remainder);
3076 if (final_invert)
3078 if (generate)
3079 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3080 gen_rtx_NOT (mode, source)));
3081 insns++;
3084 return insns;
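/* Editorial illustration, not part of arm.c: one of the tricks above in
   action.  0xffff0000 is not a valid immediate (nor is its inverse), so
   an AND with it is synthesized from two shifts rather than a constant
   load plus an AND; the register names are only placeholders.

       lsr     rD, rS, #16
       lsl     rD, rD, #16          @ rD = rS & 0xffff0000, two insns  */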
3087 /* Canonicalize a comparison so that we are more likely to recognize it.
3088 This can be done for a few constant compares, where we can make the
3089 immediate value easier to load. */
3091 enum rtx_code
3092 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3094 enum machine_mode mode;
3095 unsigned HOST_WIDE_INT i, maxval;
3097 mode = GET_MODE (*op0);
3098 if (mode == VOIDmode)
3099 mode = GET_MODE (*op1);
3101 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3103 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3104 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3105 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3106 for GTU/LEU in Thumb mode. */
3107 if (mode == DImode)
3109 rtx tem;
3111 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3112 available. */
3113 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3114 return code;
3116 if (code == GT || code == LE
3117 || (!TARGET_ARM && (code == GTU || code == LEU)))
3119 /* Missing comparison. First try to use an available
3120 comparison. */
3121 if (GET_CODE (*op1) == CONST_INT)
3123 i = INTVAL (*op1);
3124 switch (code)
3126 case GT:
3127 case LE:
3128 if (i != maxval
3129 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3131 *op1 = GEN_INT (i + 1);
3132 return code == GT ? GE : LT;
3134 break;
3135 case GTU:
3136 case LEU:
3137 if (i != ~((unsigned HOST_WIDE_INT) 0)
3138 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3140 *op1 = GEN_INT (i + 1);
3141 return code == GTU ? GEU : LTU;
3143 break;
3144 default:
3145 gcc_unreachable ();
3149 /* If that did not work, reverse the condition. */
3150 tem = *op0;
3151 *op0 = *op1;
3152 *op1 = tem;
3153 return swap_condition (code);
3156 return code;
3159 /* Comparisons smaller than DImode. Only adjust comparisons against
3160 an out-of-range constant. */
3161 if (GET_CODE (*op1) != CONST_INT
3162 || const_ok_for_arm (INTVAL (*op1))
3163 || const_ok_for_arm (- INTVAL (*op1)))
3164 return code;
3166 i = INTVAL (*op1);
3168 switch (code)
3170 case EQ:
3171 case NE:
3172 return code;
3174 case GT:
3175 case LE:
3176 if (i != maxval
3177 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3179 *op1 = GEN_INT (i + 1);
3180 return code == GT ? GE : LT;
3182 break;
3184 case GE:
3185 case LT:
3186 if (i != ~maxval
3187 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3189 *op1 = GEN_INT (i - 1);
3190 return code == GE ? GT : LE;
3192 break;
3194 case GTU:
3195 case LEU:
3196 if (i != ~((unsigned HOST_WIDE_INT) 0)
3197 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3199 *op1 = GEN_INT (i + 1);
3200 return code == GTU ? GEU : LTU;
3202 break;
3204 case GEU:
3205 case LTU:
3206 if (i != 0
3207 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3209 *op1 = GEN_INT (i - 1);
3210 return code == GEU ? GTU : LEU;
3212 break;
3214 default:
3215 gcc_unreachable ();
3218 return code;
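/* Editorial illustration, not part of arm.c: neither 511 nor -511 is a
   valid ARM immediate, but 512 is, so a test such as "x > 511" reaches
   this function as (GT x 511) and is rewritten to (GE x 512); the
   comparison can then use an immediate operand instead of first loading
   511 into a register.  */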
3222 /* Define how to find the value returned by a function. */
3224 static rtx
3225 arm_function_value(const_tree type, const_tree func,
3226 bool outgoing ATTRIBUTE_UNUSED)
3228 enum machine_mode mode;
3229 int unsignedp ATTRIBUTE_UNUSED;
3230 rtx r ATTRIBUTE_UNUSED;
3232 mode = TYPE_MODE (type);
3234 if (TARGET_AAPCS_BASED)
3235 return aapcs_allocate_return_reg (mode, type, func);
3237 /* Promote integer types. */
3238 if (INTEGRAL_TYPE_P (type))
3239 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3241 /* Promote small structs returned in a register to full-word size
3242 for big-endian AAPCS. */
3243 if (arm_return_in_msb (type))
3245 HOST_WIDE_INT size = int_size_in_bytes (type);
3246 if (size % UNITS_PER_WORD != 0)
3248 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3249 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3253 return LIBCALL_VALUE (mode);
3256 static int
3257 libcall_eq (const void *p1, const void *p2)
3259 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3262 static hashval_t
3263 libcall_hash (const void *p1)
3265 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3268 static void
3269 add_libcall (htab_t htab, rtx libcall)
3271 *htab_find_slot (htab, libcall, INSERT) = libcall;
3274 static bool
3275 arm_libcall_uses_aapcs_base (const_rtx libcall)
3277 static bool init_done = false;
3278 static htab_t libcall_htab;
3280 if (!init_done)
3282 init_done = true;
3284 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3285 NULL);
3286 add_libcall (libcall_htab,
3287 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3288 add_libcall (libcall_htab,
3289 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3290 add_libcall (libcall_htab,
3291 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3292 add_libcall (libcall_htab,
3293 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3295 add_libcall (libcall_htab,
3296 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3297 add_libcall (libcall_htab,
3298 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3299 add_libcall (libcall_htab,
3300 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3301 add_libcall (libcall_htab,
3302 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3304 add_libcall (libcall_htab,
3305 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3306 add_libcall (libcall_htab,
3307 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3308 add_libcall (libcall_htab,
3309 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3310 add_libcall (libcall_htab,
3311 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3312 add_libcall (libcall_htab,
3313 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3314 add_libcall (libcall_htab,
3315 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3318 return libcall && htab_find (libcall_htab, libcall) != NULL;
3322 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3324 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3325 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3327 /* The following libcalls return their result in integer registers,
3328 even though they return a floating point value. */
3329 if (arm_libcall_uses_aapcs_base (libcall))
3330 return gen_rtx_REG (mode, ARG_REGISTER(1));
3334 return LIBCALL_VALUE (mode);
3337 /* Determine the amount of memory needed to store the possible return
3338 registers of an untyped call. */
3340 arm_apply_result_size (void)
3342 int size = 16;
3344 if (TARGET_32BIT)
3346 if (TARGET_HARD_FLOAT_ABI)
3348 if (TARGET_VFP)
3349 size += 32;
3350 if (TARGET_FPA)
3351 size += 12;
3352 if (TARGET_MAVERICK)
3353 size += 8;
3355 if (TARGET_IWMMXT_ABI)
3356 size += 8;
3359 return size;
3362 /* Decide whether TYPE should be returned in memory (true)
3363 or in a register (false). FNTYPE is the type of the function making
3364 the call. */
3365 static bool
3366 arm_return_in_memory (const_tree type, const_tree fntype)
3368 HOST_WIDE_INT size;
3370 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3372 if (TARGET_AAPCS_BASED)
3374 /* Simple, non-aggregate types (i.e. not including vectors and
3375 complex) are always returned in a register (or registers).
3376 We don't care about which register here, so we can short-cut
3377 some of the detail. */
3378 if (!AGGREGATE_TYPE_P (type)
3379 && TREE_CODE (type) != VECTOR_TYPE
3380 && TREE_CODE (type) != COMPLEX_TYPE)
3381 return false;
3383 /* Any return value that is no larger than one word can be
3384 returned in r0. */
3385 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3386 return false;
3388 /* Check any available co-processors to see if they accept the
3389 type as a register candidate (VFP, for example, can return
3390 some aggregates in consecutive registers). These aren't
3391 available if the call is variadic. */
3392 if (aapcs_select_return_coproc (type, fntype) >= 0)
3393 return false;
3395 /* Vector values should be returned using ARM registers, not
3396 memory (unless they're over 16 bytes, which will break since
3397 we only have four call-clobbered registers to play with). */
3398 if (TREE_CODE (type) == VECTOR_TYPE)
3399 return (size < 0 || size > (4 * UNITS_PER_WORD));
3401 /* The rest go in memory. */
3402 return true;
3405 if (TREE_CODE (type) == VECTOR_TYPE)
3406 return (size < 0 || size > (4 * UNITS_PER_WORD));
3408 if (!AGGREGATE_TYPE_P (type) &&
3409 (TREE_CODE (type) != VECTOR_TYPE))
3410 /* All simple types are returned in registers. */
3411 return false;
3413 if (arm_abi != ARM_ABI_APCS)
3415 /* ATPCS and later return aggregate types in memory only if they are
3416 larger than a word (or are variable size). */
3417 return (size < 0 || size > UNITS_PER_WORD);
3420 /* For the arm-wince targets we choose to be compatible with Microsoft's
3421 ARM and Thumb compilers, which always return aggregates in memory. */
3422 #ifndef ARM_WINCE
3423 /* All structures/unions bigger than one word are returned in memory.
3424 Also catch the case where int_size_in_bytes returns -1. In this case
3425 the aggregate is either huge or of variable size, and in either case
3426 we will want to return it via memory and not in a register. */
3427 if (size < 0 || size > UNITS_PER_WORD)
3428 return true;
3430 if (TREE_CODE (type) == RECORD_TYPE)
3432 tree field;
3434 /* For a struct the APCS says that we only return in a register
3435 if the type is 'integer like' and every addressable element
3436 has an offset of zero. For practical purposes this means
3437 that the structure can have at most one non bit-field element
3438 and that this element must be the first one in the structure. */
3440 /* Find the first field, ignoring non FIELD_DECL things which will
3441 have been created by C++. */
3442 for (field = TYPE_FIELDS (type);
3443 field && TREE_CODE (field) != FIELD_DECL;
3444 field = DECL_CHAIN (field))
3445 continue;
3447 if (field == NULL)
3448 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3450 /* Check that the first field is valid for returning in a register. */
3452 /* ... Floats are not allowed */
3453 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3454 return true;
3456 /* ... Aggregates that are not themselves valid for returning in
3457 a register are not allowed. */
3458 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3459 return true;
3461 /* Now check the remaining fields, if any. Only bitfields are allowed,
3462 since they are not addressable. */
3463 for (field = DECL_CHAIN (field);
3464 field;
3465 field = DECL_CHAIN (field))
3467 if (TREE_CODE (field) != FIELD_DECL)
3468 continue;
3470 if (!DECL_BIT_FIELD_TYPE (field))
3471 return true;
3474 return false;
3477 if (TREE_CODE (type) == UNION_TYPE)
3479 tree field;
3481 /* Unions can be returned in registers if every element is
3482 integral, or can be returned in an integer register. */
3483 for (field = TYPE_FIELDS (type);
3484 field;
3485 field = DECL_CHAIN (field))
3487 if (TREE_CODE (field) != FIELD_DECL)
3488 continue;
3490 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3491 return true;
3493 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3494 return true;
3497 return false;
3499 #endif /* not ARM_WINCE */
3501 /* Return all other types in memory. */
3502 return true;
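/* Editorial illustration, not part of arm.c: how the AAPCS branch above
   classifies a few hypothetical return types, assuming 4-byte words and
   no co-processor (e.g. VFP) candidate.

     double f1 (void);                     non-aggregate  -> registers
     struct { short s; } f2 (void);        2 bytes        -> registers (r0)
     struct { int a, b, c; } f3 (void);    12 bytes       -> memory      */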
3505 /* Indicate whether or not words of a double are in big-endian order. */
3508 arm_float_words_big_endian (void)
3510 if (TARGET_MAVERICK)
3511 return 0;
3513 /* For FPA, float words are always big-endian. For VFP, float words
3514 follow the memory system mode. */
3516 if (TARGET_FPA)
3518 return 1;
3521 if (TARGET_VFP)
3522 return (TARGET_BIG_END ? 1 : 0);
3524 return 1;
3527 const struct pcs_attribute_arg
3529 const char *arg;
3530 enum arm_pcs value;
3531 } pcs_attribute_args[] =
3533 {"aapcs", ARM_PCS_AAPCS},
3534 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3535 #if 0
3536 /* We could recognize these, but changes would be needed elsewhere
3537 * to implement them. */
3538 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3539 {"atpcs", ARM_PCS_ATPCS},
3540 {"apcs", ARM_PCS_APCS},
3541 #endif
3542 {NULL, ARM_PCS_UNKNOWN}
3545 static enum arm_pcs
3546 arm_pcs_from_attribute (tree attr)
3548 const struct pcs_attribute_arg *ptr;
3549 const char *arg;
3551 /* Get the value of the argument. */
3552 if (TREE_VALUE (attr) == NULL_TREE
3553 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3554 return ARM_PCS_UNKNOWN;
3556 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3558 /* Check it against the list of known arguments. */
3559 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3560 if (streq (arg, ptr->arg))
3561 return ptr->value;
3563 /* An unrecognized PCS variant. */
3564 return ARM_PCS_UNKNOWN;
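/* Editorial illustration, not part of arm.c: the "pcs" attribute parsed
   above is written on a function type; the hypothetical declaration
   below would make this routine return ARM_PCS_AAPCS_VFP.  */
#if 0
double dot_product (const float *a, const float *b, int n)
  __attribute__ ((pcs ("aapcs-vfp")));
#endif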
3567 /* Get the PCS variant to use for this call. TYPE is the function's type
3568 specification; DECL is the specific declaration. DECL may be null if
3569 the call could be indirect or if this is a library call. */
3570 static enum arm_pcs
3571 arm_get_pcs_model (const_tree type, const_tree decl)
3573 bool user_convention = false;
3574 enum arm_pcs user_pcs = arm_pcs_default;
3575 tree attr;
3577 gcc_assert (type);
3579 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3580 if (attr)
3582 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3583 user_convention = true;
3586 if (TARGET_AAPCS_BASED)
3588 /* Detect varargs functions. These always use the base rules
3589 (no argument is ever a candidate for a co-processor
3590 register). */
3591 bool base_rules = stdarg_p (type);
3593 if (user_convention)
3595 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3596 sorry ("non-AAPCS derived PCS variant");
3597 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3598 error ("variadic functions must use the base AAPCS variant");
3601 if (base_rules)
3602 return ARM_PCS_AAPCS;
3603 else if (user_convention)
3604 return user_pcs;
3605 else if (decl && flag_unit_at_a_time)
3607 /* Local functions never leak outside this compilation unit,
3608 so we are free to use whatever conventions are
3609 appropriate. */
3610 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3611 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3612 if (i && i->local)
3613 return ARM_PCS_AAPCS_LOCAL;
3616 else if (user_convention && user_pcs != arm_pcs_default)
3617 sorry ("PCS variant");
3619 /* For everything else we use the target's default. */
3620 return arm_pcs_default;
3624 static void
3625 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3626 const_tree fntype ATTRIBUTE_UNUSED,
3627 rtx libcall ATTRIBUTE_UNUSED,
3628 const_tree fndecl ATTRIBUTE_UNUSED)
3630 /* Record the unallocated VFP registers. */
3631 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3632 pcum->aapcs_vfp_reg_alloc = 0;
3635 /* Walk down the type tree of TYPE counting consecutive base elements.
3636 If *MODEP is VOIDmode, then set it to the first valid floating point
3637 type. If a non-floating point type is found, or if a floating point
3638 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3639 otherwise return the count in the sub-tree. */
3640 static int
3641 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3643 enum machine_mode mode;
3644 HOST_WIDE_INT size;
3646 switch (TREE_CODE (type))
3648 case REAL_TYPE:
3649 mode = TYPE_MODE (type);
3650 if (mode != DFmode && mode != SFmode)
3651 return -1;
3653 if (*modep == VOIDmode)
3654 *modep = mode;
3656 if (*modep == mode)
3657 return 1;
3659 break;
3661 case COMPLEX_TYPE:
3662 mode = TYPE_MODE (TREE_TYPE (type));
3663 if (mode != DFmode && mode != SFmode)
3664 return -1;
3666 if (*modep == VOIDmode)
3667 *modep = mode;
3669 if (*modep == mode)
3670 return 2;
3672 break;
3674 case VECTOR_TYPE:
3675 /* Use V2SImode and V4SImode as representatives of all 64-bit
3676 and 128-bit vector types, whether or not those modes are
3677 supported with the present options. */
3678 size = int_size_in_bytes (type);
3679 switch (size)
3681 case 8:
3682 mode = V2SImode;
3683 break;
3684 case 16:
3685 mode = V4SImode;
3686 break;
3687 default:
3688 return -1;
3691 if (*modep == VOIDmode)
3692 *modep = mode;
3694 /* Vector modes are considered to be opaque: two vectors are
3695 equivalent for the purposes of being homogeneous aggregates
3696 if they are the same size. */
3697 if (*modep == mode)
3698 return 1;
3700 break;
3702 case ARRAY_TYPE:
3704 int count;
3705 tree index = TYPE_DOMAIN (type);
3707 /* Can't handle incomplete types. */
3708 if (!COMPLETE_TYPE_P(type))
3709 return -1;
3711 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3712 if (count == -1
3713 || !index
3714 || !TYPE_MAX_VALUE (index)
3715 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3716 || !TYPE_MIN_VALUE (index)
3717 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3718 || count < 0)
3719 return -1;
3721 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3722 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3724 /* There must be no padding. */
3725 if (!host_integerp (TYPE_SIZE (type), 1)
3726 || (tree_low_cst (TYPE_SIZE (type), 1)
3727 != count * GET_MODE_BITSIZE (*modep)))
3728 return -1;
3730 return count;
3733 case RECORD_TYPE:
3735 int count = 0;
3736 int sub_count;
3737 tree field;
3739 /* Can't handle incomplete types. */
3740 if (!COMPLETE_TYPE_P(type))
3741 return -1;
3743 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3745 if (TREE_CODE (field) != FIELD_DECL)
3746 continue;
3748 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3749 if (sub_count < 0)
3750 return -1;
3751 count += sub_count;
3754 /* There must be no padding. */
3755 if (!host_integerp (TYPE_SIZE (type), 1)
3756 || (tree_low_cst (TYPE_SIZE (type), 1)
3757 != count * GET_MODE_BITSIZE (*modep)))
3758 return -1;
3760 return count;
3763 case UNION_TYPE:
3764 case QUAL_UNION_TYPE:
3766 /* These aren't very interesting except in a degenerate case. */
3767 int count = 0;
3768 int sub_count;
3769 tree field;
3771 /* Can't handle incomplete types. */
3772 if (!COMPLETE_TYPE_P(type))
3773 return -1;
3775 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3777 if (TREE_CODE (field) != FIELD_DECL)
3778 continue;
3780 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3781 if (sub_count < 0)
3782 return -1;
3783 count = count > sub_count ? count : sub_count;
3786 /* There must be no padding. */
3787 if (!host_integerp (TYPE_SIZE (type), 1)
3788 || (tree_low_cst (TYPE_SIZE (type), 1)
3789 != count * GET_MODE_BITSIZE (*modep)))
3790 return -1;
3792 return count;
3795 default:
3796 break;
3799 return -1;
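/* Illustrative sketch of the walk above (assuming SFmode elements):

     struct hfa { float x; float y; float z; };    -- count 3, *modep SFmode
     struct bad { float x; double y; };            -- element modes differ: -1

   The first struct is the kind of homogeneous aggregate that the AAPCS
   VFP variant can pass or return in VFP registers.  */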
3802 /* Return true if PCS_VARIANT should use VFP registers. */
3803 static bool
3804 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3806 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3808 static bool seen_thumb1_vfp = false;
3810 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3812 sorry ("Thumb-1 hard-float VFP ABI");
3813 /* sorry() is not immediately fatal, so only display this once. */
3814 seen_thumb1_vfp = true;
3817 return true;
3820 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3821 return false;
3823 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3824 (TARGET_VFP_DOUBLE || !is_double));
3827 static bool
3828 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3829 enum machine_mode mode, const_tree type,
3830 enum machine_mode *base_mode, int *count)
3832 enum machine_mode new_mode = VOIDmode;
3834 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3835 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3836 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3838 *count = 1;
3839 new_mode = mode;
3841 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3843 *count = 2;
3844 new_mode = (mode == DCmode ? DFmode : SFmode);
3846 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3848 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3850 if (ag_count > 0 && ag_count <= 4)
3851 *count = ag_count;
3852 else
3853 return false;
3855 else
3856 return false;
3859 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3860 return false;
3862 *base_mode = new_mode;
3863 return true;
3866 static bool
3867 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3868 enum machine_mode mode, const_tree type)
3870 int count ATTRIBUTE_UNUSED;
3871 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3873 if (!use_vfp_abi (pcs_variant, false))
3874 return false;
3875 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3876 &ag_mode, &count);
3879 static bool
3880 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3881 const_tree type)
3883 if (!use_vfp_abi (pcum->pcs_variant, false))
3884 return false;
3886 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3887 &pcum->aapcs_vfp_rmode,
3888 &pcum->aapcs_vfp_rcount);
3891 static bool
3892 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3893 const_tree type ATTRIBUTE_UNUSED)
3895 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3896 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3897 int regno;
3899 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3900 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3902 pcum->aapcs_vfp_reg_alloc = mask << regno;
3903 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3905 int i;
3906 int rcount = pcum->aapcs_vfp_rcount;
3907 int rshift = shift;
3908 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3909 rtx par;
3910 if (!TARGET_NEON)
3912 /* Avoid using unsupported vector modes. */
3913 if (rmode == V2SImode)
3914 rmode = DImode;
3915 else if (rmode == V4SImode)
3917 rmode = DImode;
3918 rcount *= 2;
3919 rshift /= 2;
3922 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3923 for (i = 0; i < rcount; i++)
3925 rtx tmp = gen_rtx_REG (rmode,
3926 FIRST_VFP_REGNUM + regno + i * rshift);
3927 tmp = gen_rtx_EXPR_LIST
3928 (VOIDmode, tmp,
3929 GEN_INT (i * GET_MODE_SIZE (rmode)));
3930 XVECEXP (par, 0, i) = tmp;
3933 pcum->aapcs_reg = par;
3935 else
3936 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3937 return true;
3939 return false;
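/* For example (illustrative), passing a struct of three floats under
   the VFP variant: aapcs_vfp_rmode is SFmode and aapcs_vfp_rcount is 3,
   so shift is 1 and mask is 0x7; the loop above finds the lowest block
   of three free single registers (s0-s2 if nothing has been used yet)
   and records the shifted mask in aapcs_vfp_reg_alloc so that
   aapcs_vfp_advance can retire those registers afterwards.  */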
3942 static rtx
3943 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
3944 enum machine_mode mode,
3945 const_tree type ATTRIBUTE_UNUSED)
3947 if (!use_vfp_abi (pcs_variant, false))
3948 return NULL_RTX;
3950 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3952 int count;
3953 enum machine_mode ag_mode;
3954 int i;
3955 rtx par;
3956 int shift;
3958 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3959 &ag_mode, &count);
3961 if (!TARGET_NEON)
3963 if (ag_mode == V2SImode)
3964 ag_mode = DImode;
3965 else if (ag_mode == V4SImode)
3967 ag_mode = DImode;
3968 count *= 2;
3971 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
3972 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3973 for (i = 0; i < count; i++)
3975 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
3976 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
3977 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
3978 XVECEXP (par, 0, i) = tmp;
3981 return par;
3984 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
3987 static void
3988 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3989 enum machine_mode mode ATTRIBUTE_UNUSED,
3990 const_tree type ATTRIBUTE_UNUSED)
3992 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
3993 pcum->aapcs_vfp_reg_alloc = 0;
3994 return;
3997 #define AAPCS_CP(X) \
3999 aapcs_ ## X ## _cum_init, \
4000 aapcs_ ## X ## _is_call_candidate, \
4001 aapcs_ ## X ## _allocate, \
4002 aapcs_ ## X ## _is_return_candidate, \
4003 aapcs_ ## X ## _allocate_return_reg, \
4004 aapcs_ ## X ## _advance \
4007 /* Table of co-processors that can be used to pass arguments in
4008 registers.  Ideally no argument should be a candidate for more than
4009 one co-processor table entry, but the table is processed in order
4010 and stops after the first match. If that entry then fails to put
4011 the argument into a co-processor register, the argument will go on
4012 the stack. */
4013 static struct
4015 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4016 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4018 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4019 BLKmode) is a candidate for this co-processor's registers; this
4020 function should ignore any position-dependent state in
4021 CUMULATIVE_ARGS and only use call-type dependent information. */
4022 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4024 /* Return true if the argument does get a co-processor register; it
4025 should set aapcs_reg to an RTX of the register allocated as is
4026 required for a return from FUNCTION_ARG. */
4027 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4029 /* Return true if a result of mode MODE (or type TYPE if MODE is
4030 BLKmode) can be returned in this co-processor's registers.  */
4031 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4033 /* Allocate and return an RTX element to hold the return value of a
4034 call; this routine must not fail and will only be called if
4035 is_return_candidate returned true with the same parameters. */
4036 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4038 /* Finish processing this argument and prepare to start processing
4039 the next one. */
4040 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4041 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4043 AAPCS_CP(vfp)
4046 #undef AAPCS_CP
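/* For reference, the single AAPCS_CP(vfp) entry above expands to

     { aapcs_vfp_cum_init,
       aapcs_vfp_is_call_candidate,
       aapcs_vfp_allocate,
       aapcs_vfp_is_return_candidate,
       aapcs_vfp_allocate_return_reg,
       aapcs_vfp_advance },

   i.e. the VFP hooks defined earlier in this file.  */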
4048 static int
4049 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4050 const_tree type)
4052 int i;
4054 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4055 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4056 return i;
4058 return -1;
4061 static int
4062 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4064 /* We aren't passed a decl, so we can't check that a call is local.
4065 However, it isn't clear that that would be a win anyway, since it
4066 might limit some tail-calling opportunities. */
4067 enum arm_pcs pcs_variant;
4069 if (fntype)
4071 const_tree fndecl = NULL_TREE;
4073 if (TREE_CODE (fntype) == FUNCTION_DECL)
4075 fndecl = fntype;
4076 fntype = TREE_TYPE (fntype);
4079 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4081 else
4082 pcs_variant = arm_pcs_default;
4084 if (pcs_variant != ARM_PCS_AAPCS)
4086 int i;
4088 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4089 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4090 TYPE_MODE (type),
4091 type))
4092 return i;
4094 return -1;
4097 static rtx
4098 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4099 const_tree fntype)
4101 /* We aren't passed a decl, so we can't check that a call is local.
4102 However, it isn't clear that that would be a win anyway, since it
4103 might limit some tail-calling opportunities. */
4104 enum arm_pcs pcs_variant;
4105 int unsignedp ATTRIBUTE_UNUSED;
4107 if (fntype)
4109 const_tree fndecl = NULL_TREE;
4111 if (TREE_CODE (fntype) == FUNCTION_DECL)
4113 fndecl = fntype;
4114 fntype = TREE_TYPE (fntype);
4117 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4119 else
4120 pcs_variant = arm_pcs_default;
4122 /* Promote integer types. */
4123 if (type && INTEGRAL_TYPE_P (type))
4124 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4126 if (pcs_variant != ARM_PCS_AAPCS)
4128 int i;
4130 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4131 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4132 type))
4133 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4134 mode, type);
4137 /* Promotes small structs returned in a register to full-word size
4138 for big-endian AAPCS. */
4139 if (type && arm_return_in_msb (type))
4141 HOST_WIDE_INT size = int_size_in_bytes (type);
4142 if (size % UNITS_PER_WORD != 0)
4144 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4145 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4149 return gen_rtx_REG (mode, R0_REGNUM);
4153 aapcs_libcall_value (enum machine_mode mode)
4155 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4158 /* Lay out a function argument using the AAPCS rules. The rule
4159 numbers referred to here are those in the AAPCS. */
4160 static void
4161 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4162 const_tree type, bool named)
4164 int nregs, nregs2;
4165 int ncrn;
4167 /* We only need to do this once per argument. */
4168 if (pcum->aapcs_arg_processed)
4169 return;
4171 pcum->aapcs_arg_processed = true;
4173 /* Special case: if named is false then we are handling an incoming
4174 anonymous argument which is on the stack. */
4175 if (!named)
4176 return;
4178 /* Is this a potential co-processor register candidate? */
4179 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4181 int slot = aapcs_select_call_coproc (pcum, mode, type);
4182 pcum->aapcs_cprc_slot = slot;
4184 /* We don't have to apply any of the rules from part B of the
4185 preparation phase; these are handled elsewhere in the
4186 compiler. */
4188 if (slot >= 0)
4190 /* A Co-processor register candidate goes either in its own
4191 class of registers or on the stack. */
4192 if (!pcum->aapcs_cprc_failed[slot])
4194 /* C1.cp - Try to allocate the argument to co-processor
4195 registers. */
4196 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4197 return;
4199 /* C2.cp - Put the argument on the stack and note that we
4200 can't assign any more candidates in this slot. We also
4201 need to note that we have allocated stack space, so that
4202 we won't later try to split a non-cprc candidate between
4203 core registers and the stack. */
4204 pcum->aapcs_cprc_failed[slot] = true;
4205 pcum->can_split = false;
4208 /* We didn't get a register, so this argument goes on the
4209 stack. */
4210 gcc_assert (pcum->can_split == false);
4211 return;
4215 /* C3 - For double-word aligned arguments, round the NCRN up to the
4216 next even number. */
4217 ncrn = pcum->aapcs_ncrn;
4218 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4219 ncrn++;
4221 nregs = ARM_NUM_REGS2(mode, type);
4223 /* Sigh, this test should really assert that nregs > 0, but a GCC
4224 extension allows empty structs and then gives them empty size; it
4225 then allows such a structure to be passed by value. For some of
4226 the code below we have to pretend that such an argument has
4227 non-zero size so that we 'locate' it correctly either in
4228 registers or on the stack. */
4229 gcc_assert (nregs >= 0);
4231 nregs2 = nregs ? nregs : 1;
4233 /* C4 - Argument fits entirely in core registers. */
4234 if (ncrn + nregs2 <= NUM_ARG_REGS)
4236 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4237 pcum->aapcs_next_ncrn = ncrn + nregs;
4238 return;
4241 /* C5 - Some core registers left and there are no arguments already
4242 on the stack: split this argument between the remaining core
4243 registers and the stack. */
4244 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4246 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4247 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4248 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4249 return;
4252 /* C6 - NCRN is set to 4. */
4253 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4255 /* C7,C8 - argument goes on the stack.  We have nothing to do here. */
4256 return;
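/* Worked example of rules C3-C6 (illustrative, base AAPCS with no
   co-processor candidate):

     void f (int a, double b, int c);

   a is allocated to r0 (NCRN 0 -> 1); b needs doubleword alignment, so
   C3 rounds the NCRN up to 2 and b occupies r2-r3 (NCRN -> 4); c then
   finds no core registers left and goes on the stack under C6-C8.  */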
4259 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4260 for a call to a function whose data type is FNTYPE.
4261 For a library call, FNTYPE is NULL. */
4262 void
4263 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4264 rtx libname,
4265 tree fndecl ATTRIBUTE_UNUSED)
4267 /* Long call handling. */
4268 if (fntype)
4269 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4270 else
4271 pcum->pcs_variant = arm_pcs_default;
4273 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4275 if (arm_libcall_uses_aapcs_base (libname))
4276 pcum->pcs_variant = ARM_PCS_AAPCS;
4278 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4279 pcum->aapcs_reg = NULL_RTX;
4280 pcum->aapcs_partial = 0;
4281 pcum->aapcs_arg_processed = false;
4282 pcum->aapcs_cprc_slot = -1;
4283 pcum->can_split = true;
4285 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4287 int i;
4289 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4291 pcum->aapcs_cprc_failed[i] = false;
4292 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4295 return;
4298 /* Legacy ABIs */
4300 /* On the ARM, the offset starts at 0. */
4301 pcum->nregs = 0;
4302 pcum->iwmmxt_nregs = 0;
4303 pcum->can_split = true;
4305 /* Varargs vectors are treated the same as long long.
4306 named_count avoids having to change the way arm handles 'named'.  */
4307 pcum->named_count = 0;
4308 pcum->nargs = 0;
4310 if (TARGET_REALLY_IWMMXT && fntype)
4312 tree fn_arg;
4314 for (fn_arg = TYPE_ARG_TYPES (fntype);
4315 fn_arg;
4316 fn_arg = TREE_CHAIN (fn_arg))
4317 pcum->named_count += 1;
4319 if (! pcum->named_count)
4320 pcum->named_count = INT_MAX;
4325 /* Return true if mode/type need doubleword alignment. */
4326 static bool
4327 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4329 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4330 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
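/* For instance, PARM_BOUNDARY is 32 on ARM, so DImode and DFmode
   arguments (64-bit alignment) need doubleword alignment here, as does
   an over-aligned type such as

     struct aligned8 { int x; } __attribute__ ((aligned (8)));

   while plain int and pointer arguments do not.  (Illustrative only.)  */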
4334 /* Determine where to put an argument to a function.
4335 Value is zero to push the argument on the stack,
4336 or a hard register in which to store the argument.
4338 MODE is the argument's machine mode.
4339 TYPE is the data type of the argument (as a tree).
4340 This is null for libcalls where that information may
4341 not be available.
4342 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4343 the preceding args and about the function being called.
4344 NAMED is nonzero if this argument is a named parameter
4345 (otherwise it is an extra parameter matching an ellipsis).
4347 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4348 other arguments are passed on the stack. If (NAMED == 0) (which happens
4349 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4350 defined), say it is passed on the stack (function_prologue will
4351 indeed make it be passed on the stack if necessary).  */
4353 static rtx
4354 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4355 const_tree type, bool named)
4357 int nregs;
4359 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4360 a call insn (op3 of a call_value insn). */
4361 if (mode == VOIDmode)
4362 return const0_rtx;
4364 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4366 aapcs_layout_arg (pcum, mode, type, named);
4367 return pcum->aapcs_reg;
4370 /* Varargs vectors are treated the same as long long.
4371 named_count avoids having to change the way arm handles 'named'.  */
4372 if (TARGET_IWMMXT_ABI
4373 && arm_vector_mode_supported_p (mode)
4374 && pcum->named_count > pcum->nargs + 1)
4376 if (pcum->iwmmxt_nregs <= 9)
4377 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4378 else
4380 pcum->can_split = false;
4381 return NULL_RTX;
4385 /* Put doubleword aligned quantities in even register pairs. */
4386 if (pcum->nregs & 1
4387 && ARM_DOUBLEWORD_ALIGN
4388 && arm_needs_doubleword_align (mode, type))
4389 pcum->nregs++;
4391 /* Only allow splitting an arg between regs and memory if all preceding
4392 args were allocated to regs. For args passed by reference we only count
4393 the reference pointer. */
4394 if (pcum->can_split)
4395 nregs = 1;
4396 else
4397 nregs = ARM_NUM_REGS2 (mode, type);
4399 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4400 return NULL_RTX;
4402 return gen_rtx_REG (mode, pcum->nregs);
4405 static unsigned int
4406 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4408 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4409 ? DOUBLEWORD_ALIGNMENT
4410 : PARM_BOUNDARY);
4413 static int
4414 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4415 tree type, bool named)
4417 int nregs = pcum->nregs;
4419 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4421 aapcs_layout_arg (pcum, mode, type, named);
4422 return pcum->aapcs_partial;
4425 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4426 return 0;
4428 if (NUM_ARG_REGS > nregs
4429 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4430 && pcum->can_split)
4431 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4433 return 0;
4436 /* Update the data in PCUM to advance over an argument
4437 of mode MODE and data type TYPE.
4438 (TYPE is null for libcalls where that information may not be available.) */
4440 static void
4441 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4442 const_tree type, bool named)
4444 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4446 aapcs_layout_arg (pcum, mode, type, named);
4448 if (pcum->aapcs_cprc_slot >= 0)
4450 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4451 type);
4452 pcum->aapcs_cprc_slot = -1;
4455 /* Generic stuff. */
4456 pcum->aapcs_arg_processed = false;
4457 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4458 pcum->aapcs_reg = NULL_RTX;
4459 pcum->aapcs_partial = 0;
4461 else
4463 pcum->nargs += 1;
4464 if (arm_vector_mode_supported_p (mode)
4465 && pcum->named_count > pcum->nargs
4466 && TARGET_IWMMXT_ABI)
4467 pcum->iwmmxt_nregs += 1;
4468 else
4469 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4473 /* Variable sized types are passed by reference. This is a GCC
4474 extension to the ARM ABI. */
4476 static bool
4477 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4478 enum machine_mode mode ATTRIBUTE_UNUSED,
4479 const_tree type, bool named ATTRIBUTE_UNUSED)
4481 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4484 /* Encode the current state of the #pragma [no_]long_calls. */
4485 typedef enum
4487 OFF, /* No #pragma [no_]long_calls is in effect. */
4488 LONG, /* #pragma long_calls is in effect. */
4489 SHORT /* #pragma no_long_calls is in effect. */
4490 } arm_pragma_enum;
4492 static arm_pragma_enum arm_pragma_long_calls = OFF;
4494 void
4495 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4497 arm_pragma_long_calls = LONG;
4500 void
4501 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4503 arm_pragma_long_calls = SHORT;
4506 void
4507 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4509 arm_pragma_long_calls = OFF;
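/* Sketch of how these pragmas appear in user code (illustrative):

     #pragma long_calls
     extern void far_away (void);   -- calls use the long-call sequence
     #pragma no_long_calls
     extern void nearby (void);     -- calls use a plain BL
     #pragma long_calls_off         -- restore the command-line default

   The state recorded here is consumed by arm_set_default_type_attributes
   and arm_is_long_call_p below.  */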
4512 /* Handle an attribute requiring a FUNCTION_DECL;
4513 arguments as in struct attribute_spec.handler. */
4514 static tree
4515 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4516 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4518 if (TREE_CODE (*node) != FUNCTION_DECL)
4520 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4521 name);
4522 *no_add_attrs = true;
4525 return NULL_TREE;
4528 /* Handle an "interrupt" or "isr" attribute;
4529 arguments as in struct attribute_spec.handler. */
4530 static tree
4531 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4532 bool *no_add_attrs)
4534 if (DECL_P (*node))
4536 if (TREE_CODE (*node) != FUNCTION_DECL)
4538 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4539 name);
4540 *no_add_attrs = true;
4542 /* FIXME: the argument if any is checked for type attributes;
4543 should it be checked for decl ones? */
4545 else
4547 if (TREE_CODE (*node) == FUNCTION_TYPE
4548 || TREE_CODE (*node) == METHOD_TYPE)
4550 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4552 warning (OPT_Wattributes, "%qE attribute ignored",
4553 name);
4554 *no_add_attrs = true;
4557 else if (TREE_CODE (*node) == POINTER_TYPE
4558 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4559 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4560 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4562 *node = build_variant_type_copy (*node);
4563 TREE_TYPE (*node) = build_type_attribute_variant
4564 (TREE_TYPE (*node),
4565 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4566 *no_add_attrs = true;
4568 else
4570 /* Possibly pass this attribute on from the type to a decl. */
4571 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4572 | (int) ATTR_FLAG_FUNCTION_NEXT
4573 | (int) ATTR_FLAG_ARRAY_NEXT))
4575 *no_add_attrs = true;
4576 return tree_cons (name, args, NULL_TREE);
4578 else
4580 warning (OPT_Wattributes, "%qE attribute ignored",
4581 name);
4586 return NULL_TREE;
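/* Illustrative use of the attribute handled above:

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   The optional string argument (e.g. "IRQ" or "FIQ") selects the
   interrupt type; a string that arm_isr_value does not recognize makes
   it return ARM_FT_UNKNOWN and the attribute is ignored with a
   warning, as above.  */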
4589 /* Handle a "pcs" attribute; arguments as in struct
4590 attribute_spec.handler. */
4591 static tree
4592 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4593 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4595 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4597 warning (OPT_Wattributes, "%qE attribute ignored", name);
4598 *no_add_attrs = true;
4600 return NULL_TREE;
4603 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4604 /* Handle the "notshared" attribute. This attribute is another way of
4605 requesting hidden visibility. ARM's compiler supports
4606 "__declspec(notshared)"; we support the same thing via an
4607 attribute. */
4609 static tree
4610 arm_handle_notshared_attribute (tree *node,
4611 tree name ATTRIBUTE_UNUSED,
4612 tree args ATTRIBUTE_UNUSED,
4613 int flags ATTRIBUTE_UNUSED,
4614 bool *no_add_attrs)
4616 tree decl = TYPE_NAME (*node);
4618 if (decl)
4620 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4621 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4622 *no_add_attrs = false;
4624 return NULL_TREE;
4626 #endif
4628 /* Return 0 if the attributes for two types are incompatible, 1 if they
4629 are compatible, and 2 if they are nearly compatible (which causes a
4630 warning to be generated). */
4631 static int
4632 arm_comp_type_attributes (const_tree type1, const_tree type2)
4634 int l1, l2, s1, s2;
4636 /* Check for mismatch of non-default calling convention. */
4637 if (TREE_CODE (type1) != FUNCTION_TYPE)
4638 return 1;
4640 /* Check for mismatched call attributes. */
4641 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4642 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4643 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4644 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4646 /* Only bother to check if an attribute is defined. */
4647 if (l1 | l2 | s1 | s2)
4649 /* If one type has an attribute, the other must have the same attribute. */
4650 if ((l1 != l2) || (s1 != s2))
4651 return 0;
4653 /* Disallow mixed attributes. */
4654 if ((l1 & s2) || (l2 & s1))
4655 return 0;
4658 /* Check for mismatched ISR attribute. */
4659 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4660 if (! l1)
4661 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4662 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4663 if (! l2)
4664 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4665 if (l1 != l2)
4666 return 0;
4668 return 1;
4671 /* Assigns default attributes to newly defined type. This is used to
4672 set short_call/long_call attributes for function types of
4673 functions defined inside corresponding #pragma scopes. */
4674 static void
4675 arm_set_default_type_attributes (tree type)
4677 /* Add __attribute__ ((long_call)) to all functions, when
4678 inside #pragma long_calls or __attribute__ ((short_call)),
4679 when inside #pragma no_long_calls. */
4680 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4682 tree type_attr_list, attr_name;
4683 type_attr_list = TYPE_ATTRIBUTES (type);
4685 if (arm_pragma_long_calls == LONG)
4686 attr_name = get_identifier ("long_call");
4687 else if (arm_pragma_long_calls == SHORT)
4688 attr_name = get_identifier ("short_call");
4689 else
4690 return;
4692 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4693 TYPE_ATTRIBUTES (type) = type_attr_list;
4697 /* Return true if DECL is known to be linked into section SECTION. */
4699 static bool
4700 arm_function_in_section_p (tree decl, section *section)
4702 /* We can only be certain about functions defined in the same
4703 compilation unit. */
4704 if (!TREE_STATIC (decl))
4705 return false;
4707 /* Make sure that SYMBOL always binds to the definition in this
4708 compilation unit. */
4709 if (!targetm.binds_local_p (decl))
4710 return false;
4712 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4713 if (!DECL_SECTION_NAME (decl))
4715 /* Make sure that we will not create a unique section for DECL. */
4716 if (flag_function_sections || DECL_ONE_ONLY (decl))
4717 return false;
4720 return function_section (decl) == section;
4723 /* Return nonzero if a 32-bit "long_call" should be generated for
4724 a call from the current function to DECL. We generate a long_call
4725 if the function:
4727 a. has an __attribute__ ((long_call))
4728 or b. is within the scope of a #pragma long_calls
4729 or c. the -mlong-calls command line switch has been specified
4731 However we do not generate a long call if the function:
4733 d. has an __attribute__ ((short_call))
4734 or e. is inside the scope of a #pragma no_long_calls
4735 or f. is defined in the same section as the current function. */
4737 bool
4738 arm_is_long_call_p (tree decl)
4740 tree attrs;
4742 if (!decl)
4743 return TARGET_LONG_CALLS;
4745 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4746 if (lookup_attribute ("short_call", attrs))
4747 return false;
4749 /* For "f", be conservative, and only cater for cases in which the
4750 whole of the current function is placed in the same section. */
4751 if (!flag_reorder_blocks_and_partition
4752 && TREE_CODE (decl) == FUNCTION_DECL
4753 && arm_function_in_section_p (decl, current_function_section ()))
4754 return false;
4756 if (lookup_attribute ("long_call", attrs))
4757 return true;
4759 return TARGET_LONG_CALLS;
4762 /* Return nonzero if it is ok to make a tail-call to DECL. */
4763 static bool
4764 arm_function_ok_for_sibcall (tree decl, tree exp)
4766 unsigned long func_type;
4768 if (cfun->machine->sibcall_blocked)
4769 return false;
4771 /* Never tailcall something for which we have no decl, or if we
4772 are generating code for Thumb-1. */
4773 if (decl == NULL || TARGET_THUMB1)
4774 return false;
4776 /* The PIC register is live on entry to VxWorks PLT entries, so we
4777 must make the call before restoring the PIC register. */
4778 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4779 return false;
4781 /* Cannot tail-call to long calls, since these are out of range of
4782 a branch instruction. */
4783 if (arm_is_long_call_p (decl))
4784 return false;
4786 /* If we are interworking and the function is not declared static
4787 then we can't tail-call it unless we know that it exists in this
4788 compilation unit (since it might be a Thumb routine). */
4789 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4790 return false;
4792 func_type = arm_current_func_type ();
4793 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4794 if (IS_INTERRUPT (func_type))
4795 return false;
4797 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4799 /* Check that the return value locations are the same. For
4800 example that we aren't returning a value from the sibling in
4801 a VFP register but then need to transfer it to a core
4802 register. */
4803 rtx a, b;
4805 a = arm_function_value (TREE_TYPE (exp), decl, false);
4806 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4807 cfun->decl, false);
4808 if (!rtx_equal_p (a, b))
4809 return false;
4812 /* Never tailcall if function may be called with a misaligned SP. */
4813 if (IS_STACKALIGN (func_type))
4814 return false;
4816 /* Everything else is ok. */
4817 return true;
4821 /* Addressing mode support functions. */
4823 /* Return nonzero if X is a legitimate immediate operand when compiling
4824 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4826 legitimate_pic_operand_p (rtx x)
4828 if (GET_CODE (x) == SYMBOL_REF
4829 || (GET_CODE (x) == CONST
4830 && GET_CODE (XEXP (x, 0)) == PLUS
4831 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4832 return 0;
4834 return 1;
4837 /* Record that the current function needs a PIC register. Initialize
4838 cfun->machine->pic_reg if we have not already done so. */
4840 static void
4841 require_pic_register (void)
4843 /* A lot of the logic here is made obscure by the fact that this
4844 routine gets called as part of the rtx cost estimation process.
4845 We don't want those calls to affect any assumptions about the real
4846 function; and further, we can't call entry_of_function() until we
4847 start the real expansion process. */
4848 if (!crtl->uses_pic_offset_table)
4850 gcc_assert (can_create_pseudo_p ());
4851 if (arm_pic_register != INVALID_REGNUM)
4853 if (!cfun->machine->pic_reg)
4854 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4856 /* Play games to avoid marking the function as needing pic
4857 if we are being called as part of the cost-estimation
4858 process. */
4859 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4860 crtl->uses_pic_offset_table = 1;
4862 else
4864 rtx seq, insn;
4866 if (!cfun->machine->pic_reg)
4867 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4869 /* Play games to avoid marking the function as needing pic
4870 if we are being called as part of the cost-estimation
4871 process. */
4872 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4874 crtl->uses_pic_offset_table = 1;
4875 start_sequence ();
4877 arm_load_pic_register (0UL);
4879 seq = get_insns ();
4880 end_sequence ();
4882 for (insn = seq; insn; insn = NEXT_INSN (insn))
4883 if (INSN_P (insn))
4884 INSN_LOCATOR (insn) = prologue_locator;
4886 /* We can be called during expansion of PHI nodes, where
4887 we can't yet emit instructions directly in the final
4888 insn stream. Queue the insns on the entry edge, they will
4889 be committed after everything else is expanded. */
4890 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4897 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4899 if (GET_CODE (orig) == SYMBOL_REF
4900 || GET_CODE (orig) == LABEL_REF)
4902 rtx insn;
4904 if (reg == 0)
4906 gcc_assert (can_create_pseudo_p ());
4907 reg = gen_reg_rtx (Pmode);
4910 /* VxWorks does not impose a fixed gap between segments; the run-time
4911 gap can be different from the object-file gap. We therefore can't
4912 use GOTOFF unless we are absolutely sure that the symbol is in the
4913 same segment as the GOT. Unfortunately, the flexibility of linker
4914 scripts means that we can't be sure of that in general, so assume
4915 that GOTOFF is never valid on VxWorks. */
4916 if ((GET_CODE (orig) == LABEL_REF
4917 || (GET_CODE (orig) == SYMBOL_REF &&
4918 SYMBOL_REF_LOCAL_P (orig)))
4919 && NEED_GOT_RELOC
4920 && !TARGET_VXWORKS_RTP)
4921 insn = arm_pic_static_addr (orig, reg);
4922 else
4924 rtx pat;
4925 rtx mem;
4927 /* If this function doesn't have a pic register, create one now. */
4928 require_pic_register ();
4930 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
4932 /* Make the MEM as close to a constant as possible. */
4933 mem = SET_SRC (pat);
4934 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
4935 MEM_READONLY_P (mem) = 1;
4936 MEM_NOTRAP_P (mem) = 1;
4938 insn = emit_insn (pat);
4941 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4942 by loop. */
4943 set_unique_reg_note (insn, REG_EQUAL, orig);
4945 return reg;
4947 else if (GET_CODE (orig) == CONST)
4949 rtx base, offset;
4951 if (GET_CODE (XEXP (orig, 0)) == PLUS
4952 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4953 return orig;
4955 /* Handle the case where we have: const (UNSPEC_TLS). */
4956 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4957 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4958 return orig;
4960 /* Handle the case where we have:
4961 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4962 CONST_INT. */
4963 if (GET_CODE (XEXP (orig, 0)) == PLUS
4964 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
4965 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
4967 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
4968 return orig;
4971 if (reg == 0)
4973 gcc_assert (can_create_pseudo_p ());
4974 reg = gen_reg_rtx (Pmode);
4977 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4979 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
4980 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
4981 base == reg ? 0 : reg);
4983 if (GET_CODE (offset) == CONST_INT)
4985 /* The base register doesn't really matter, we only want to
4986 test the index for the appropriate mode. */
4987 if (!arm_legitimate_index_p (mode, offset, SET, 0))
4989 gcc_assert (can_create_pseudo_p ());
4990 offset = force_reg (Pmode, offset);
4993 if (GET_CODE (offset) == CONST_INT)
4994 return plus_constant (base, INTVAL (offset));
4997 if (GET_MODE_SIZE (mode) > 4
4998 && (GET_MODE_CLASS (mode) == MODE_INT
4999 || TARGET_SOFT_FLOAT))
5001 emit_insn (gen_addsi3 (reg, base, offset));
5002 return reg;
5005 return gen_rtx_PLUS (Pmode, base, offset);
5008 return orig;
5012 /* Find a spare register to use during the prolog of a function. */
5014 static int
5015 thumb_find_work_register (unsigned long pushed_regs_mask)
5017 int reg;
5019 /* Check the argument registers first as these are call-used. The
5020 register allocation order means that sometimes r3 might be used
5021 but earlier argument registers might not, so check them all. */
5022 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5023 if (!df_regs_ever_live_p (reg))
5024 return reg;
5026 /* Before going on to check the call-saved registers we can try a couple
5027 more ways of deducing that r3 is available. The first is when we are
5028 pushing anonymous arguments onto the stack and we have less than 4
5029 registers worth of fixed arguments(*). In this case r3 will be part of
5030 the variable argument list and so we can be sure that it will be
5031 pushed right at the start of the function. Hence it will be available
5032 for the rest of the prologue.
5033 (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
5034 if (cfun->machine->uses_anonymous_args
5035 && crtl->args.pretend_args_size > 0)
5036 return LAST_ARG_REGNUM;
5038 /* The other case is when we have fixed arguments but less than 4 registers
5039 worth. In this case r3 might be used in the body of the function, but
5040 it is not being used to convey an argument into the function. In theory
5041 we could just check crtl->args.size to see how many bytes are
5042 being passed in argument registers, but it seems that it is unreliable.
5043 Sometimes it will have the value 0 when in fact arguments are being
5044 passed. (See testcase execute/20021111-1.c for an example). So we also
5045 check the args_info.nregs field as well. The problem with this field is
5046 that it makes no allowances for arguments that are passed to the
5047 function but which are not used. Hence we could miss an opportunity
5048 when a function has an unused argument in r3. But it is better to be
5049 safe than to be sorry. */
5050 if (! cfun->machine->uses_anonymous_args
5051 && crtl->args.size >= 0
5052 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5053 && crtl->args.info.nregs < 4)
5054 return LAST_ARG_REGNUM;
5056 /* Otherwise look for a call-saved register that is going to be pushed. */
5057 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5058 if (pushed_regs_mask & (1 << reg))
5059 return reg;
5061 if (TARGET_THUMB2)
5063 /* Thumb-2 can use high regs. */
5064 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5065 if (pushed_regs_mask & (1 << reg))
5066 return reg;
5068 /* Something went wrong - thumb_compute_save_reg_mask()
5069 should have arranged for a suitable register to be pushed. */
5070 gcc_unreachable ();
5073 static GTY(()) int pic_labelno;
5075 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5076 low register. */
5078 void
5079 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5081 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5083 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5084 return;
5086 gcc_assert (flag_pic);
5088 pic_reg = cfun->machine->pic_reg;
5089 if (TARGET_VXWORKS_RTP)
5091 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5092 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5093 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5095 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5097 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5098 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5100 else
5102 /* We use an UNSPEC rather than a LABEL_REF because this label
5103 never appears in the code stream. */
5105 labelno = GEN_INT (pic_labelno++);
5106 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5107 l1 = gen_rtx_CONST (VOIDmode, l1);
5109 /* On the ARM the PC register contains 'dot + 8' at the time of the
5110 addition; on the Thumb it is 'dot + 4'.  */
5111 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5112 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5113 UNSPEC_GOTSYM_OFF);
5114 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5116 if (TARGET_32BIT)
5118 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5119 if (TARGET_ARM)
5120 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5121 else
5122 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5124 else /* TARGET_THUMB1 */
5126 if (arm_pic_register != INVALID_REGNUM
5127 && REGNO (pic_reg) > LAST_LO_REGNUM)
5129 /* We will have pushed the pic register, so we should always be
5130 able to find a work register. */
5131 pic_tmp = gen_rtx_REG (SImode,
5132 thumb_find_work_register (saved_regs));
5133 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5134 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5136 else
5137 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5138 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5142 /* Need to emit this whether or not we obey regdecls,
5143 since setjmp/longjmp can cause life info to screw up. */
5144 emit_use (pic_reg);
5147 /* Generate code to load the address of a static var when flag_pic is set. */
5148 static rtx
5149 arm_pic_static_addr (rtx orig, rtx reg)
5151 rtx l1, labelno, offset_rtx, insn;
5153 gcc_assert (flag_pic);
5155 /* We use an UNSPEC rather than a LABEL_REF because this label
5156 never appears in the code stream. */
5157 labelno = GEN_INT (pic_labelno++);
5158 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5159 l1 = gen_rtx_CONST (VOIDmode, l1);
5161 /* On the ARM the PC register contains 'dot + 8' at the time of the
5162 addition; on the Thumb it is 'dot + 4'.  */
5163 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5164 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5165 UNSPEC_SYMBOL_OFFSET);
5166 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5168 if (TARGET_32BIT)
5170 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5171 if (TARGET_ARM)
5172 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5173 else
5174 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5176 else /* TARGET_THUMB1 */
5178 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5179 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5182 return insn;
5185 /* Return nonzero if X is valid as an ARM state addressing register. */
5186 static int
5187 arm_address_register_rtx_p (rtx x, int strict_p)
5189 int regno;
5191 if (GET_CODE (x) != REG)
5192 return 0;
5194 regno = REGNO (x);
5196 if (strict_p)
5197 return ARM_REGNO_OK_FOR_BASE_P (regno);
5199 return (regno <= LAST_ARM_REGNUM
5200 || regno >= FIRST_PSEUDO_REGISTER
5201 || regno == FRAME_POINTER_REGNUM
5202 || regno == ARG_POINTER_REGNUM);
5205 /* Return TRUE if this rtx is the difference of a symbol and a label,
5206 and will reduce to a PC-relative relocation in the object file.
5207 Expressions like this can be left alone when generating PIC, rather
5208 than forced through the GOT. */
5209 static int
5210 pcrel_constant_p (rtx x)
5212 if (GET_CODE (x) == MINUS)
5213 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5215 return FALSE;
5218 /* Return true if X will surely end up in an index register after next
5219 splitting pass. */
5220 static bool
5221 will_be_in_index_register (const_rtx x)
5223 /* arm.md: calculate_pic_address will split this into a register. */
5224 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5227 /* Return nonzero if X is a valid ARM state address operand. */
5229 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5230 int strict_p)
5232 bool use_ldrd;
5233 enum rtx_code code = GET_CODE (x);
5235 if (arm_address_register_rtx_p (x, strict_p))
5236 return 1;
5238 use_ldrd = (TARGET_LDRD
5239 && (mode == DImode
5240 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5242 if (code == POST_INC || code == PRE_DEC
5243 || ((code == PRE_INC || code == POST_DEC)
5244 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5245 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5247 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5248 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5249 && GET_CODE (XEXP (x, 1)) == PLUS
5250 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5252 rtx addend = XEXP (XEXP (x, 1), 1);
5254 /* Don't allow ldrd post increment by register because it's hard
5255 to fixup invalid register choices. */
5256 if (use_ldrd
5257 && GET_CODE (x) == POST_MODIFY
5258 && GET_CODE (addend) == REG)
5259 return 0;
5261 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5262 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5265 /* After reload constants split into minipools will have addresses
5266 from a LABEL_REF. */
5267 else if (reload_completed
5268 && (code == LABEL_REF
5269 || (code == CONST
5270 && GET_CODE (XEXP (x, 0)) == PLUS
5271 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5272 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5273 return 1;
5275 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5276 return 0;
5278 else if (code == PLUS)
5280 rtx xop0 = XEXP (x, 0);
5281 rtx xop1 = XEXP (x, 1);
5283 return ((arm_address_register_rtx_p (xop0, strict_p)
5284 && ((GET_CODE(xop1) == CONST_INT
5285 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5286 || (!strict_p && will_be_in_index_register (xop1))))
5287 || (arm_address_register_rtx_p (xop1, strict_p)
5288 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5291 #if 0
5292 /* Reload currently can't handle MINUS, so disable this for now */
5293 else if (GET_CODE (x) == MINUS)
5295 rtx xop0 = XEXP (x, 0);
5296 rtx xop1 = XEXP (x, 1);
5298 return (arm_address_register_rtx_p (xop0, strict_p)
5299 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5301 #endif
5303 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5304 && code == SYMBOL_REF
5305 && CONSTANT_POOL_ADDRESS_P (x)
5306 && ! (flag_pic
5307 && symbol_mentioned_p (get_pool_constant (x))
5308 && ! pcrel_constant_p (get_pool_constant (x))))
5309 return 1;
5311 return 0;
5314 /* Return nonzero if X is a valid Thumb-2 address operand. */
5315 static int
5316 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5318 bool use_ldrd;
5319 enum rtx_code code = GET_CODE (x);
5321 if (arm_address_register_rtx_p (x, strict_p))
5322 return 1;
5324 use_ldrd = (TARGET_LDRD
5325 && (mode == DImode
5326 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5328 if (code == POST_INC || code == PRE_DEC
5329 || ((code == PRE_INC || code == POST_DEC)
5330 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5331 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5333 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5334 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5335 && GET_CODE (XEXP (x, 1)) == PLUS
5336 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5338 /* Thumb-2 only has autoincrement by constant. */
5339 rtx addend = XEXP (XEXP (x, 1), 1);
5340 HOST_WIDE_INT offset;
5342 if (GET_CODE (addend) != CONST_INT)
5343 return 0;
5345 offset = INTVAL(addend);
5346 if (GET_MODE_SIZE (mode) <= 4)
5347 return (offset > -256 && offset < 256);
5349 return (use_ldrd && offset > -1024 && offset < 1024
5350 && (offset & 3) == 0);
5353 /* After reload constants split into minipools will have addresses
5354 from a LABEL_REF. */
5355 else if (reload_completed
5356 && (code == LABEL_REF
5357 || (code == CONST
5358 && GET_CODE (XEXP (x, 0)) == PLUS
5359 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5360 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5361 return 1;
5363 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5364 return 0;
5366 else if (code == PLUS)
5368 rtx xop0 = XEXP (x, 0);
5369 rtx xop1 = XEXP (x, 1);
5371 return ((arm_address_register_rtx_p (xop0, strict_p)
5372 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5373 || (!strict_p && will_be_in_index_register (xop1))))
5374 || (arm_address_register_rtx_p (xop1, strict_p)
5375 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5378 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5379 && code == SYMBOL_REF
5380 && CONSTANT_POOL_ADDRESS_P (x)
5381 && ! (flag_pic
5382 && symbol_mentioned_p (get_pool_constant (x))
5383 && ! pcrel_constant_p (get_pool_constant (x))))
5384 return 1;
5386 return 0;
5389 /* Return nonzero if INDEX is valid for an address index operand in
5390 ARM state. */
5391 static int
5392 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5393 int strict_p)
5395 HOST_WIDE_INT range;
5396 enum rtx_code code = GET_CODE (index);
5398 /* Standard coprocessor addressing modes. */
5399 if (TARGET_HARD_FLOAT
5400 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5401 && (mode == SFmode || mode == DFmode
5402 || (TARGET_MAVERICK && mode == DImode)))
5403 return (code == CONST_INT && INTVAL (index) < 1024
5404 && INTVAL (index) > -1024
5405 && (INTVAL (index) & 3) == 0);
5407 /* For quad modes, we restrict the constant offset to be slightly less
5408 than what the instruction format permits. We do this because for
5409 quad mode moves, we will actually decompose them into two separate
5410 double-mode reads or writes. INDEX must therefore be a valid
5411 (double-mode) offset and so should INDEX+8. */
5412 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5413 return (code == CONST_INT
5414 && INTVAL (index) < 1016
5415 && INTVAL (index) > -1024
5416 && (INTVAL (index) & 3) == 0);
5418 /* We have no such constraint on double mode offsets, so we permit the
5419 full range of the instruction format. */
5420 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5421 return (code == CONST_INT
5422 && INTVAL (index) < 1024
5423 && INTVAL (index) > -1024
5424 && (INTVAL (index) & 3) == 0);
5426 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5427 return (code == CONST_INT
5428 && INTVAL (index) < 1024
5429 && INTVAL (index) > -1024
5430 && (INTVAL (index) & 3) == 0);
5432 if (arm_address_register_rtx_p (index, strict_p)
5433 && (GET_MODE_SIZE (mode) <= 4))
5434 return 1;
5436 if (mode == DImode || mode == DFmode)
5438 if (code == CONST_INT)
5440 HOST_WIDE_INT val = INTVAL (index);
5442 if (TARGET_LDRD)
5443 return val > -256 && val < 256;
5444 else
5445 return val > -4096 && val < 4092;
5448 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5451 if (GET_MODE_SIZE (mode) <= 4
5452 && ! (arm_arch4
5453 && (mode == HImode
5454 || mode == HFmode
5455 || (mode == QImode && outer == SIGN_EXTEND))))
5457 if (code == MULT)
5459 rtx xiop0 = XEXP (index, 0);
5460 rtx xiop1 = XEXP (index, 1);
5462 return ((arm_address_register_rtx_p (xiop0, strict_p)
5463 && power_of_two_operand (xiop1, SImode))
5464 || (arm_address_register_rtx_p (xiop1, strict_p)
5465 && power_of_two_operand (xiop0, SImode)));
5467 else if (code == LSHIFTRT || code == ASHIFTRT
5468 || code == ASHIFT || code == ROTATERT)
5470 rtx op = XEXP (index, 1);
5472 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5473 && GET_CODE (op) == CONST_INT
5474 && INTVAL (op) > 0
5475 && INTVAL (op) <= 31);
5479 /* For ARM v4 we may be doing a sign-extend operation during the
5480 load. */
5481 if (arm_arch4)
5483 if (mode == HImode
5484 || mode == HFmode
5485 || (outer == SIGN_EXTEND && mode == QImode))
5486 range = 256;
5487 else
5488 range = 4096;
5490 else
5491 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5493 return (code == CONST_INT
5494 && INTVAL (index) < range
5495 && INTVAL (index) > -range);
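/* Some addresses accepted here in ARM state (illustrative): for SImode,
   a register index, a constant offset in (-4096, 4096), or a scaled
   register as in [r0, r1, lsl #2]; for DImode or DFmode with LDRD the
   constant offset range narrows to (-256, 256).  */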
5498 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5499 index operand. i.e. 1, 2, 4 or 8. */
5500 static bool
5501 thumb2_index_mul_operand (rtx op)
5503 HOST_WIDE_INT val;
5505 if (GET_CODE(op) != CONST_INT)
5506 return false;
5508 val = INTVAL(op);
5509 return (val == 1 || val == 2 || val == 4 || val == 8);
5512 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5513 static int
5514 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5516 enum rtx_code code = GET_CODE (index);
5518 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5519 /* Standard coprocessor addressing modes. */
5520 if (TARGET_HARD_FLOAT
5521 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5522 && (mode == SFmode || mode == DFmode
5523 || (TARGET_MAVERICK && mode == DImode)))
5524 return (code == CONST_INT && INTVAL (index) < 1024
5525 /* Thumb-2 allows only > -256 index range for its core register
5526 load/stores. Since we allow SF/DF in core registers, we have
5527 to use the intersection between -256~4096 (core) and -1024~1024
5528 (coprocessor). */
5529 && INTVAL (index) > -256
5530 && (INTVAL (index) & 3) == 0);
5532 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5534 /* For DImode assume values will usually live in core regs
5535 and only allow LDRD addressing modes. */
5536 if (!TARGET_LDRD || mode != DImode)
5537 return (code == CONST_INT
5538 && INTVAL (index) < 1024
5539 && INTVAL (index) > -1024
5540 && (INTVAL (index) & 3) == 0);
5543 /* For quad modes, we restrict the constant offset to be slightly less
5544 than what the instruction format permits. We do this because for
5545 quad mode moves, we will actually decompose them into two separate
5546 double-mode reads or writes. INDEX must therefore be a valid
5547 (double-mode) offset and so should INDEX+8. */
5548 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5549 return (code == CONST_INT
5550 && INTVAL (index) < 1016
5551 && INTVAL (index) > -1024
5552 && (INTVAL (index) & 3) == 0);
5554 /* We have no such constraint on double mode offsets, so we permit the
5555 full range of the instruction format. */
5556 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5557 return (code == CONST_INT
5558 && INTVAL (index) < 1024
5559 && INTVAL (index) > -1024
5560 && (INTVAL (index) & 3) == 0);
5562 if (arm_address_register_rtx_p (index, strict_p)
5563 && (GET_MODE_SIZE (mode) <= 4))
5564 return 1;
5566 if (mode == DImode || mode == DFmode)
5568 if (code == CONST_INT)
5570 HOST_WIDE_INT val = INTVAL (index);
5571 /* ??? Can we assume ldrd for thumb2? */
5572 /* Thumb-2 ldrd only has reg+const addressing modes. */
5573 /* ldrd supports offsets of +-1020.
5574 However the ldr fallback does not. */
5575 return val > -256 && val < 256 && (val & 3) == 0;
5577 else
5578 return 0;
5581 if (code == MULT)
5583 rtx xiop0 = XEXP (index, 0);
5584 rtx xiop1 = XEXP (index, 1);
5586 return ((arm_address_register_rtx_p (xiop0, strict_p)
5587 && thumb2_index_mul_operand (xiop1))
5588 || (arm_address_register_rtx_p (xiop1, strict_p)
5589 && thumb2_index_mul_operand (xiop0)));
5591 else if (code == ASHIFT)
5593 rtx op = XEXP (index, 1);
5595 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5596 && GET_CODE (op) == CONST_INT
5597 && INTVAL (op) > 0
5598 && INTVAL (op) <= 3);
5601 return (code == CONST_INT
5602 && INTVAL (index) < 4096
5603 && INTVAL (index) > -256);
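/* This final case is the Thumb-2 core load/store immediate range: up to
   +4095 with the 12-bit positive-offset encoding and down to -255 with the
   8-bit negative-offset encoding, i.e. the asymmetrical (-256,4096) range
   mentioned again in arm_legitimize_reload_address.  */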
5606 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5607 static int
5608 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5610 int regno;
5612 if (GET_CODE (x) != REG)
5613 return 0;
5615 regno = REGNO (x);
5617 if (strict_p)
5618 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5620 return (regno <= LAST_LO_REGNUM
5621 || regno > LAST_VIRTUAL_REGISTER
5622 || regno == FRAME_POINTER_REGNUM
5623 || (GET_MODE_SIZE (mode) >= 4
5624 && (regno == STACK_POINTER_REGNUM
5625 || regno >= FIRST_PSEUDO_REGISTER
5626 || x == hard_frame_pointer_rtx
5627 || x == arg_pointer_rtx)));
5630 /* Return nonzero if x is a legitimate index register. This is the case
5631 for any base register that can access a QImode object. */
5632 inline static int
5633 thumb1_index_register_rtx_p (rtx x, int strict_p)
5635 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5638 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5640 The AP may be eliminated to either the SP or the FP, so we use the
5641 least common denominator, i.e. SImode, and offsets from 0 to 64.
5643 ??? Verify whether the above is the right approach.
5645 ??? Also, the FP may be eliminated to the SP, so perhaps that
5646 needs special handling also.
5648 ??? Look at how the mips16 port solves this problem. It probably uses
5649 better ways to solve some of these problems.
5651 Although it is not incorrect, we don't accept QImode and HImode
5652 addresses based on the frame pointer or arg pointer until the
5653 reload pass starts; this is so that eliminating such addresses
5654 into stack-based ones won't produce impossible code. */
5656 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5658 /* ??? Not clear if this is right. Experiment. */
5659 if (GET_MODE_SIZE (mode) < 4
5660 && !(reload_in_progress || reload_completed)
5661 && (reg_mentioned_p (frame_pointer_rtx, x)
5662 || reg_mentioned_p (arg_pointer_rtx, x)
5663 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5664 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5665 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5666 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5667 return 0;
5669 /* Accept any base register. SP only in SImode or larger. */
5670 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5671 return 1;
5673 /* This is PC relative data before arm_reorg runs. */
5674 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5675 && GET_CODE (x) == SYMBOL_REF
5676 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5677 return 1;
5679 /* This is PC relative data after arm_reorg runs. */
5680 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5681 && reload_completed
5682 && (GET_CODE (x) == LABEL_REF
5683 || (GET_CODE (x) == CONST
5684 && GET_CODE (XEXP (x, 0)) == PLUS
5685 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5686 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5687 return 1;
5689 /* Post-inc indexing only supported for SImode and larger. */
5690 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5691 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5692 return 1;
5694 else if (GET_CODE (x) == PLUS)
5696 /* REG+REG address can be any two index registers. */
5697 /* We disallow FRAME+REG addressing since we know that FRAME
5698 will be replaced with STACK, and SP relative addressing only
5699 permits SP+OFFSET. */
5700 if (GET_MODE_SIZE (mode) <= 4
5701 && XEXP (x, 0) != frame_pointer_rtx
5702 && XEXP (x, 1) != frame_pointer_rtx
5703 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5704 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5705 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5706 return 1;
5708 /* REG+const has 5-7 bit offset for non-SP registers. */
5709 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5710 || XEXP (x, 0) == arg_pointer_rtx)
5711 && GET_CODE (XEXP (x, 1)) == CONST_INT
5712 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5713 return 1;
5715 /* REG+const has a 10-bit offset for SP, but only SImode and
5716 larger modes are supported. */
5717 /* ??? Should probably check for DI/DFmode overflow here
5718 just like GO_IF_LEGITIMATE_OFFSET does. */
5719 else if (GET_CODE (XEXP (x, 0)) == REG
5720 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5721 && GET_MODE_SIZE (mode) >= 4
5722 && GET_CODE (XEXP (x, 1)) == CONST_INT
5723 && INTVAL (XEXP (x, 1)) >= 0
5724 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5725 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5726 return 1;
5728 else if (GET_CODE (XEXP (x, 0)) == REG
5729 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5730 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5731 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5732 && REGNO (XEXP (x, 0))
5733 <= LAST_VIRTUAL_POINTER_REGISTER))
5734 && GET_MODE_SIZE (mode) >= 4
5735 && GET_CODE (XEXP (x, 1)) == CONST_INT
5736 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5737 return 1;
5740 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5741 && GET_MODE_SIZE (mode) == 4
5742 && GET_CODE (x) == SYMBOL_REF
5743 && CONSTANT_POOL_ADDRESS_P (x)
5744 && ! (flag_pic
5745 && symbol_mentioned_p (get_pool_constant (x))
5746 && ! pcrel_constant_p (get_pool_constant (x))))
5747 return 1;
5749 return 0;
5752 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5753 instruction of mode MODE. */
5755 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5757 switch (GET_MODE_SIZE (mode))
5759 case 1:
5760 return val >= 0 && val < 32;
5762 case 2:
5763 return val >= 0 && val < 64 && (val & 1) == 0;
5765 default:
5766 return (val >= 0
5767 && (val + GET_MODE_SIZE (mode)) <= 128
5768 && (val & 3) == 0);
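/* These limits reflect the 5-bit immediate offset field of the Thumb-1
   register-plus-immediate loads and stores, scaled by the access size:
   0..31 for bytes, 0..62 (even) for halfwords, and 0..124 (word-aligned)
   for words, with larger modes additionally required to fit below 128.  */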
5772 bool
5773 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5775 if (TARGET_ARM)
5776 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5777 else if (TARGET_THUMB2)
5778 return thumb2_legitimate_address_p (mode, x, strict_p);
5779 else /* if (TARGET_THUMB1) */
5780 return thumb1_legitimate_address_p (mode, x, strict_p);
5783 /* Build the SYMBOL_REF for __tls_get_addr. */
5785 static GTY(()) rtx tls_get_addr_libfunc;
5787 static rtx
5788 get_tls_get_addr (void)
5790 if (!tls_get_addr_libfunc)
5791 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5792 return tls_get_addr_libfunc;
5795 static rtx
5796 arm_load_tp (rtx target)
5798 if (!target)
5799 target = gen_reg_rtx (SImode);
5801 if (TARGET_HARD_TP)
5803 /* Can return in any reg. */
5804 emit_insn (gen_load_tp_hard (target));
5806 else
5808 /* Always returned in r0. Immediately copy the result into a pseudo,
5809 otherwise other uses of r0 (e.g. setting up function arguments) may
5810 clobber the value. */
5812 rtx tmp;
5814 emit_insn (gen_load_tp_soft ());
5816 tmp = gen_rtx_REG (SImode, 0);
5817 emit_move_insn (target, tmp);
5819 return target;
5822 static rtx
5823 load_tls_operand (rtx x, rtx reg)
5825 rtx tmp;
5827 if (reg == NULL_RTX)
5828 reg = gen_reg_rtx (SImode);
5830 tmp = gen_rtx_CONST (SImode, x);
5832 emit_move_insn (reg, tmp);
5834 return reg;
5837 static rtx
5838 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5840 rtx insns, label, labelno, sum;
5842 start_sequence ();
5844 labelno = GEN_INT (pic_labelno++);
5845 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5846 label = gen_rtx_CONST (VOIDmode, label);
5848 sum = gen_rtx_UNSPEC (Pmode,
5849 gen_rtvec (4, x, GEN_INT (reloc), label,
5850 GEN_INT (TARGET_ARM ? 8 : 4)),
5851 UNSPEC_TLS);
5852 reg = load_tls_operand (sum, reg);
5854 if (TARGET_ARM)
5855 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5856 else if (TARGET_THUMB2)
5857 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5858 else /* TARGET_THUMB1 */
5859 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5861 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5862 Pmode, 1, reg, Pmode);
5864 insns = get_insns ();
5865 end_sequence ();
5867 return insns;
5871 legitimize_tls_address (rtx x, rtx reg)
5873 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5874 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5876 switch (model)
5878 case TLS_MODEL_GLOBAL_DYNAMIC:
5879 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5880 dest = gen_reg_rtx (Pmode);
5881 emit_libcall_block (insns, dest, ret, x);
5882 return dest;
5884 case TLS_MODEL_LOCAL_DYNAMIC:
5885 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5887 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5888 share the LDM result with other LD model accesses. */
5889 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5890 UNSPEC_TLS);
5891 dest = gen_reg_rtx (Pmode);
5892 emit_libcall_block (insns, dest, ret, eqv);
5894 /* Load the addend. */
5895 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5896 UNSPEC_TLS);
5897 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5898 return gen_rtx_PLUS (Pmode, dest, addend);
5900 case TLS_MODEL_INITIAL_EXEC:
5901 labelno = GEN_INT (pic_labelno++);
5902 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5903 label = gen_rtx_CONST (VOIDmode, label);
5904 sum = gen_rtx_UNSPEC (Pmode,
5905 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5906 GEN_INT (TARGET_ARM ? 8 : 4)),
5907 UNSPEC_TLS);
5908 reg = load_tls_operand (sum, reg);
5910 if (TARGET_ARM)
5911 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5912 else if (TARGET_THUMB2)
5913 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5914 else
5916 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5917 emit_move_insn (reg, gen_const_mem (SImode, reg));
5920 tp = arm_load_tp (NULL_RTX);
5922 return gen_rtx_PLUS (Pmode, tp, reg);
5924 case TLS_MODEL_LOCAL_EXEC:
5925 tp = arm_load_tp (NULL_RTX);
5927 reg = gen_rtx_UNSPEC (Pmode,
5928 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5929 UNSPEC_TLS);
5930 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5932 return gen_rtx_PLUS (Pmode, tp, reg);
5934 default:
5935 abort ();
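/* In summary: the global-dynamic and local-dynamic models call
   __tls_get_addr (the latter then adding a TLS_LDO32 offset to the module
   base it returns), initial-exec loads the thread-pointer offset from
   memory and adds it to the thread pointer, and local-exec adds a
   link-time TLS_LE32 offset directly to the value from arm_load_tp.  */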
5939 /* Try machine-dependent ways of modifying an illegitimate address
5940 to be legitimate. If we find one, return the new, valid address. */
5942 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5944 if (!TARGET_ARM)
5946 /* TODO: legitimize_address for Thumb2. */
5947 if (TARGET_THUMB2)
5948 return x;
5949 return thumb_legitimize_address (x, orig_x, mode);
5952 if (arm_tls_symbol_p (x))
5953 return legitimize_tls_address (x, NULL_RTX);
5955 if (GET_CODE (x) == PLUS)
5957 rtx xop0 = XEXP (x, 0);
5958 rtx xop1 = XEXP (x, 1);
5960 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5961 xop0 = force_reg (SImode, xop0);
5963 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5964 xop1 = force_reg (SImode, xop1);
5966 if (ARM_BASE_REGISTER_RTX_P (xop0)
5967 && GET_CODE (xop1) == CONST_INT)
5969 HOST_WIDE_INT n, low_n;
5970 rtx base_reg, val;
5971 n = INTVAL (xop1);
5973 /* VFP addressing modes actually allow greater offsets, but for
5974 now we just stick with the lowest common denominator. */
5975 if (mode == DImode
5976 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
5978 low_n = n & 0x0f;
5979 n &= ~0x0f;
5980 if (low_n > 4)
5982 n += 16;
5983 low_n -= 16;
5986 else
5988 low_n = ((mode) == TImode ? 0
5989 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
5990 n -= low_n;
5993 base_reg = gen_reg_rtx (SImode);
5994 val = force_operand (plus_constant (xop0, n), NULL_RTX);
5995 emit_move_insn (base_reg, val);
5996 x = plus_constant (base_reg, low_n);
5998 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5999 x = gen_rtx_PLUS (SImode, xop0, xop1);
6002 /* XXX We don't allow MINUS any more -- see comment in
6003 arm_legitimate_address_outer_p (). */
6004 else if (GET_CODE (x) == MINUS)
6006 rtx xop0 = XEXP (x, 0);
6007 rtx xop1 = XEXP (x, 1);
6009 if (CONSTANT_P (xop0))
6010 xop0 = force_reg (SImode, xop0);
6012 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6013 xop1 = force_reg (SImode, xop1);
6015 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6016 x = gen_rtx_MINUS (SImode, xop0, xop1);
6019 /* Make sure to take full advantage of the pre-indexed addressing mode
6020 with absolute addresses, which often allows the base register to
6021 be factorized for multiple adjacent memory references, and might
6022 even allow the minipool to be avoided entirely. */
6023 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6025 unsigned int bits;
6026 HOST_WIDE_INT mask, base, index;
6027 rtx base_reg;
6029 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6030 use an 8-bit index. So let's use a 12-bit index for SImode only and
6031 hope that arm_gen_constant will enable ldrb to use more bits. */
6032 bits = (mode == SImode) ? 12 : 8;
6033 mask = (1 << bits) - 1;
6034 base = INTVAL (x) & ~mask;
6035 index = INTVAL (x) & mask;
6036 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6038 /* It'll most probably be more efficient to generate the base
6039 with more bits set and use a negative index instead. */
6040 base |= mask;
6041 index -= mask;
6043 base_reg = force_reg (SImode, GEN_INT (base));
6044 x = plus_constant (base_reg, index);
6047 if (flag_pic)
6049 /* We need to find and carefully transform any SYMBOL and LABEL
6050 references; so go back to the original address expression. */
6051 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6053 if (new_x != orig_x)
6054 x = new_x;
6057 return x;
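/* For example, with mode == SImode an address of the form
   (plus reg (const_int 4100)) is rewritten above as base_reg = reg + 4096
   followed by (plus base_reg (const_int 4)), keeping the residual offset
   within the 12-bit ldr/str immediate range.  */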
6061 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6062 to be legitimate. If we find one, return the new, valid address. */
6064 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6066 if (arm_tls_symbol_p (x))
6067 return legitimize_tls_address (x, NULL_RTX);
6069 if (GET_CODE (x) == PLUS
6070 && GET_CODE (XEXP (x, 1)) == CONST_INT
6071 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6072 || INTVAL (XEXP (x, 1)) < 0))
6074 rtx xop0 = XEXP (x, 0);
6075 rtx xop1 = XEXP (x, 1);
6076 HOST_WIDE_INT offset = INTVAL (xop1);
6078 /* Try and fold the offset into a biasing of the base register and
6079 then offsetting that. Don't do this when optimizing for space
6080 since it can cause too many CSEs. */
6081 if (optimize_size && offset >= 0
6082 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6084 HOST_WIDE_INT delta;
6086 if (offset >= 256)
6087 delta = offset - (256 - GET_MODE_SIZE (mode));
6088 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6089 delta = 31 * GET_MODE_SIZE (mode);
6090 else
6091 delta = offset & (~31 * GET_MODE_SIZE (mode));
6093 xop0 = force_operand (plus_constant (xop0, offset - delta),
6094 NULL_RTX);
6095 x = plus_constant (xop0, delta);
6097 else if (offset < 0 && offset > -256)
6098 /* Small negative offsets are best done with a subtract before the
6099 dereference, since forcing these into a register normally takes two
6100 instructions. */
6101 x = force_operand (x, NULL_RTX);
6102 else
6104 /* For the remaining cases, force the constant into a register. */
6105 xop1 = force_reg (SImode, xop1);
6106 x = gen_rtx_PLUS (SImode, xop0, xop1);
6109 else if (GET_CODE (x) == PLUS
6110 && s_register_operand (XEXP (x, 1), SImode)
6111 && !s_register_operand (XEXP (x, 0), SImode))
6113 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6115 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6118 if (flag_pic)
6120 /* We need to find and carefully transform any SYMBOL and LABEL
6121 references; so go back to the original address expression. */
6122 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6124 if (new_x != orig_x)
6125 x = new_x;
6128 return x;
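/* As an illustration of the optimize_size path above, an HImode access at
   reg+70 (beyond the 0..62 halfword range) is rewritten as (reg + 8) + 62,
   so the residual offset once again satisfies thumb_legitimate_offset_p.  */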
6131 bool
6132 arm_legitimize_reload_address (rtx *p,
6133 enum machine_mode mode,
6134 int opnum, int type,
6135 int ind_levels ATTRIBUTE_UNUSED)
6137 if (GET_CODE (*p) == PLUS
6138 && GET_CODE (XEXP (*p, 0)) == REG
6139 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6140 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6142 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6143 HOST_WIDE_INT low, high;
6145 /* Detect coprocessor load/stores. */
6146 bool coproc_p = ((TARGET_HARD_FLOAT
6147 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6148 && (mode == SFmode || mode == DFmode
6149 || (mode == DImode && TARGET_MAVERICK)))
6150 || (TARGET_REALLY_IWMMXT
6151 && VALID_IWMMXT_REG_MODE (mode))
6152 || (TARGET_NEON
6153 && (VALID_NEON_DREG_MODE (mode)
6154 || VALID_NEON_QREG_MODE (mode))));
6156 /* In some cases, bail out when the low two bits of the offset are set
(i.e. the address is not word-aligned). */
6157 if ((val & 0x3) != 0
6158 /* Coprocessor load/store indices are 8 bits with '00' appended. */
6159 && (coproc_p
6160 /* For DI, and DF under soft-float: */
6161 || ((mode == DImode || mode == DFmode)
6162 /* Without ldrd, we use stm/ldm, which does not
6163 fare well with unaligned offsets. */
6164 && (! TARGET_LDRD
6165 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6166 || TARGET_THUMB2))))
6167 return false;
6169 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6170 where the (reg+high) part gets turned into a reload add insn,
6171 we try to decompose the index into high/low values that can often
6172 also lead to better reload CSE.
6173 For example:
6174 ldr r0, [r2, #4100] // Offset too large
6175 ldr r1, [r2, #4104] // Offset too large
6177 is best reloaded as:
6178 add t1, r2, #4096
6179 ldr r0, [t1, #4]
6180 add t2, r2, #4096
6181 ldr r1, [t2, #8]
6183 which post-reload CSE can simplify in most cases to eliminate the
6184 second add instruction:
6185 add t1, r2, #4096
6186 ldr r0, [t1, #4]
6187 ldr r1, [t1, #8]
6189 The idea here is that we want to split out the bits of the constant
6190 as a mask, rather than as subtracting the maximum offset that the
6191 respective type of load/store used can handle.
6193 A negative low part can still be used even if the overall offset
6194 is positive; sometimes this leads to an immediate for the high part
6195 that can be constructed with fewer instructions.
6196 For example:
6197 ldr r0, [r2, #0x3FFFFC]
6199 This is best reloaded as:
6200 add t1, r2, #0x400000
6201 ldr r0, [t1, #-4]
6203 The trick for spotting this for a load insn with N bits of offset
6204 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6205 negative offset that is going to make bit N and all the bits below
6206 it become zero in the remainder part.
6208 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6209 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6210 used in most cases of ARM load/store instructions. */
6212 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6213 (((VAL) & ((1 << (N)) - 1)) \
6214 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6215 : 0)
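/* For instance, SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 10) evaluates to -4, so
   val = 0x3FFFFC splits into high = 0x400000 and low = -4, exactly the
   decomposition shown in the example above.  */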
6217 if (coproc_p)
6219 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6221 /* NEON quad-word load/stores are made of two double-word accesses,
6222 so the valid index range is reduced by 8. Treat as 9-bit range if
6223 we go over it. */
6224 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6225 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6227 else if (GET_MODE_SIZE (mode) == 8)
6229 if (TARGET_LDRD)
6230 low = (TARGET_THUMB2
6231 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6232 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6233 else
6234 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6235 to access doublewords. The supported load/store offsets are
6236 -8, -4, and 4, which we try to produce here. */
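/* The expression below sign-extends the low four bits of val into the
   range [-8, +7].  */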
6237 low = ((val & 0xf) ^ 0x8) - 0x8;
6239 else if (GET_MODE_SIZE (mode) < 8)
6241 /* NEON element load/stores do not have an offset. */
6242 if (TARGET_NEON_FP16 && mode == HFmode)
6243 return false;
6245 if (TARGET_THUMB2)
6247 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6248 Try the wider 12-bit range first, and re-try if the result
6249 is out of range. */
6250 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6251 if (low < -255)
6252 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6254 else
6256 if (mode == HImode || mode == HFmode)
6258 if (arm_arch4)
6259 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6260 else
6262 /* The storehi/movhi_bytes fallbacks can use only
6263 [-4094,+4094] of the full ldrb/strb index range. */
6264 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6265 if (low == 4095 || low == -4095)
6266 return false;
6269 else
6270 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6273 else
6274 return false;
6276 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6277 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6278 - (unsigned HOST_WIDE_INT) 0x80000000);
6279 /* Check for overflow or zero.  */
6280 if (low == 0 || high == 0 || (high + low != val))
6281 return false;
6283 /* Reload the high part into a base reg; leave the low part
6284 in the mem. */
6285 *p = gen_rtx_PLUS (GET_MODE (*p),
6286 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6287 GEN_INT (high)),
6288 GEN_INT (low));
6289 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6290 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6291 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6292 return true;
6295 return false;
6299 thumb_legitimize_reload_address (rtx *x_p,
6300 enum machine_mode mode,
6301 int opnum, int type,
6302 int ind_levels ATTRIBUTE_UNUSED)
6304 rtx x = *x_p;
6306 if (GET_CODE (x) == PLUS
6307 && GET_MODE_SIZE (mode) < 4
6308 && REG_P (XEXP (x, 0))
6309 && XEXP (x, 0) == stack_pointer_rtx
6310 && GET_CODE (XEXP (x, 1)) == CONST_INT
6311 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6313 rtx orig_x = x;
6315 x = copy_rtx (x);
6316 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6317 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6318 return x;
6321 /* If both registers are hi-regs, then it's better to reload the
6322 entire expression rather than each register individually. That
6323 only requires one reload register rather than two. */
6324 if (GET_CODE (x) == PLUS
6325 && REG_P (XEXP (x, 0))
6326 && REG_P (XEXP (x, 1))
6327 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6328 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6330 rtx orig_x = x;
6332 x = copy_rtx (x);
6333 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6334 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6335 return x;
6338 return NULL;
6341 /* Test for various thread-local symbols. */
6343 /* Return TRUE if X is a thread-local symbol. */
6345 static bool
6346 arm_tls_symbol_p (rtx x)
6348 if (! TARGET_HAVE_TLS)
6349 return false;
6351 if (GET_CODE (x) != SYMBOL_REF)
6352 return false;
6354 return SYMBOL_REF_TLS_MODEL (x) != 0;
6357 /* Helper for arm_tls_referenced_p. */
6359 static int
6360 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6362 if (GET_CODE (*x) == SYMBOL_REF)
6363 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6365 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6366 TLS offsets, not real symbol references. */
6367 if (GET_CODE (*x) == UNSPEC
6368 && XINT (*x, 1) == UNSPEC_TLS)
6369 return -1;
6371 return 0;
6374 /* Return TRUE if X contains any TLS symbol references. */
6376 bool
6377 arm_tls_referenced_p (rtx x)
6379 if (! TARGET_HAVE_TLS)
6380 return false;
6382 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6385 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6387 On the ARM, allow any integer (invalid ones are removed later by insn
6388 patterns), nice doubles and symbol_refs which refer to the function's
6389 constant pool XXX.
6391 When generating PIC code, allow anything. */
6393 static bool
6394 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6396 /* At present, we have no support for Neon structure constants, so forbid
6397 them here. It might be possible to handle simple cases like 0 and -1
6398 in future. */
6399 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6400 return false;
6402 return flag_pic || !label_mentioned_p (x);
6405 static bool
6406 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6408 return (GET_CODE (x) == CONST_INT
6409 || GET_CODE (x) == CONST_DOUBLE
6410 || CONSTANT_ADDRESS_P (x)
6411 || flag_pic);
6414 static bool
6415 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6417 return (!arm_cannot_force_const_mem (mode, x)
6418 && (TARGET_32BIT
6419 ? arm_legitimate_constant_p_1 (mode, x)
6420 : thumb_legitimate_constant_p (mode, x)));
6423 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6425 static bool
6426 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6428 rtx base, offset;
6430 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6432 split_const (x, &base, &offset);
6433 if (GET_CODE (base) == SYMBOL_REF
6434 && !offset_within_block_p (base, INTVAL (offset)))
6435 return true;
6437 return arm_tls_referenced_p (x);
6440 #define REG_OR_SUBREG_REG(X) \
6441 (GET_CODE (X) == REG \
6442 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6444 #define REG_OR_SUBREG_RTX(X) \
6445 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6447 static inline int
6448 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6450 enum machine_mode mode = GET_MODE (x);
6451 int total;
6453 switch (code)
6455 case ASHIFT:
6456 case ASHIFTRT:
6457 case LSHIFTRT:
6458 case ROTATERT:
6459 case PLUS:
6460 case MINUS:
6461 case COMPARE:
6462 case NEG:
6463 case NOT:
6464 return COSTS_N_INSNS (1);
6466 case MULT:
6467 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6469 int cycles = 0;
6470 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
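/* The loop below charges roughly one extra cycle for every two bits of
   the constant multiplier, presumably modelling a multiplier that
   terminates early on small operands.  */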
6472 while (i)
6474 i >>= 2;
6475 cycles++;
6477 return COSTS_N_INSNS (2) + cycles;
6479 return COSTS_N_INSNS (1) + 16;
6481 case SET:
6482 return (COSTS_N_INSNS (1)
6483 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6484 + (GET_CODE (SET_DEST (x)) == MEM)));
6486 case CONST_INT:
6487 if (outer == SET)
6489 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6490 return 0;
6491 if (thumb_shiftable_const (INTVAL (x)))
6492 return COSTS_N_INSNS (2);
6493 return COSTS_N_INSNS (3);
6495 else if ((outer == PLUS || outer == COMPARE)
6496 && INTVAL (x) < 256 && INTVAL (x) > -256)
6497 return 0;
6498 else if ((outer == IOR || outer == XOR || outer == AND)
6499 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6500 return COSTS_N_INSNS (1);
6501 else if (outer == AND)
6503 int i;
6504 /* This duplicates the tests in the andsi3 expander. */
6505 for (i = 9; i <= 31; i++)
6506 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6507 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6508 return COSTS_N_INSNS (2);
6510 else if (outer == ASHIFT || outer == ASHIFTRT
6511 || outer == LSHIFTRT)
6512 return 0;
6513 return COSTS_N_INSNS (2);
6515 case CONST:
6516 case CONST_DOUBLE:
6517 case LABEL_REF:
6518 case SYMBOL_REF:
6519 return COSTS_N_INSNS (3);
6521 case UDIV:
6522 case UMOD:
6523 case DIV:
6524 case MOD:
6525 return 100;
6527 case TRUNCATE:
6528 return 99;
6530 case AND:
6531 case XOR:
6532 case IOR:
6533 /* XXX guess. */
6534 return 8;
6536 case MEM:
6537 /* XXX another guess. */
6538 /* Memory costs quite a lot for the first word, but subsequent words
6539 load at the equivalent of a single insn each. */
6540 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6541 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6542 ? 4 : 0));
6544 case IF_THEN_ELSE:
6545 /* XXX a guess. */
6546 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6547 return 14;
6548 return 2;
6550 case SIGN_EXTEND:
6551 case ZERO_EXTEND:
6552 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6553 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6555 if (mode == SImode)
6556 return total;
6558 if (arm_arch6)
6559 return total + COSTS_N_INSNS (1);
6561 /* Assume a two-shift sequence. Increase the cost slightly so
6562 we prefer actual shifts over an extend operation. */
6563 return total + 1 + COSTS_N_INSNS (2);
6565 default:
6566 return 99;
6570 static inline bool
6571 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6573 enum machine_mode mode = GET_MODE (x);
6574 enum rtx_code subcode;
6575 rtx operand;
6576 enum rtx_code code = GET_CODE (x);
6577 *total = 0;
6579 switch (code)
6581 case MEM:
6582 /* Memory costs quite a lot for the first word, but subsequent words
6583 load at the equivalent of a single insn each. */
6584 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6585 return true;
6587 case DIV:
6588 case MOD:
6589 case UDIV:
6590 case UMOD:
6591 if (TARGET_HARD_FLOAT && mode == SFmode)
6592 *total = COSTS_N_INSNS (2);
6593 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6594 *total = COSTS_N_INSNS (4);
6595 else
6596 *total = COSTS_N_INSNS (20);
6597 return false;
6599 case ROTATE:
6600 if (GET_CODE (XEXP (x, 1)) == REG)
6601 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6602 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6603 *total = rtx_cost (XEXP (x, 1), code, speed);
6605 /* Fall through */
6606 case ROTATERT:
6607 if (mode != SImode)
6609 *total += COSTS_N_INSNS (4);
6610 return true;
6613 /* Fall through */
6614 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6615 *total += rtx_cost (XEXP (x, 0), code, speed);
6616 if (mode == DImode)
6618 *total += COSTS_N_INSNS (3);
6619 return true;
6622 *total += COSTS_N_INSNS (1);
6623 /* Increase the cost of complex shifts because they aren't any faster,
6624 and they reduce dual-issue opportunities. */
6625 if (arm_tune_cortex_a9
6626 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6627 ++*total;
6629 return true;
6631 case MINUS:
6632 if (mode == DImode)
6634 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6635 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6636 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6638 *total += rtx_cost (XEXP (x, 1), code, speed);
6639 return true;
6642 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6643 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6645 *total += rtx_cost (XEXP (x, 0), code, speed);
6646 return true;
6649 return false;
6652 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6654 if (TARGET_HARD_FLOAT
6655 && (mode == SFmode
6656 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6658 *total = COSTS_N_INSNS (1);
6659 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6660 && arm_const_double_rtx (XEXP (x, 0)))
6662 *total += rtx_cost (XEXP (x, 1), code, speed);
6663 return true;
6666 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6667 && arm_const_double_rtx (XEXP (x, 1)))
6669 *total += rtx_cost (XEXP (x, 0), code, speed);
6670 return true;
6673 return false;
6675 *total = COSTS_N_INSNS (20);
6676 return false;
6679 *total = COSTS_N_INSNS (1);
6680 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6681 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6683 *total += rtx_cost (XEXP (x, 1), code, speed);
6684 return true;
6687 subcode = GET_CODE (XEXP (x, 1));
6688 if (subcode == ASHIFT || subcode == ASHIFTRT
6689 || subcode == LSHIFTRT
6690 || subcode == ROTATE || subcode == ROTATERT)
6692 *total += rtx_cost (XEXP (x, 0), code, speed);
6693 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6694 return true;
6697 /* A shift as a part of RSB costs no more than RSB itself. */
6698 if (GET_CODE (XEXP (x, 0)) == MULT
6699 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6701 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6702 *total += rtx_cost (XEXP (x, 1), code, speed);
6703 return true;
6706 if (subcode == MULT
6707 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6709 *total += rtx_cost (XEXP (x, 0), code, speed);
6710 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6711 return true;
6714 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6715 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6717 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6718 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6719 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6720 *total += COSTS_N_INSNS (1);
6722 return true;
6725 /* Fall through */
6727 case PLUS:
6728 if (code == PLUS && arm_arch6 && mode == SImode
6729 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6730 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6732 *total = COSTS_N_INSNS (1);
6733 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6734 speed);
6735 *total += rtx_cost (XEXP (x, 1), code, speed);
6736 return true;
6739 /* MLA: All arguments must be registers. We filter out
6740 multiplication by a power of two, so that we fall down into
6741 the code below. */
6742 if (GET_CODE (XEXP (x, 0)) == MULT
6743 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6745 /* The cost comes from the cost of the multiply. */
6746 return false;
6749 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6751 if (TARGET_HARD_FLOAT
6752 && (mode == SFmode
6753 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6755 *total = COSTS_N_INSNS (1);
6756 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6757 && arm_const_double_rtx (XEXP (x, 1)))
6759 *total += rtx_cost (XEXP (x, 0), code, speed);
6760 return true;
6763 return false;
6766 *total = COSTS_N_INSNS (20);
6767 return false;
6770 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6771 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6773 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6774 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6775 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6776 *total += COSTS_N_INSNS (1);
6777 return true;
6780 /* Fall through */
6782 case AND: case XOR: case IOR:
6784 /* Normally the frame registers will be split into reg+const during
6785 reload, so it is a bad idea to combine them with other instructions,
6786 since then they might not be moved outside of loops. As a compromise
6787 we allow integration with ops that have a constant as their second
6788 operand. */
6789 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6790 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6791 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6792 *total = COSTS_N_INSNS (1);
6794 if (mode == DImode)
6796 *total += COSTS_N_INSNS (2);
6797 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6798 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6800 *total += rtx_cost (XEXP (x, 0), code, speed);
6801 return true;
6804 return false;
6807 *total += COSTS_N_INSNS (1);
6808 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6809 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6811 *total += rtx_cost (XEXP (x, 0), code, speed);
6812 return true;
6814 subcode = GET_CODE (XEXP (x, 0));
6815 if (subcode == ASHIFT || subcode == ASHIFTRT
6816 || subcode == LSHIFTRT
6817 || subcode == ROTATE || subcode == ROTATERT)
6819 *total += rtx_cost (XEXP (x, 1), code, speed);
6820 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6821 return true;
6824 if (subcode == MULT
6825 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6827 *total += rtx_cost (XEXP (x, 1), code, speed);
6828 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6829 return true;
6832 if (subcode == UMIN || subcode == UMAX
6833 || subcode == SMIN || subcode == SMAX)
6835 *total = COSTS_N_INSNS (3);
6836 return true;
6839 return false;
6841 case MULT:
6842 /* This should have been handled by the CPU specific routines. */
6843 gcc_unreachable ();
6845 case TRUNCATE:
6846 if (arm_arch3m && mode == SImode
6847 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6848 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6849 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6850 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6851 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6852 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6854 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6855 return true;
6857 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6858 return false;
6860 case NEG:
6861 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6863 if (TARGET_HARD_FLOAT
6864 && (mode == SFmode
6865 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6867 *total = COSTS_N_INSNS (1);
6868 return false;
6870 *total = COSTS_N_INSNS (2);
6871 return false;
6874 /* Fall through */
6875 case NOT:
6876 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6877 if (mode == SImode && code == NOT)
6879 subcode = GET_CODE (XEXP (x, 0));
6880 if (subcode == ASHIFT || subcode == ASHIFTRT
6881 || subcode == LSHIFTRT
6882 || subcode == ROTATE || subcode == ROTATERT
6883 || (subcode == MULT
6884 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6886 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6887 /* Register shifts cost an extra cycle. */
6888 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6889 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6890 subcode, speed);
6891 return true;
6895 return false;
6897 case IF_THEN_ELSE:
6898 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6900 *total = COSTS_N_INSNS (4);
6901 return true;
6904 operand = XEXP (x, 0);
6906 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6907 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6908 && GET_CODE (XEXP (operand, 0)) == REG
6909 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6910 *total += COSTS_N_INSNS (1);
6911 *total += (rtx_cost (XEXP (x, 1), code, speed)
6912 + rtx_cost (XEXP (x, 2), code, speed));
6913 return true;
6915 case NE:
6916 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6918 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6919 return true;
6921 goto scc_insn;
6923 case GE:
6924 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6925 && mode == SImode && XEXP (x, 1) == const0_rtx)
6927 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6928 return true;
6930 goto scc_insn;
6932 case LT:
6933 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6934 && mode == SImode && XEXP (x, 1) == const0_rtx)
6936 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6937 return true;
6939 goto scc_insn;
6941 case EQ:
6942 case GT:
6943 case LE:
6944 case GEU:
6945 case LTU:
6946 case GTU:
6947 case LEU:
6948 case UNORDERED:
6949 case ORDERED:
6950 case UNEQ:
6951 case UNGE:
6952 case UNLT:
6953 case UNGT:
6954 case UNLE:
6955 scc_insn:
6956 /* SCC insns. If the comparison has already been performed, they
6957 cost 2 instructions. Otherwise they need an additional comparison
6958 before them. */
6959 *total = COSTS_N_INSNS (2);
6960 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6962 return true;
6965 /* Fall through */
6966 case COMPARE:
6967 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6969 *total = 0;
6970 return true;
6973 *total += COSTS_N_INSNS (1);
6974 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6975 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6977 *total += rtx_cost (XEXP (x, 0), code, speed);
6978 return true;
6981 subcode = GET_CODE (XEXP (x, 0));
6982 if (subcode == ASHIFT || subcode == ASHIFTRT
6983 || subcode == LSHIFTRT
6984 || subcode == ROTATE || subcode == ROTATERT)
6986 *total += rtx_cost (XEXP (x, 1), code, speed);
6987 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6988 return true;
6991 if (subcode == MULT
6992 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6994 *total += rtx_cost (XEXP (x, 1), code, speed);
6995 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6996 return true;
6999 return false;
7001 case UMIN:
7002 case UMAX:
7003 case SMIN:
7004 case SMAX:
7005 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7006 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7007 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7008 *total += rtx_cost (XEXP (x, 1), code, speed);
7009 return true;
7011 case ABS:
7012 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7014 if (TARGET_HARD_FLOAT
7015 && (mode == SFmode
7016 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7018 *total = COSTS_N_INSNS (1);
7019 return false;
7021 *total = COSTS_N_INSNS (20);
7022 return false;
7024 *total = COSTS_N_INSNS (1);
7025 if (mode == DImode)
7026 *total += COSTS_N_INSNS (3);
7027 return false;
7029 case SIGN_EXTEND:
7030 case ZERO_EXTEND:
7031 *total = 0;
7032 if (GET_MODE_CLASS (mode) == MODE_INT)
7034 rtx op = XEXP (x, 0);
7035 enum machine_mode opmode = GET_MODE (op);
7037 if (mode == DImode)
7038 *total += COSTS_N_INSNS (1);
7040 if (opmode != SImode)
7042 if (MEM_P (op))
7044 /* If !arm_arch4, we use one of the extendhisi2_mem
7045 or movhi_bytes patterns for HImode. For a QImode
7046 sign extension, we first zero-extend from memory
7047 and then perform a shift sequence. */
7048 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7049 *total += COSTS_N_INSNS (2);
7051 else if (arm_arch6)
7052 *total += COSTS_N_INSNS (1);
7054 /* We don't have the necessary insn, so we need to perform some
7055 other operation. */
7056 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7057 /* An and with constant 255. */
7058 *total += COSTS_N_INSNS (1);
7059 else
7060 /* A shift sequence. Increase costs slightly to avoid
7061 combining two shifts into an extend operation. */
7062 *total += COSTS_N_INSNS (2) + 1;
7065 return false;
7068 switch (GET_MODE (XEXP (x, 0)))
7070 case V8QImode:
7071 case V4HImode:
7072 case V2SImode:
7073 case V4QImode:
7074 case V2HImode:
7075 *total = COSTS_N_INSNS (1);
7076 return false;
7078 default:
7079 gcc_unreachable ();
7081 gcc_unreachable ();
7083 case ZERO_EXTRACT:
7084 case SIGN_EXTRACT:
7085 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7086 return true;
7088 case CONST_INT:
7089 if (const_ok_for_arm (INTVAL (x))
7090 || const_ok_for_arm (~INTVAL (x)))
7091 *total = COSTS_N_INSNS (1);
7092 else
7093 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7094 INTVAL (x), NULL_RTX,
7095 NULL_RTX, 0, 0));
7096 return true;
7098 case CONST:
7099 case LABEL_REF:
7100 case SYMBOL_REF:
7101 *total = COSTS_N_INSNS (3);
7102 return true;
7104 case HIGH:
7105 *total = COSTS_N_INSNS (1);
7106 return true;
7108 case LO_SUM:
7109 *total = COSTS_N_INSNS (1);
7110 *total += rtx_cost (XEXP (x, 0), code, speed);
7111 return true;
7113 case CONST_DOUBLE:
7114 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7115 && (mode == SFmode || !TARGET_VFP_SINGLE))
7116 *total = COSTS_N_INSNS (1);
7117 else
7118 *total = COSTS_N_INSNS (4);
7119 return true;
7121 default:
7122 *total = COSTS_N_INSNS (4);
7123 return false;
7127 /* Estimate the size cost of Thumb-1 instructions.
7128 For now most of the code is copied from thumb1_rtx_costs. We need
7129 finer-grained tuning when we have more related test cases. */
7130 static inline int
7131 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7133 enum machine_mode mode = GET_MODE (x);
7135 switch (code)
7137 case ASHIFT:
7138 case ASHIFTRT:
7139 case LSHIFTRT:
7140 case ROTATERT:
7141 case PLUS:
7142 case MINUS:
7143 case COMPARE:
7144 case NEG:
7145 case NOT:
7146 return COSTS_N_INSNS (1);
7148 case MULT:
7149 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7151 /* The Thumb-1 mul instruction can't operate on a constant; we must
7152 load it into a register first. */
7153 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7154 return COSTS_N_INSNS (1) + const_size;
7156 return COSTS_N_INSNS (1);
7158 case SET:
7159 return (COSTS_N_INSNS (1)
7160 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7161 + (GET_CODE (SET_DEST (x)) == MEM)));
7163 case CONST_INT:
7164 if (outer == SET)
7166 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7167 return COSTS_N_INSNS (1);
7168 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7169 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7170 return COSTS_N_INSNS (2);
7171 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7172 if (thumb_shiftable_const (INTVAL (x)))
7173 return COSTS_N_INSNS (2);
7174 return COSTS_N_INSNS (3);
7176 else if ((outer == PLUS || outer == COMPARE)
7177 && INTVAL (x) < 256 && INTVAL (x) > -256)
7178 return 0;
7179 else if ((outer == IOR || outer == XOR || outer == AND)
7180 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7181 return COSTS_N_INSNS (1);
7182 else if (outer == AND)
7184 int i;
7185 /* This duplicates the tests in the andsi3 expander. */
7186 for (i = 9; i <= 31; i++)
7187 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7188 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7189 return COSTS_N_INSNS (2);
7191 else if (outer == ASHIFT || outer == ASHIFTRT
7192 || outer == LSHIFTRT)
7193 return 0;
7194 return COSTS_N_INSNS (2);
7196 case CONST:
7197 case CONST_DOUBLE:
7198 case LABEL_REF:
7199 case SYMBOL_REF:
7200 return COSTS_N_INSNS (3);
7202 case UDIV:
7203 case UMOD:
7204 case DIV:
7205 case MOD:
7206 return 100;
7208 case TRUNCATE:
7209 return 99;
7211 case AND:
7212 case XOR:
7213 case IOR:
7214 /* XXX guess. */
7215 return 8;
7217 case MEM:
7218 /* XXX another guess. */
7219 /* Memory costs quite a lot for the first word, but subsequent words
7220 load at the equivalent of a single insn each. */
7221 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7222 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7223 ? 4 : 0));
7225 case IF_THEN_ELSE:
7226 /* XXX a guess. */
7227 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7228 return 14;
7229 return 2;
7231 case ZERO_EXTEND:
7232 /* XXX still guessing. */
7233 switch (GET_MODE (XEXP (x, 0)))
7235 case QImode:
7236 return (1 + (mode == DImode ? 4 : 0)
7237 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7239 case HImode:
7240 return (4 + (mode == DImode ? 4 : 0)
7241 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7243 case SImode:
7244 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7246 default:
7247 return 99;
7250 default:
7251 return 99;
7255 /* RTX costs when optimizing for size. */
7256 static bool
7257 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7258 int *total)
7260 enum machine_mode mode = GET_MODE (x);
7261 if (TARGET_THUMB1)
7263 *total = thumb1_size_rtx_costs (x, code, outer_code);
7264 return true;
7267 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7268 switch (code)
7270 case MEM:
7271 /* A memory access costs 1 insn if the mode is small or the address is
7272 a single register; otherwise it costs one insn per word. */
7273 if (REG_P (XEXP (x, 0)))
7274 *total = COSTS_N_INSNS (1);
7275 else if (flag_pic
7276 && GET_CODE (XEXP (x, 0)) == PLUS
7277 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7278 /* This will be split into two instructions.
7279 See arm.md:calculate_pic_address. */
7280 *total = COSTS_N_INSNS (2);
7281 else
7282 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7283 return true;
7285 case DIV:
7286 case MOD:
7287 case UDIV:
7288 case UMOD:
7289 /* Needs a libcall, so it costs about this. */
7290 *total = COSTS_N_INSNS (2);
7291 return false;
7293 case ROTATE:
7294 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7296 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7297 return true;
7299 /* Fall through */
7300 case ROTATERT:
7301 case ASHIFT:
7302 case LSHIFTRT:
7303 case ASHIFTRT:
7304 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7306 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7307 return true;
7309 else if (mode == SImode)
7311 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7312 /* Slightly disparage register shifts, but not by much. */
7313 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7314 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7315 return true;
7318 /* Needs a libcall. */
7319 *total = COSTS_N_INSNS (2);
7320 return false;
7322 case MINUS:
7323 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7324 && (mode == SFmode || !TARGET_VFP_SINGLE))
7326 *total = COSTS_N_INSNS (1);
7327 return false;
7330 if (mode == SImode)
7332 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7333 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7335 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7336 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7337 || subcode1 == ROTATE || subcode1 == ROTATERT
7338 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7339 || subcode1 == ASHIFTRT)
7341 /* It's just the cost of the two operands. */
7342 *total = 0;
7343 return false;
7346 *total = COSTS_N_INSNS (1);
7347 return false;
7350 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7351 return false;
7353 case PLUS:
7354 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7355 && (mode == SFmode || !TARGET_VFP_SINGLE))
7357 *total = COSTS_N_INSNS (1);
7358 return false;
7361 /* A shift as a part of ADD costs nothing. */
7362 if (GET_CODE (XEXP (x, 0)) == MULT
7363 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7365 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7366 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7367 *total += rtx_cost (XEXP (x, 1), code, false);
7368 return true;
7371 /* Fall through */
7372 case AND: case XOR: case IOR:
7373 if (mode == SImode)
7375 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7377 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7378 || subcode == LSHIFTRT || subcode == ASHIFTRT
7379 || (code == AND && subcode == NOT))
7381 /* It's just the cost of the two operands. */
7382 *total = 0;
7383 return false;
7387 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7388 return false;
7390 case MULT:
7391 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7392 return false;
7394 case NEG:
7395 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7396 && (mode == SFmode || !TARGET_VFP_SINGLE))
7398 *total = COSTS_N_INSNS (1);
7399 return false;
7402 /* Fall through */
7403 case NOT:
7404 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7406 return false;
7408 case IF_THEN_ELSE:
7409 *total = 0;
7410 return false;
7412 case COMPARE:
7413 if (cc_register (XEXP (x, 0), VOIDmode))
7414 * total = 0;
7415 else
7416 *total = COSTS_N_INSNS (1);
7417 return false;
7419 case ABS:
7420 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7421 && (mode == SFmode || !TARGET_VFP_SINGLE))
7422 *total = COSTS_N_INSNS (1);
7423 else
7424 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7425 return false;
7427 case SIGN_EXTEND:
7428 case ZERO_EXTEND:
7429 return arm_rtx_costs_1 (x, outer_code, total, 0);
7431 case CONST_INT:
7432 if (const_ok_for_arm (INTVAL (x)))
7433 /* A multiplication by a constant requires another instruction
7434 to load the constant to a register. */
7435 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7436 ? 1 : 0);
7437 else if (const_ok_for_arm (~INTVAL (x)))
7438 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7439 else if (const_ok_for_arm (-INTVAL (x)))
7441 if (outer_code == COMPARE || outer_code == PLUS
7442 || outer_code == MINUS)
7443 *total = 0;
7444 else
7445 *total = COSTS_N_INSNS (1);
7447 else
7448 *total = COSTS_N_INSNS (2);
7449 return true;
7451 case CONST:
7452 case LABEL_REF:
7453 case SYMBOL_REF:
7454 *total = COSTS_N_INSNS (2);
7455 return true;
7457 case CONST_DOUBLE:
7458 *total = COSTS_N_INSNS (4);
7459 return true;
7461 case HIGH:
7462 case LO_SUM:
7463 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7464 cost of these slightly. */
7465 *total = COSTS_N_INSNS (1) + 1;
7466 return true;
7468 default:
7469 if (mode != VOIDmode)
7470 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7471 else
7472 *total = COSTS_N_INSNS (4); /* Who knows? */
7473 return false;
7477 /* RTX costs.  Dispatch to the size or speed cost variant as appropriate. */
7478 static bool
7479 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7480 bool speed)
7482 if (!speed)
7483 return arm_size_rtx_costs (x, (enum rtx_code) code,
7484 (enum rtx_code) outer_code, total);
7485 else
7486 return current_tune->rtx_costs (x, (enum rtx_code) code,
7487 (enum rtx_code) outer_code,
7488 total, speed);
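/* current_tune->rtx_costs is the per-core speed cost hook; the slowmul,
   fastmul, xscale and 9e variants that follow are among its
   implementations.  */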
7491 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7492 supported on any "slowmul" cores, so it can be ignored. */
7494 static bool
7495 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7496 int *total, bool speed)
7498 enum machine_mode mode = GET_MODE (x);
7500 if (TARGET_THUMB)
7502 *total = thumb1_rtx_costs (x, code, outer_code);
7503 return true;
7506 switch (code)
7508 case MULT:
7509 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7510 || mode == DImode)
7512 *total = COSTS_N_INSNS (20);
7513 return false;
7516 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7518 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7519 & (unsigned HOST_WIDE_INT) 0xffffffff);
7520 int cost, const_ok = const_ok_for_arm (i);
7521 int j, booth_unit_size;
7523 /* Tune as appropriate. */
7524 cost = const_ok ? 4 : 8;
7525 booth_unit_size = 2;
7526 for (j = 0; i && j < 32; j += booth_unit_size)
7528 i >>= booth_unit_size;
7529 cost++;
7532 *total = COSTS_N_INSNS (cost);
7533 *total += rtx_cost (XEXP (x, 0), code, speed);
7534 return true;
7537 *total = COSTS_N_INSNS (20);
7538 return false;
7540 default:
7541 return arm_rtx_costs_1 (x, outer_code, total, speed);
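/* Note booth_unit_size == 2 above: the slow multiplier is modelled as
   retiring two bits of the constant per step, versus eight bits per step
   in arm_fastmul_rtx_costs below.  */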
7546 /* RTX cost for cores with a fast multiply unit (M variants). */
7548 static bool
7549 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7550 int *total, bool speed)
7552 enum machine_mode mode = GET_MODE (x);
7554 if (TARGET_THUMB1)
7556 *total = thumb1_rtx_costs (x, code, outer_code);
7557 return true;
7560 /* ??? should thumb2 use different costs? */
7561 switch (code)
7563 case MULT:
7564 /* There is no point basing this on the tuning, since it is always the
7565 fast variant if it exists at all. */
7566 if (mode == DImode
7567 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7568 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7569 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7571 *total = COSTS_N_INSNS(2);
7572 return false;
7576 if (mode == DImode)
7578 *total = COSTS_N_INSNS (5);
7579 return false;
7582 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7584 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7585 & (unsigned HOST_WIDE_INT) 0xffffffff);
7586 int cost, const_ok = const_ok_for_arm (i);
7587 int j, booth_unit_size;
7589 /* Tune as appropriate. */
7590 cost = const_ok ? 4 : 8;
7591 booth_unit_size = 8;
7592 for (j = 0; i && j < 32; j += booth_unit_size)
7594 i >>= booth_unit_size;
7595 cost++;
7598 *total = COSTS_N_INSNS(cost);
7599 return false;
7602 if (mode == SImode)
7604 *total = COSTS_N_INSNS (4);
7605 return false;
7608 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7610 if (TARGET_HARD_FLOAT
7611 && (mode == SFmode
7612 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7614 *total = COSTS_N_INSNS (1);
7615 return false;
7619 /* Requires a libcall.  */
7620 *total = COSTS_N_INSNS (20);
7621 return false;
7623 default:
7624 return arm_rtx_costs_1 (x, outer_code, total, speed);
7629 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7630 so it can be ignored. */
7632 static bool
7633 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7634 int *total, bool speed)
7636 enum machine_mode mode = GET_MODE (x);
7638 if (TARGET_THUMB)
7640 *total = thumb1_rtx_costs (x, code, outer_code);
7641 return true;
7644 switch (code)
7646 case COMPARE:
7647 if (GET_CODE (XEXP (x, 0)) != MULT)
7648 return arm_rtx_costs_1 (x, outer_code, total, speed);
7650 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7651 will stall until the multiplication is complete. */
7652 *total = COSTS_N_INSNS (3);
7653 return false;
7655 case MULT:
7656 /* There is no point basing this on the tuning, since it is always the
7657 fast variant if it exists at all. */
7658 if (mode == DImode
7659 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7660 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7661 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7663 *total = COSTS_N_INSNS (2);
7664 return false;
7668 if (mode == DImode)
7670 *total = COSTS_N_INSNS (5);
7671 return false;
7674 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7676 /* If operand 1 is a constant we can more accurately
7677 calculate the cost of the multiply. The multiplier can
7678 retire 15 bits on the first cycle and a further 12 on the
7679 second. We do, of course, have to load the constant into
7680 a register first. */
7681 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7682 /* There's a general overhead of one cycle. */
7683 int cost = 1;
7684 unsigned HOST_WIDE_INT masked_const;
7686 if (i & 0x80000000)
7687 i = ~i;
7689 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7691 masked_const = i & 0xffff8000;
7692 if (masked_const != 0)
7694 cost++;
7695 masked_const = i & 0xf8000000;
7696 if (masked_const != 0)
7697 cost++;
7699 *total = COSTS_N_INSNS (cost);
7700 return false;
7703 if (mode == SImode)
7705 *total = COSTS_N_INSNS (3);
7706 return false;
7709 /* Requires a lib call */
7710 *total = COSTS_N_INSNS (20);
7711 return false;
7713 default:
7714 return arm_rtx_costs_1 (x, outer_code, total, speed);
7719 /* RTX costs for 9e (and later) cores. */
7721 static bool
7722 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7723 int *total, bool speed)
7725 enum machine_mode mode = GET_MODE (x);
7727 if (TARGET_THUMB1)
7729 switch (code)
7731 case MULT:
7732 *total = COSTS_N_INSNS (3);
7733 return true;
7735 default:
7736 *total = thumb1_rtx_costs (x, code, outer_code);
7737 return true;
7741 switch (code)
7743 case MULT:
7744 /* There is no point basing this on the tuning, since it is always the
7745 fast variant if it exists at all. */
7746 if (mode == DImode
7747 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7748 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7749 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7751 *total = COSTS_N_INSNS (2);
7752 return false;
7756 if (mode == DImode)
7758 *total = COSTS_N_INSNS (5);
7759 return false;
7762 if (mode == SImode)
7764 *total = COSTS_N_INSNS (2);
7765 return false;
7768 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7770 if (TARGET_HARD_FLOAT
7771 && (mode == SFmode
7772 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7774 *total = COSTS_N_INSNS (1);
7775 return false;
7779 *total = COSTS_N_INSNS (20);
7780 return false;
7782 default:
7783 return arm_rtx_costs_1 (x, outer_code, total, speed);
7786 /* All address computations that can be done are free, but rtx cost returns
7787 the same for practically all of them. So we weight the different types
 7788 of address here in the order (most preferred first):
7789 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
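 /* For instance (illustrative), under arm_arm_address_cost below a
    post-increment address costs 0, (plus reg const_int) costs 2,
    (plus reg reg) costs 4, a bare register costs 6 and a symbolic
    address costs 10.  */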
7790 static inline int
7791 arm_arm_address_cost (rtx x)
7793 enum rtx_code c = GET_CODE (x);
7795 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7796 return 0;
7797 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7798 return 10;
7800 if (c == PLUS)
7802 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7803 return 2;
7805 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7806 return 3;
7808 return 4;
7811 return 6;
7814 static inline int
7815 arm_thumb_address_cost (rtx x)
7817 enum rtx_code c = GET_CODE (x);
7819 if (c == REG)
7820 return 1;
7821 if (c == PLUS
7822 && GET_CODE (XEXP (x, 0)) == REG
7823 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7824 return 1;
7826 return 2;
7829 static int
7830 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7832 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7835 /* Adjust cost hook for XScale. */
7836 static bool
7837 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7839 /* Some true dependencies can have a higher cost depending
7840 on precisely how certain input operands are used. */
7841 if (REG_NOTE_KIND(link) == 0
7842 && recog_memoized (insn) >= 0
7843 && recog_memoized (dep) >= 0)
7845 int shift_opnum = get_attr_shift (insn);
7846 enum attr_type attr_type = get_attr_type (dep);
7848 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7849 operand for INSN. If we have a shifted input operand and the
7850 instruction we depend on is another ALU instruction, then we may
7851 have to account for an additional stall. */
7852 if (shift_opnum != 0
7853 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7855 rtx shifted_operand;
7856 int opno;
7858 /* Get the shifted operand. */
7859 extract_insn (insn);
7860 shifted_operand = recog_data.operand[shift_opnum];
7862 /* Iterate over all the operands in DEP. If we write an operand
 7863 that overlaps with SHIFTED_OPERAND, then we have to increase the
7864 cost of this dependency. */
7865 extract_insn (dep);
7866 preprocess_constraints ();
7867 for (opno = 0; opno < recog_data.n_operands; opno++)
7869 /* We can ignore strict inputs. */
7870 if (recog_data.operand_type[opno] == OP_IN)
7871 continue;
7873 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7874 shifted_operand))
7876 *cost = 2;
7877 return false;
7882 return true;
7885 /* Adjust cost hook for Cortex A9. */
7886 static bool
7887 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7889 switch (REG_NOTE_KIND (link))
7891 case REG_DEP_ANTI:
7892 *cost = 0;
7893 return false;
7895 case REG_DEP_TRUE:
7896 case REG_DEP_OUTPUT:
7897 if (recog_memoized (insn) >= 0
7898 && recog_memoized (dep) >= 0)
7900 if (GET_CODE (PATTERN (insn)) == SET)
7902 if (GET_MODE_CLASS
7903 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7904 || GET_MODE_CLASS
7905 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7907 enum attr_type attr_type_insn = get_attr_type (insn);
7908 enum attr_type attr_type_dep = get_attr_type (dep);
7910 /* By default all dependencies of the form
7911 s0 = s0 <op> s1
7912 s0 = s0 <op> s2
7913 have an extra latency of 1 cycle because
7914 of the input and output dependency in this
 7915 case. However, this gets modeled as a true
7916 dependency and hence all these checks. */
7917 if (REG_P (SET_DEST (PATTERN (insn)))
7918 && REG_P (SET_DEST (PATTERN (dep)))
7919 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7920 SET_DEST (PATTERN (dep))))
 7922 /* FMACS is a special case where the dependent
7923 instruction can be issued 3 cycles before
7924 the normal latency in case of an output
7925 dependency. */
7926 if ((attr_type_insn == TYPE_FMACS
7927 || attr_type_insn == TYPE_FMACD)
7928 && (attr_type_dep == TYPE_FMACS
7929 || attr_type_dep == TYPE_FMACD))
7931 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7932 *cost = insn_default_latency (dep) - 3;
7933 else
7934 *cost = insn_default_latency (dep);
7935 return false;
7937 else
7939 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7940 *cost = insn_default_latency (dep) + 1;
7941 else
7942 *cost = insn_default_latency (dep);
7944 return false;
7949 break;
7951 default:
7952 gcc_unreachable ();
7955 return true;
7958 /* Adjust cost hook for FA726TE. */
7959 static bool
7960 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
 7962 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
 7963 followed by a predicated one) has a penalty of 3. */
7964 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
7965 && recog_memoized (insn) >= 0
7966 && recog_memoized (dep) >= 0
7967 && get_attr_conds (dep) == CONDS_SET)
7969 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
7970 if (get_attr_conds (insn) == CONDS_USE
7971 && get_attr_type (insn) != TYPE_BRANCH)
7973 *cost = 3;
7974 return false;
7977 if (GET_CODE (PATTERN (insn)) == COND_EXEC
7978 || get_attr_conds (insn) == CONDS_USE)
7980 *cost = 0;
7981 return false;
7985 return true;
7988 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
7989 It corrects the value of COST based on the relationship between
7990 INSN and DEP through the dependence LINK. It returns the new
7991 value. There is a per-core adjust_cost hook to adjust scheduler costs
7992 and the per-core hook can choose to completely override the generic
7993 adjust_cost function. Only put bits of code into arm_adjust_cost that
7994 are common across all cores. */
7995 static int
7996 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7998 rtx i_pat, d_pat;
8000 /* When generating Thumb-1 code, we want to place flag-setting operations
8001 close to a conditional branch which depends on them, so that we can
8002 omit the comparison. */
8003 if (TARGET_THUMB1
8004 && REG_NOTE_KIND (link) == 0
8005 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8006 && recog_memoized (dep) >= 0
8007 && get_attr_conds (dep) == CONDS_SET)
8008 return 0;
8010 if (current_tune->sched_adjust_cost != NULL)
8012 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8013 return cost;
8016 /* XXX This is not strictly true for the FPA. */
8017 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8018 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8019 return 0;
8021 /* Call insns don't incur a stall, even if they follow a load. */
8022 if (REG_NOTE_KIND (link) == 0
8023 && GET_CODE (insn) == CALL_INSN)
8024 return 1;
8026 if ((i_pat = single_set (insn)) != NULL
8027 && GET_CODE (SET_SRC (i_pat)) == MEM
8028 && (d_pat = single_set (dep)) != NULL
8029 && GET_CODE (SET_DEST (d_pat)) == MEM)
8031 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
 8032 /* This is a load after a store; there is no conflict if the load reads
8033 from a cached area. Assume that loads from the stack, and from the
8034 constant pool are cached, and that others will miss. This is a
8035 hack. */
8037 if ((GET_CODE (src_mem) == SYMBOL_REF
8038 && CONSTANT_POOL_ADDRESS_P (src_mem))
8039 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8040 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8041 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8042 return 1;
8045 return cost;
8048 static int
8049 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8051 if (TARGET_32BIT)
8052 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8053 else
8054 return (optimize > 0) ? 2 : 0;
8057 static int fp_consts_inited = 0;
8059 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8060 static const char * const strings_fp[8] =
8062 "0", "1", "2", "3",
8063 "4", "5", "0.5", "10"
8066 static REAL_VALUE_TYPE values_fp[8];
8068 static void
8069 init_fp_table (void)
8071 int i;
8072 REAL_VALUE_TYPE r;
8074 if (TARGET_VFP)
8075 fp_consts_inited = 1;
8076 else
8077 fp_consts_inited = 8;
8079 for (i = 0; i < fp_consts_inited; i++)
8081 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8082 values_fp[i] = r;
8086 /* Return TRUE if rtx X is a valid immediate FP constant. */
8088 arm_const_double_rtx (rtx x)
8090 REAL_VALUE_TYPE r;
8091 int i;
8093 if (!fp_consts_inited)
8094 init_fp_table ();
8096 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8097 if (REAL_VALUE_MINUS_ZERO (r))
8098 return 0;
8100 for (i = 0; i < fp_consts_inited; i++)
8101 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8102 return 1;
8104 return 0;
8107 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8109 neg_const_double_rtx_ok_for_fpa (rtx x)
8111 REAL_VALUE_TYPE r;
8112 int i;
8114 if (!fp_consts_inited)
8115 init_fp_table ();
8117 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8118 r = real_value_negate (&r);
8119 if (REAL_VALUE_MINUS_ZERO (r))
8120 return 0;
8122 for (i = 0; i < 8; i++)
8123 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8124 return 1;
8126 return 0;
8130 /* VFPv3 has a fairly wide range of representable immediates, formed from
8131 "quarter-precision" floating-point values. These can be evaluated using this
8132 formula (with ^ for exponentiation):
8134 -1^s * n * 2^-r
8136 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8137 16 <= n <= 31 and 0 <= r <= 7.
8139 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8141 - A (most-significant) is the sign bit.
8142 - BCD are the exponent (encoded as r XOR 3).
8143 - EFGH are the mantissa (encoded as n - 16).
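
    A worked example (illustrative): 0.5 = 16 * 2^-5, so s = 0, n = 16 and
    r = 5, giving ABCDEFGH = 0 110 0000 = 0x60; likewise 1.0 = 16 * 2^-4
    encodes as 0x70.  */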
8146 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8147 fconst[sd] instruction, or -1 if X isn't suitable. */
8148 static int
8149 vfp3_const_double_index (rtx x)
8151 REAL_VALUE_TYPE r, m;
8152 int sign, exponent;
8153 unsigned HOST_WIDE_INT mantissa, mant_hi;
8154 unsigned HOST_WIDE_INT mask;
8155 HOST_WIDE_INT m1, m2;
8156 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8158 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8159 return -1;
8161 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8163 /* We can't represent these things, so detect them first. */
8164 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8165 return -1;
8167 /* Extract sign, exponent and mantissa. */
8168 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8169 r = real_value_abs (&r);
8170 exponent = REAL_EXP (&r);
8171 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8172 highest (sign) bit, with a fixed binary point at bit point_pos.
8173 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8174 bits for the mantissa, this may fail (low bits would be lost). */
8175 real_ldexp (&m, &r, point_pos - exponent);
8176 REAL_VALUE_TO_INT (&m1, &m2, m);
8177 mantissa = m1;
8178 mant_hi = m2;
8180 /* If there are bits set in the low part of the mantissa, we can't
8181 represent this value. */
8182 if (mantissa != 0)
8183 return -1;
8185 /* Now make it so that mantissa contains the most-significant bits, and move
8186 the point_pos to indicate that the least-significant bits have been
8187 discarded. */
8188 point_pos -= HOST_BITS_PER_WIDE_INT;
8189 mantissa = mant_hi;
8191 /* We can permit four significant bits of mantissa only, plus a high bit
8192 which is always 1. */
8193 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8194 if ((mantissa & mask) != 0)
8195 return -1;
8197 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8198 mantissa >>= point_pos - 5;
8200 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8201 floating-point immediate zero with Neon using an integer-zero load, but
8202 that case is handled elsewhere.) */
8203 if (mantissa == 0)
8204 return -1;
8206 gcc_assert (mantissa >= 16 && mantissa <= 31);
8208 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8209 normalized significands are in the range [1, 2). (Our mantissa is shifted
8210 left 4 places at this point relative to normalized IEEE754 values). GCC
8211 internally uses [0.5, 1) (see real.c), so the exponent returned from
8212 REAL_EXP must be altered. */
8213 exponent = 5 - exponent;
8215 if (exponent < 0 || exponent > 7)
8216 return -1;
8218 /* Sign, mantissa and exponent are now in the correct form to plug into the
8219 formula described in the comment above. */
8220 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8223 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8225 vfp3_const_double_rtx (rtx x)
8227 if (!TARGET_VFP3)
8228 return 0;
8230 return vfp3_const_double_index (x) != -1;
8233 /* Recognize immediates which can be used in various Neon instructions. Legal
8234 immediates are described by the following table (for VMVN variants, the
8235 bitwise inverse of the constant shown is recognized. In either case, VMOV
8236 is output and the correct instruction to use for a given constant is chosen
8237 by the assembler). The constant shown is replicated across all elements of
8238 the destination vector.
8240 insn elems variant constant (binary)
8241 ---- ----- ------- -----------------
8242 vmov i32 0 00000000 00000000 00000000 abcdefgh
8243 vmov i32 1 00000000 00000000 abcdefgh 00000000
8244 vmov i32 2 00000000 abcdefgh 00000000 00000000
8245 vmov i32 3 abcdefgh 00000000 00000000 00000000
8246 vmov i16 4 00000000 abcdefgh
8247 vmov i16 5 abcdefgh 00000000
8248 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8249 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8250 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8251 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8252 vmvn i16 10 00000000 abcdefgh
8253 vmvn i16 11 abcdefgh 00000000
8254 vmov i32 12 00000000 00000000 abcdefgh 11111111
8255 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8256 vmov i32 14 00000000 abcdefgh 11111111 11111111
8257 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8258 vmov i8 16 abcdefgh
8259 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8260 eeeeeeee ffffffff gggggggg hhhhhhhh
8261 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8263 For case 18, B = !b. Representable values are exactly those accepted by
8264 vfp3_const_double_index, but are output as floating-point numbers rather
8265 than indices.
8267 Variants 0-5 (inclusive) may also be used as immediates for the second
8268 operand of VORR/VBIC instructions.
8270 The INVERSE argument causes the bitwise inverse of the given operand to be
8271 recognized instead (used for recognizing legal immediates for the VAND/VORN
8272 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8273 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8274 output, rather than the real insns vbic/vorr).
8276 INVERSE makes no difference to the recognition of float vectors.
8278 The return value is the variant of immediate as shown in the above table, or
8279 -1 if the given value doesn't match any of the listed patterns.
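
    For instance (illustrative), the V4SImode vector with every element
    equal to 0x000000ab matches variant 0 with a 32-bit element width,
    while its bitwise inverse (elements of 0xffffff54) matches variant 6.  */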
8281 static int
8282 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8283 rtx *modconst, int *elementwidth)
8285 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8286 matches = 1; \
8287 for (i = 0; i < idx; i += (STRIDE)) \
8288 if (!(TEST)) \
8289 matches = 0; \
8290 if (matches) \
8292 immtype = (CLASS); \
8293 elsize = (ELSIZE); \
8294 break; \
8297 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8298 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8299 unsigned char bytes[16];
8300 int immtype = -1, matches;
8301 unsigned int invmask = inverse ? 0xff : 0;
8303 /* Vectors of float constants. */
8304 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8306 rtx el0 = CONST_VECTOR_ELT (op, 0);
8307 REAL_VALUE_TYPE r0;
8309 if (!vfp3_const_double_rtx (el0))
8310 return -1;
8312 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8314 for (i = 1; i < n_elts; i++)
8316 rtx elt = CONST_VECTOR_ELT (op, i);
8317 REAL_VALUE_TYPE re;
8319 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8321 if (!REAL_VALUES_EQUAL (r0, re))
8322 return -1;
8325 if (modconst)
8326 *modconst = CONST_VECTOR_ELT (op, 0);
8328 if (elementwidth)
8329 *elementwidth = 0;
8331 return 18;
8334 /* Splat vector constant out into a byte vector. */
8335 for (i = 0; i < n_elts; i++)
8337 rtx el = CONST_VECTOR_ELT (op, i);
8338 unsigned HOST_WIDE_INT elpart;
8339 unsigned int part, parts;
8341 if (GET_CODE (el) == CONST_INT)
8343 elpart = INTVAL (el);
8344 parts = 1;
8346 else if (GET_CODE (el) == CONST_DOUBLE)
8348 elpart = CONST_DOUBLE_LOW (el);
8349 parts = 2;
8351 else
8352 gcc_unreachable ();
8354 for (part = 0; part < parts; part++)
8356 unsigned int byte;
8357 for (byte = 0; byte < innersize; byte++)
8359 bytes[idx++] = (elpart & 0xff) ^ invmask;
8360 elpart >>= BITS_PER_UNIT;
8362 if (GET_CODE (el) == CONST_DOUBLE)
8363 elpart = CONST_DOUBLE_HIGH (el);
8367 /* Sanity check. */
8368 gcc_assert (idx == GET_MODE_SIZE (mode));
8372 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8373 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8375 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8376 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8378 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8379 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8381 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8382 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8384 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8386 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8388 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8389 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8391 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8392 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8394 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8395 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8397 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8398 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8400 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8402 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8404 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8405 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8407 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8408 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8410 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8411 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8413 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8414 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8416 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8418 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8419 && bytes[i] == bytes[(i + 8) % idx]);
8421 while (0);
8423 if (immtype == -1)
8424 return -1;
8426 if (elementwidth)
8427 *elementwidth = elsize;
8429 if (modconst)
8431 unsigned HOST_WIDE_INT imm = 0;
8433 /* Un-invert bytes of recognized vector, if necessary. */
8434 if (invmask != 0)
8435 for (i = 0; i < idx; i++)
8436 bytes[i] ^= invmask;
8438 if (immtype == 17)
8440 /* FIXME: Broken on 32-bit H_W_I hosts. */
8441 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8443 for (i = 0; i < 8; i++)
8444 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8445 << (i * BITS_PER_UNIT);
8447 *modconst = GEN_INT (imm);
8449 else
8451 unsigned HOST_WIDE_INT imm = 0;
8453 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8454 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8456 *modconst = GEN_INT (imm);
8460 return immtype;
8461 #undef CHECK
8464 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8465 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8466 float elements), and a modified constant (whatever should be output for a
8467 VMOV) in *MODCONST. */
8470 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8471 rtx *modconst, int *elementwidth)
8473 rtx tmpconst;
8474 int tmpwidth;
8475 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8477 if (retval == -1)
8478 return 0;
8480 if (modconst)
8481 *modconst = tmpconst;
8483 if (elementwidth)
8484 *elementwidth = tmpwidth;
8486 return 1;
8489 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8490 the immediate is valid, write a constant suitable for using as an operand
8491 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8492 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8495 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8496 rtx *modconst, int *elementwidth)
8498 rtx tmpconst;
8499 int tmpwidth;
8500 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8502 if (retval < 0 || retval > 5)
8503 return 0;
8505 if (modconst)
8506 *modconst = tmpconst;
8508 if (elementwidth)
8509 *elementwidth = tmpwidth;
8511 return 1;
8514 /* Return a string suitable for output of Neon immediate logic operation
8515 MNEM. */
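 /* For example (illustrative), with MNEM "vorr" and a 32-bit element
    width the quad-register form returned is "vorr.i32\t%q0, %2"; the
    double-register form uses %P0 in place of %q0.  */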
8517 char *
8518 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8519 int inverse, int quad)
8521 int width, is_valid;
8522 static char templ[40];
8524 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8526 gcc_assert (is_valid != 0);
8528 if (quad)
8529 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8530 else
8531 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8533 return templ;
8536 /* Output a sequence of pairwise operations to implement a reduction.
8537 NOTE: We do "too much work" here, because pairwise operations work on two
 8538 registers-worth of operands in one go. Unfortunately we don't think we can
 8539 exploit those extra calculations to do the full operation in fewer steps.
8540 Although all vector elements of the result but the first are ignored, we
8541 actually calculate the same result in each of the elements. An alternative
8542 such as initially loading a vector with zero to use as each of the second
8543 operands would use up an additional register and take an extra instruction,
8544 for no particular gain. */
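 /* For a V4SImode reduction (illustrative), PARTS below is 4, so two
    pairwise operations are emitted: the first into a fresh scratch
    register, the second into OP0.  */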
8546 void
8547 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8548 rtx (*reduc) (rtx, rtx, rtx))
8550 enum machine_mode inner = GET_MODE_INNER (mode);
8551 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8552 rtx tmpsum = op1;
8554 for (i = parts / 2; i >= 1; i /= 2)
8556 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8557 emit_insn (reduc (dest, tmpsum, tmpsum));
8558 tmpsum = dest;
8562 /* If VALS is a vector constant that can be loaded into a register
8563 using VDUP, generate instructions to do so and return an RTX to
8564 assign to the register. Otherwise return NULL_RTX. */
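 /* For example (illustrative), the V4SImode constant { 5, 5, 5, 5 } can be
    loaded as a "mov" of 5 into a core register followed by "vdup.32",
    rather than a literal-pool load.  */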
8566 static rtx
8567 neon_vdup_constant (rtx vals)
8569 enum machine_mode mode = GET_MODE (vals);
8570 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8571 int n_elts = GET_MODE_NUNITS (mode);
8572 bool all_same = true;
8573 rtx x;
8574 int i;
8576 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8577 return NULL_RTX;
8579 for (i = 0; i < n_elts; ++i)
8581 x = XVECEXP (vals, 0, i);
8582 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8583 all_same = false;
8586 if (!all_same)
8587 /* The elements are not all the same. We could handle repeating
8588 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8589 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8590 vdup.i16). */
8591 return NULL_RTX;
8593 /* We can load this constant by using VDUP and a constant in a
8594 single ARM register. This will be cheaper than a vector
8595 load. */
8597 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8598 return gen_rtx_VEC_DUPLICATE (mode, x);
8601 /* Generate code to load VALS, which is a PARALLEL containing only
8602 constants (for vec_init) or CONST_VECTOR, efficiently into a
8603 register. Returns an RTX to copy into the register, or NULL_RTX
8604 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8607 neon_make_constant (rtx vals)
8609 enum machine_mode mode = GET_MODE (vals);
8610 rtx target;
8611 rtx const_vec = NULL_RTX;
8612 int n_elts = GET_MODE_NUNITS (mode);
8613 int n_const = 0;
8614 int i;
8616 if (GET_CODE (vals) == CONST_VECTOR)
8617 const_vec = vals;
8618 else if (GET_CODE (vals) == PARALLEL)
8620 /* A CONST_VECTOR must contain only CONST_INTs and
8621 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8622 Only store valid constants in a CONST_VECTOR. */
8623 for (i = 0; i < n_elts; ++i)
8625 rtx x = XVECEXP (vals, 0, i);
8626 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8627 n_const++;
8629 if (n_const == n_elts)
8630 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8632 else
8633 gcc_unreachable ();
8635 if (const_vec != NULL
8636 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8637 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8638 return const_vec;
8639 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8640 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8641 pipeline cycle; creating the constant takes one or two ARM
8642 pipeline cycles. */
8643 return target;
8644 else if (const_vec != NULL_RTX)
8645 /* Load from constant pool. On Cortex-A8 this takes two cycles
8646 (for either double or quad vectors). We can not take advantage
8647 of single-cycle VLD1 because we need a PC-relative addressing
8648 mode. */
8649 return const_vec;
8650 else
8651 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8652 We can not construct an initializer. */
8653 return NULL_RTX;
8656 /* Initialize vector TARGET to VALS. */
8658 void
8659 neon_expand_vector_init (rtx target, rtx vals)
8661 enum machine_mode mode = GET_MODE (target);
8662 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8663 int n_elts = GET_MODE_NUNITS (mode);
8664 int n_var = 0, one_var = -1;
8665 bool all_same = true;
8666 rtx x, mem;
8667 int i;
8669 for (i = 0; i < n_elts; ++i)
8671 x = XVECEXP (vals, 0, i);
8672 if (!CONSTANT_P (x))
8673 ++n_var, one_var = i;
8675 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8676 all_same = false;
8679 if (n_var == 0)
8681 rtx constant = neon_make_constant (vals);
8682 if (constant != NULL_RTX)
8684 emit_move_insn (target, constant);
8685 return;
8689 /* Splat a single non-constant element if we can. */
8690 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8692 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8693 emit_insn (gen_rtx_SET (VOIDmode, target,
8694 gen_rtx_VEC_DUPLICATE (mode, x)));
8695 return;
8698 /* One field is non-constant. Load constant then overwrite varying
8699 field. This is more efficient than using the stack. */
8700 if (n_var == 1)
8702 rtx copy = copy_rtx (vals);
8703 rtx index = GEN_INT (one_var);
8705 /* Load constant part of vector, substitute neighboring value for
8706 varying element. */
8707 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8708 neon_expand_vector_init (target, copy);
8710 /* Insert variable. */
8711 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8712 switch (mode)
8714 case V8QImode:
8715 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8716 break;
8717 case V16QImode:
8718 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8719 break;
8720 case V4HImode:
8721 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8722 break;
8723 case V8HImode:
8724 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8725 break;
8726 case V2SImode:
8727 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8728 break;
8729 case V4SImode:
8730 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8731 break;
8732 case V2SFmode:
8733 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8734 break;
8735 case V4SFmode:
8736 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8737 break;
8738 case V2DImode:
8739 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8740 break;
8741 default:
8742 gcc_unreachable ();
8744 return;
8747 /* Construct the vector in memory one field at a time
8748 and load the whole vector. */
8749 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8750 for (i = 0; i < n_elts; i++)
8751 emit_move_insn (adjust_address_nv (mem, inner_mode,
8752 i * GET_MODE_SIZE (inner_mode)),
8753 XVECEXP (vals, 0, i));
8754 emit_move_insn (target, mem);
8757 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8758 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8759 reported source locations are bogus. */
8761 static void
8762 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8763 const char *err)
8765 HOST_WIDE_INT lane;
8767 gcc_assert (GET_CODE (operand) == CONST_INT);
8769 lane = INTVAL (operand);
8771 if (lane < low || lane >= high)
8772 error (err);
8775 /* Bounds-check lanes. */
8777 void
8778 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8780 bounds_check (operand, low, high, "lane out of range");
8783 /* Bounds-check constants. */
8785 void
8786 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8788 bounds_check (operand, low, high, "constant out of range");
8791 HOST_WIDE_INT
8792 neon_element_bits (enum machine_mode mode)
8794 if (mode == DImode)
8795 return GET_MODE_BITSIZE (mode);
8796 else
8797 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8801 /* Predicates for `match_operand' and `match_operator'. */
8803 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8805 cirrus_memory_offset (rtx op)
8807 /* Reject eliminable registers. */
8808 if (! (reload_in_progress || reload_completed)
8809 && ( reg_mentioned_p (frame_pointer_rtx, op)
8810 || reg_mentioned_p (arg_pointer_rtx, op)
8811 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8812 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8813 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8814 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8815 return 0;
8817 if (GET_CODE (op) == MEM)
8819 rtx ind;
8821 ind = XEXP (op, 0);
8823 /* Match: (mem (reg)). */
8824 if (GET_CODE (ind) == REG)
8825 return 1;
8827 /* Match:
8828 (mem (plus (reg)
8829 (const))). */
8830 if (GET_CODE (ind) == PLUS
8831 && GET_CODE (XEXP (ind, 0)) == REG
8832 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8833 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8834 return 1;
8837 return 0;
8840 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8841 WB is true if full writeback address modes are allowed and is false
8842 if limited writeback address modes (POST_INC and PRE_DEC) are
8843 allowed. */
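 /* For example (illustrative), [rn], [rn, #508] and [rn, #-1020] are
    accepted below, while [rn, #2] (not word-aligned) and [rn, #1024]
    (out of range) are not.  */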
8846 arm_coproc_mem_operand (rtx op, bool wb)
8848 rtx ind;
8850 /* Reject eliminable registers. */
8851 if (! (reload_in_progress || reload_completed)
8852 && ( reg_mentioned_p (frame_pointer_rtx, op)
8853 || reg_mentioned_p (arg_pointer_rtx, op)
8854 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8855 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8856 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8857 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8858 return FALSE;
8860 /* Constants are converted into offsets from labels. */
8861 if (GET_CODE (op) != MEM)
8862 return FALSE;
8864 ind = XEXP (op, 0);
8866 if (reload_completed
8867 && (GET_CODE (ind) == LABEL_REF
8868 || (GET_CODE (ind) == CONST
8869 && GET_CODE (XEXP (ind, 0)) == PLUS
8870 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8871 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8872 return TRUE;
8874 /* Match: (mem (reg)). */
8875 if (GET_CODE (ind) == REG)
8876 return arm_address_register_rtx_p (ind, 0);
 8878 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8879 acceptable in any case (subject to verification by
8880 arm_address_register_rtx_p). We need WB to be true to accept
8881 PRE_INC and POST_DEC. */
8882 if (GET_CODE (ind) == POST_INC
8883 || GET_CODE (ind) == PRE_DEC
8884 || (wb
8885 && (GET_CODE (ind) == PRE_INC
8886 || GET_CODE (ind) == POST_DEC)))
8887 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8889 if (wb
8890 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8891 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8892 && GET_CODE (XEXP (ind, 1)) == PLUS
8893 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8894 ind = XEXP (ind, 1);
8896 /* Match:
8897 (plus (reg)
8898 (const)). */
8899 if (GET_CODE (ind) == PLUS
8900 && GET_CODE (XEXP (ind, 0)) == REG
8901 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8902 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8903 && INTVAL (XEXP (ind, 1)) > -1024
8904 && INTVAL (XEXP (ind, 1)) < 1024
8905 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8906 return TRUE;
8908 return FALSE;
8911 /* Return TRUE if OP is a memory operand which we can load or store a vector
8912 to/from. TYPE is one of the following values:
 8913 0 - Vector load/store (vldr)
8914 1 - Core registers (ldm)
8915 2 - Element/structure loads (vld1)
8918 neon_vector_mem_operand (rtx op, int type)
8920 rtx ind;
8922 /* Reject eliminable registers. */
8923 if (! (reload_in_progress || reload_completed)
8924 && ( reg_mentioned_p (frame_pointer_rtx, op)
8925 || reg_mentioned_p (arg_pointer_rtx, op)
8926 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8927 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8928 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8929 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8930 return FALSE;
8932 /* Constants are converted into offsets from labels. */
8933 if (GET_CODE (op) != MEM)
8934 return FALSE;
8936 ind = XEXP (op, 0);
8938 if (reload_completed
8939 && (GET_CODE (ind) == LABEL_REF
8940 || (GET_CODE (ind) == CONST
8941 && GET_CODE (XEXP (ind, 0)) == PLUS
8942 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8943 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8944 return TRUE;
8946 /* Match: (mem (reg)). */
8947 if (GET_CODE (ind) == REG)
8948 return arm_address_register_rtx_p (ind, 0);
8950 /* Allow post-increment with Neon registers. */
8951 if ((type != 1 && GET_CODE (ind) == POST_INC)
8952 || (type == 0 && GET_CODE (ind) == PRE_DEC))
8953 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8955 /* FIXME: vld1 allows register post-modify. */
8957 /* Match:
8958 (plus (reg)
8959 (const)). */
8960 if (type == 0
8961 && GET_CODE (ind) == PLUS
8962 && GET_CODE (XEXP (ind, 0)) == REG
8963 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8964 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8965 && INTVAL (XEXP (ind, 1)) > -1024
8966 && INTVAL (XEXP (ind, 1)) < 1016
8967 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8968 return TRUE;
8970 return FALSE;
8973 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8974 type. */
8976 neon_struct_mem_operand (rtx op)
8978 rtx ind;
8980 /* Reject eliminable registers. */
8981 if (! (reload_in_progress || reload_completed)
8982 && ( reg_mentioned_p (frame_pointer_rtx, op)
8983 || reg_mentioned_p (arg_pointer_rtx, op)
8984 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8985 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8986 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8987 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8988 return FALSE;
8990 /* Constants are converted into offsets from labels. */
8991 if (GET_CODE (op) != MEM)
8992 return FALSE;
8994 ind = XEXP (op, 0);
8996 if (reload_completed
8997 && (GET_CODE (ind) == LABEL_REF
8998 || (GET_CODE (ind) == CONST
8999 && GET_CODE (XEXP (ind, 0)) == PLUS
9000 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9001 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9002 return TRUE;
9004 /* Match: (mem (reg)). */
9005 if (GET_CODE (ind) == REG)
9006 return arm_address_register_rtx_p (ind, 0);
9008 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9009 if (GET_CODE (ind) == POST_INC
9010 || GET_CODE (ind) == PRE_DEC)
9011 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9013 return FALSE;
9016 /* Return true if X is a register that will be eliminated later on. */
9018 arm_eliminable_register (rtx x)
9020 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9021 || REGNO (x) == ARG_POINTER_REGNUM
9022 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9023 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
 9026 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
9027 coprocessor registers. Otherwise return NO_REGS. */
9029 enum reg_class
9030 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9032 if (mode == HFmode)
9034 if (!TARGET_NEON_FP16)
9035 return GENERAL_REGS;
9036 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9037 return NO_REGS;
9038 return GENERAL_REGS;
9041 /* The neon move patterns handle all legitimate vector and struct
9042 addresses. */
9043 if (TARGET_NEON
9044 && MEM_P (x)
9045 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9046 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9047 || VALID_NEON_STRUCT_MODE (mode)))
9048 return NO_REGS;
9050 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9051 return NO_REGS;
9053 return GENERAL_REGS;
9056 /* Values which must be returned in the most-significant end of the return
9057 register. */
9059 static bool
9060 arm_return_in_msb (const_tree valtype)
9062 return (TARGET_AAPCS_BASED
9063 && BYTES_BIG_ENDIAN
9064 && (AGGREGATE_TYPE_P (valtype)
9065 || TREE_CODE (valtype) == COMPLEX_TYPE));
9068 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
 9069 Used by the Cirrus Maverick code, which has to work around
9070 a hardware bug triggered by such instructions. */
9071 static bool
9072 arm_memory_load_p (rtx insn)
 9074 rtx body, lhs, rhs;
9076 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9077 return false;
9079 body = PATTERN (insn);
9081 if (GET_CODE (body) != SET)
9082 return false;
9084 lhs = XEXP (body, 0);
9085 rhs = XEXP (body, 1);
9087 lhs = REG_OR_SUBREG_RTX (lhs);
9089 /* If the destination is not a general purpose
9090 register we do not have to worry. */
9091 if (GET_CODE (lhs) != REG
9092 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9093 return false;
9095 /* As well as loads from memory we also have to react
9096 to loads of invalid constants which will be turned
9097 into loads from the minipool. */
9098 return (GET_CODE (rhs) == MEM
9099 || GET_CODE (rhs) == SYMBOL_REF
9100 || note_invalid_constants (insn, -1, false));
9103 /* Return TRUE if INSN is a Cirrus instruction. */
9104 static bool
9105 arm_cirrus_insn_p (rtx insn)
9107 enum attr_cirrus attr;
9109 /* get_attr cannot accept USE or CLOBBER. */
9110 if (!insn
9111 || GET_CODE (insn) != INSN
9112 || GET_CODE (PATTERN (insn)) == USE
9113 || GET_CODE (PATTERN (insn)) == CLOBBER)
9114 return 0;
9116 attr = get_attr_cirrus (insn);
9118 return attr != CIRRUS_NOT;
9121 /* Cirrus reorg for invalid instruction combinations. */
9122 static void
9123 cirrus_reorg (rtx first)
9125 enum attr_cirrus attr;
9126 rtx body = PATTERN (first);
9127 rtx t;
9128 int nops;
9130 /* Any branch must be followed by 2 non Cirrus instructions. */
9131 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9133 nops = 0;
9134 t = next_nonnote_insn (first);
9136 if (arm_cirrus_insn_p (t))
9137 ++ nops;
9139 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9140 ++ nops;
9142 while (nops --)
9143 emit_insn_after (gen_nop (), first);
9145 return;
9148 /* (float (blah)) is in parallel with a clobber. */
9149 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9150 body = XVECEXP (body, 0, 0);
9152 if (GET_CODE (body) == SET)
9154 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9156 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9157 be followed by a non Cirrus insn. */
9158 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9160 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9161 emit_insn_after (gen_nop (), first);
9163 return;
9165 else if (arm_memory_load_p (first))
9167 unsigned int arm_regno;
9169 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9170 ldr/cfmv64hr combination where the Rd field is the same
9171 in both instructions must be split with a non Cirrus
9172 insn. Example:
9174 ldr r0, blah
9176 cfmvsr mvf0, r0. */
9178 /* Get Arm register number for ldr insn. */
9179 if (GET_CODE (lhs) == REG)
9180 arm_regno = REGNO (lhs);
9181 else
9183 gcc_assert (GET_CODE (rhs) == REG);
9184 arm_regno = REGNO (rhs);
9187 /* Next insn. */
9188 first = next_nonnote_insn (first);
9190 if (! arm_cirrus_insn_p (first))
9191 return;
9193 body = PATTERN (first);
9195 /* (float (blah)) is in parallel with a clobber. */
9196 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9197 body = XVECEXP (body, 0, 0);
9199 if (GET_CODE (body) == FLOAT)
9200 body = XEXP (body, 0);
9202 if (get_attr_cirrus (first) == CIRRUS_MOVE
9203 && GET_CODE (XEXP (body, 1)) == REG
9204 && arm_regno == REGNO (XEXP (body, 1)))
9205 emit_insn_after (gen_nop (), first);
9207 return;
9211 /* get_attr cannot accept USE or CLOBBER. */
9212 if (!first
9213 || GET_CODE (first) != INSN
9214 || GET_CODE (PATTERN (first)) == USE
9215 || GET_CODE (PATTERN (first)) == CLOBBER)
9216 return;
9218 attr = get_attr_cirrus (first);
9220 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9221 must be followed by a non-coprocessor instruction. */
9222 if (attr == CIRRUS_COMPARE)
9224 nops = 0;
9226 t = next_nonnote_insn (first);
9228 if (arm_cirrus_insn_p (t))
9229 ++ nops;
9231 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9232 ++ nops;
9234 while (nops --)
9235 emit_insn_after (gen_nop (), first);
9237 return;
9241 /* Return TRUE if X references a SYMBOL_REF. */
9243 symbol_mentioned_p (rtx x)
9245 const char * fmt;
9246 int i;
9248 if (GET_CODE (x) == SYMBOL_REF)
9249 return 1;
9251 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9252 are constant offsets, not symbols. */
9253 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9254 return 0;
9256 fmt = GET_RTX_FORMAT (GET_CODE (x));
9258 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9260 if (fmt[i] == 'E')
9262 int j;
9264 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9265 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9266 return 1;
9268 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9269 return 1;
9272 return 0;
9275 /* Return TRUE if X references a LABEL_REF. */
9277 label_mentioned_p (rtx x)
9279 const char * fmt;
9280 int i;
9282 if (GET_CODE (x) == LABEL_REF)
9283 return 1;
9285 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9286 instruction, but they are constant offsets, not symbols. */
9287 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9288 return 0;
9290 fmt = GET_RTX_FORMAT (GET_CODE (x));
9291 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9293 if (fmt[i] == 'E')
9295 int j;
9297 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9298 if (label_mentioned_p (XVECEXP (x, i, j)))
9299 return 1;
9301 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9302 return 1;
9305 return 0;
9309 tls_mentioned_p (rtx x)
9311 switch (GET_CODE (x))
9313 case CONST:
9314 return tls_mentioned_p (XEXP (x, 0));
9316 case UNSPEC:
9317 if (XINT (x, 1) == UNSPEC_TLS)
9318 return 1;
9320 default:
9321 return 0;
9325 /* Must not copy any rtx that uses a pc-relative address. */
9327 static int
 9328 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9330 if (GET_CODE (*x) == UNSPEC
9331 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9332 return 1;
9333 return 0;
9336 static bool
9337 arm_cannot_copy_insn_p (rtx insn)
9339 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9342 enum rtx_code
9343 minmax_code (rtx x)
9345 enum rtx_code code = GET_CODE (x);
9347 switch (code)
9349 case SMAX:
9350 return GE;
9351 case SMIN:
9352 return LE;
9353 case UMIN:
9354 return LEU;
9355 case UMAX:
9356 return GEU;
9357 default:
9358 gcc_unreachable ();
9362 /* Return 1 if memory locations are adjacent. */
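 /* For example (illustrative), loads from [r1, #4] and [r1, #8] use the
    same base register and offsets differing by exactly 4, so they qualify;
    on cores with load delay slots (arm_ld_sched) such a pair is only
    reported adjacent when optimizing for size.  */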
9364 adjacent_mem_locations (rtx a, rtx b)
9366 /* We don't guarantee to preserve the order of these memory refs. */
9367 if (volatile_refs_p (a) || volatile_refs_p (b))
9368 return 0;
9370 if ((GET_CODE (XEXP (a, 0)) == REG
9371 || (GET_CODE (XEXP (a, 0)) == PLUS
9372 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9373 && (GET_CODE (XEXP (b, 0)) == REG
9374 || (GET_CODE (XEXP (b, 0)) == PLUS
9375 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9377 HOST_WIDE_INT val0 = 0, val1 = 0;
9378 rtx reg0, reg1;
9379 int val_diff;
9381 if (GET_CODE (XEXP (a, 0)) == PLUS)
9383 reg0 = XEXP (XEXP (a, 0), 0);
9384 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9386 else
9387 reg0 = XEXP (a, 0);
9389 if (GET_CODE (XEXP (b, 0)) == PLUS)
9391 reg1 = XEXP (XEXP (b, 0), 0);
9392 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9394 else
9395 reg1 = XEXP (b, 0);
9397 /* Don't accept any offset that will require multiple
9398 instructions to handle, since this would cause the
9399 arith_adjacentmem pattern to output an overlong sequence. */
9400 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9401 return 0;
9403 /* Don't allow an eliminable register: register elimination can make
9404 the offset too large. */
9405 if (arm_eliminable_register (reg0))
9406 return 0;
9408 val_diff = val1 - val0;
9410 if (arm_ld_sched)
9412 /* If the target has load delay slots, then there's no benefit
9413 to using an ldm instruction unless the offset is zero and
9414 we are optimizing for size. */
9415 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9416 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9417 && (val_diff == 4 || val_diff == -4));
9420 return ((REGNO (reg0) == REGNO (reg1))
9421 && (val_diff == 4 || val_diff == -4));
9424 return 0;
9427 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9428 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9429 instruction. ADD_OFFSET is nonzero if the base address register needs
9430 to be modified with an add instruction before we can use it. */
9432 static bool
9433 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9434 int nops, HOST_WIDE_INT add_offset)
9436 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9437 if the offset isn't small enough. The reason 2 ldrs are faster
9438 is because these ARMs are able to do more than one cache access
9439 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9440 whilst the ARM8 has a double bandwidth cache. This means that
9441 these cores can do both an instruction fetch and a data fetch in
9442 a single cycle, so the trick of calculating the address into a
9443 scratch register (one of the result regs) and then doing a load
9444 multiple actually becomes slower (and no smaller in code size).
9445 That is the transformation
9447 ldr rd1, [rbase + offset]
 9448 ldr rd2, [rbase + offset + 4]
 to
9452 add rd1, rbase, offset
9453 ldmia rd1, {rd1, rd2}
9455 produces worse code -- '3 cycles + any stalls on rd2' instead of
9456 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9457 access per cycle, the first sequence could never complete in less
9458 than 6 cycles, whereas the ldm sequence would only take 5 and
9459 would make better use of sequential accesses if not hitting the
9460 cache.
9462 We cheat here and test 'arm_ld_sched' which we currently know to
9463 only be true for the ARM8, ARM9 and StrongARM. If this ever
9464 changes, then the test below needs to be reworked. */
9465 if (nops == 2 && arm_ld_sched && add_offset != 0)
9466 return false;
9468 /* XScale has load-store double instructions, but they have stricter
9469 alignment requirements than load-store multiple, so we cannot
9470 use them.
9472 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9473 the pipeline until completion.
 9475 NREGS CYCLES
 1 3
 2 4
 3 5
 4 6
9481 An ldr instruction takes 1-3 cycles, but does not block the
9482 pipeline.
9484 NREGS CYCLES
9485 1 1-3
9486 2 2-6
9487 3 3-9
9488 4 4-12
9490 Best case ldr will always win. However, the more ldr instructions
9491 we issue, the less likely we are to be able to schedule them well.
9492 Using ldr instructions also increases code size.
9494 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9495 for counts of 3 or 4 regs. */
9496 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9497 return false;
9498 return true;
9501 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9502 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
 9503 an array ORDER describing the sequence in which to access the offsets
 9504 so that they are visited in ascending order. In this sequence, each
9505 offset must be larger by exactly 4 than the previous one. ORDER[0]
9506 must have been filled in with the lowest offset by the caller.
9507 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9508 we use to verify that ORDER produces an ascending order of registers.
9509 Return true if it was possible to construct such an order, false if
9510 not. */
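 /* For example (illustrative), with UNSORTED_OFFSETS = { 8, 4, 12, 0 } and
    ORDER[0] pre-set to 3 (the entry holding offset 0), the function fills
    in ORDER = { 3, 1, 0, 2 }, visiting the offsets as 0, 4, 8, 12.  */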
9512 static bool
9513 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9514 int *unsorted_regs)
9516 int i;
9517 for (i = 1; i < nops; i++)
9519 int j;
9521 order[i] = order[i - 1];
9522 for (j = 0; j < nops; j++)
9523 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9525 /* We must find exactly one offset that is higher than the
9526 previous one by 4. */
9527 if (order[i] != order[i - 1])
9528 return false;
9529 order[i] = j;
9531 if (order[i] == order[i - 1])
9532 return false;
9533 /* The register numbers must be ascending. */
9534 if (unsorted_regs != NULL
9535 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9536 return false;
9538 return true;
9541 /* Used to determine in a peephole whether a sequence of load
9542 instructions can be changed into a load-multiple instruction.
9543 NOPS is the number of separate load instructions we are examining. The
9544 first NOPS entries in OPERANDS are the destination registers, the
9545 next NOPS entries are memory operands. If this function is
9546 successful, *BASE is set to the common base register of the memory
9547 accesses; *LOAD_OFFSET is set to the first memory location's offset
9548 from that base register.
9549 REGS is an array filled in with the destination register numbers.
 9550 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
 9551 insn numbers to an ascending order of loads. If CHECK_REGS is true,
9552 the sequence of registers in REGS matches the loads from ascending memory
9553 locations, and the function verifies that the register numbers are
9554 themselves ascending. If CHECK_REGS is false, the register numbers
9555 are stored in the order they are found in the operands. */
9556 static int
9557 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9558 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9560 int unsorted_regs[MAX_LDM_STM_OPS];
9561 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9562 int order[MAX_LDM_STM_OPS];
9563 rtx base_reg_rtx = NULL;
9564 int base_reg = -1;
9565 int i, ldm_case;
9567 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9568 easily extended if required. */
9569 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9571 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9573 /* Loop over the operands and check that the memory references are
9574 suitable (i.e. immediate offsets from the same base register). At
9575 the same time, extract the target register, and the memory
9576 offsets. */
9577 for (i = 0; i < nops; i++)
9579 rtx reg;
9580 rtx offset;
9582 /* Convert a subreg of a mem into the mem itself. */
9583 if (GET_CODE (operands[nops + i]) == SUBREG)
9584 operands[nops + i] = alter_subreg (operands + (nops + i));
9586 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9588 /* Don't reorder volatile memory references; it doesn't seem worth
9589 looking for the case where the order is ok anyway. */
9590 if (MEM_VOLATILE_P (operands[nops + i]))
9591 return 0;
9593 offset = const0_rtx;
9595 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9596 || (GET_CODE (reg) == SUBREG
9597 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9598 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9599 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9600 == REG)
9601 || (GET_CODE (reg) == SUBREG
9602 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9603 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9604 == CONST_INT)))
9606 if (i == 0)
9608 base_reg = REGNO (reg);
9609 base_reg_rtx = reg;
9610 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9611 return 0;
9613 else if (base_reg != (int) REGNO (reg))
9614 /* Not addressed from the same base register. */
9615 return 0;
9617 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9618 ? REGNO (operands[i])
9619 : REGNO (SUBREG_REG (operands[i])));
9621 /* If it isn't an integer register, or if it overwrites the
9622 base register but isn't the last insn in the list, then
9623 we can't do this. */
9624 if (unsorted_regs[i] < 0
9625 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9626 || unsorted_regs[i] > 14
9627 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9628 return 0;
9630 unsorted_offsets[i] = INTVAL (offset);
9631 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9632 order[0] = i;
9634 else
9635 /* Not a suitable memory address. */
9636 return 0;
9639 /* All the useful information has now been extracted from the
9640 operands into unsorted_regs and unsorted_offsets; additionally,
9641 order[0] has been set to the lowest offset in the list. Sort
9642 the offsets into order, verifying that they are adjacent, and
9643 check that the register numbers are ascending. */
9644 if (!compute_offset_order (nops, unsorted_offsets, order,
9645 check_regs ? unsorted_regs : NULL))
9646 return 0;
9648 if (saved_order)
9649 memcpy (saved_order, order, sizeof order);
9651 if (base)
9653 *base = base_reg;
9655 for (i = 0; i < nops; i++)
9656 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9658 *load_offset = unsorted_offsets[order[0]];
9661 if (TARGET_THUMB1
9662 && !peep2_reg_dead_p (nops, base_reg_rtx))
9663 return 0;
9665 if (unsorted_offsets[order[0]] == 0)
9666 ldm_case = 1; /* ldmia */
9667 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9668 ldm_case = 2; /* ldmib */
9669 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9670 ldm_case = 3; /* ldmda */
9671 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9672 ldm_case = 4; /* ldmdb */
9673 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9674 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9675 ldm_case = 5;
9676 else
9677 return 0;
9679 if (!multiple_operation_profitable_p (false, nops,
9680 ldm_case == 5
9681 ? unsorted_offsets[order[0]] : 0))
9682 return 0;
9684 return ldm_case;
9687 /* Used to determine in a peephole whether a sequence of store instructions can
9688 be changed into a store-multiple instruction.
9689 NOPS is the number of separate store instructions we are examining.
9690 NOPS_TOTAL is the total number of instructions recognized by the peephole
9691 pattern.
9692 The first NOPS entries in OPERANDS are the source registers, the next
9693 NOPS entries are memory operands. If this function is successful, *BASE is
9694 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9695 to the first memory location's offset from that base register. REGS is an
9696 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9697 likewise filled with the corresponding rtx's.
9698 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9699 numbers to an ascending order of stores.
9700 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9701 from ascending memory locations, and the function verifies that the register
9702 numbers are themselves ascending. If CHECK_REGS is false, the register
9703 numbers are stored in the order they are found in the operands. */
9704 static int
9705 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9706 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9707 HOST_WIDE_INT *load_offset, bool check_regs)
9709 int unsorted_regs[MAX_LDM_STM_OPS];
9710 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9711 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9712 int order[MAX_LDM_STM_OPS];
9713 int base_reg = -1;
9714 rtx base_reg_rtx = NULL;
9715 int i, stm_case;
9717 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9718 easily extended if required. */
9719 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9721 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9723 /* Loop over the operands and check that the memory references are
9724 suitable (i.e. immediate offsets from the same base register). At
9725 the same time, extract the target register, and the memory
9726 offsets. */
9727 for (i = 0; i < nops; i++)
9729 rtx reg;
9730 rtx offset;
9732 /* Convert a subreg of a mem into the mem itself. */
9733 if (GET_CODE (operands[nops + i]) == SUBREG)
9734 operands[nops + i] = alter_subreg (operands + (nops + i));
9736 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9738 /* Don't reorder volatile memory references; it doesn't seem worth
9739 looking for the case where the order is ok anyway. */
9740 if (MEM_VOLATILE_P (operands[nops + i]))
9741 return 0;
9743 offset = const0_rtx;
9745 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9746 || (GET_CODE (reg) == SUBREG
9747 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9748 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9749 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9750 == REG)
9751 || (GET_CODE (reg) == SUBREG
9752 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9753 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9754 == CONST_INT)))
9756 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9757 ? operands[i] : SUBREG_REG (operands[i]));
9758 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9760 if (i == 0)
9762 base_reg = REGNO (reg);
9763 base_reg_rtx = reg;
9764 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9765 return 0;
9767 else if (base_reg != (int) REGNO (reg))
9768 /* Not addressed from the same base register. */
9769 return 0;
9771 /* If it isn't an integer register, then we can't do this. */
9772 if (unsorted_regs[i] < 0
9773 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9774 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9775 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9776 || unsorted_regs[i] > 14)
9777 return 0;
9779 unsorted_offsets[i] = INTVAL (offset);
9780 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9781 order[0] = i;
9783 else
9784 /* Not a suitable memory address. */
9785 return 0;
9788 /* All the useful information has now been extracted from the
9789 operands into unsorted_regs and unsorted_offsets; additionally,
9790 order[0] has been set to the lowest offset in the list. Sort
9791 the offsets into order, verifying that they are adjacent, and
9792 check that the register numbers are ascending. */
9793 if (!compute_offset_order (nops, unsorted_offsets, order,
9794 check_regs ? unsorted_regs : NULL))
9795 return 0;
9797 if (saved_order)
9798 memcpy (saved_order, order, sizeof order);
9800 if (base)
9802 *base = base_reg;
9804 for (i = 0; i < nops; i++)
9806 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9807 if (reg_rtxs)
9808 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9811 *load_offset = unsorted_offsets[order[0]];
9814 if (TARGET_THUMB1
9815 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9816 return 0;
9818 if (unsorted_offsets[order[0]] == 0)
9819 stm_case = 1; /* stmia */
9820 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9821 stm_case = 2; /* stmib */
9822 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9823 stm_case = 3; /* stmda */
9824 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9825 stm_case = 4; /* stmdb */
9826 else
9827 return 0;
9829 if (!multiple_operation_profitable_p (false, nops, 0))
9830 return 0;
9832 return stm_case;
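/* For illustration (hypothetical operands): if the peephole matched
"str r3, [r0, #4]" followed by "str r2, [r0]", the ascending memory order
is insn 1 then insn 0, so *SAVED_ORDER becomes {1, 0}, *BASE is the
register number of r0 and *LOAD_OFFSET is 0; with CHECK_REGS the returned
REGS are the numbers of r2 and r3, which is accepted because the register
numbers rise along with the addresses.  */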
9835 /* Routines for use in generating RTL. */
9837 /* Generate a load-multiple instruction. COUNT is the number of loads in
9838 the instruction; REGS and MEMS are arrays containing the operands.
9839 BASEREG is the base register to be used in addressing the memory operands.
9840 WBACK_OFFSET is nonzero if the instruction should update the base
9841 register. */
9843 static rtx
9844 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9845 HOST_WIDE_INT wback_offset)
9847 int i = 0, j;
9848 rtx result;
9850 if (!multiple_operation_profitable_p (false, count, 0))
9852 rtx seq;
9854 start_sequence ();
9856 for (i = 0; i < count; i++)
9857 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9859 if (wback_offset != 0)
9860 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9862 seq = get_insns ();
9863 end_sequence ();
9865 return seq;
9868 result = gen_rtx_PARALLEL (VOIDmode,
9869 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9870 if (wback_offset != 0)
9872 XVECEXP (result, 0, 0)
9873 = gen_rtx_SET (VOIDmode, basereg,
9874 plus_constant (basereg, wback_offset));
9875 i = 1;
9876 count++;
9879 for (j = 0; i < count; i++, j++)
9880 XVECEXP (result, 0, i)
9881 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9883 return result;
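/* A sketch of the RTL built above for COUNT == 2 and WBACK_OFFSET == 8
(register numbers are illustrative):

(parallel [(set (reg:SI basereg)
(plus:SI (reg:SI basereg) (const_int 8)))
(set (reg:SI 4) (mem:SI ...))
(set (reg:SI 5) (mem:SI ...))])

When the multiple operation is judged unprofitable, a plain sequence of
single loads (plus the base update, if requested) is returned instead.  */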
9886 /* Generate a store-multiple instruction. COUNT is the number of stores in
9887 the instruction; REGS and MEMS are arrays containing the operands.
9888 BASEREG is the base register to be used in addressing the memory operands.
9889 WBACK_OFFSET is nonzero if the instruction should update the base
9890 register. */
9892 static rtx
9893 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9894 HOST_WIDE_INT wback_offset)
9896 int i = 0, j;
9897 rtx result;
9899 if (GET_CODE (basereg) == PLUS)
9900 basereg = XEXP (basereg, 0);
9902 if (!multiple_operation_profitable_p (false, count, 0))
9904 rtx seq;
9906 start_sequence ();
9908 for (i = 0; i < count; i++)
9909 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9911 if (wback_offset != 0)
9912 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9914 seq = get_insns ();
9915 end_sequence ();
9917 return seq;
9920 result = gen_rtx_PARALLEL (VOIDmode,
9921 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9922 if (wback_offset != 0)
9924 XVECEXP (result, 0, 0)
9925 = gen_rtx_SET (VOIDmode, basereg,
9926 plus_constant (basereg, wback_offset));
9927 i = 1;
9928 count++;
9931 for (j = 0; i < count; i++, j++)
9932 XVECEXP (result, 0, i)
9933 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9935 return result;
9938 /* Generate either a load-multiple or a store-multiple instruction. This
9939 function can be used in situations where we can start with a single MEM
9940 rtx and adjust its address upwards.
9941 COUNT is the number of operations in the instruction, not counting a
9942 possible update of the base register. REGS is an array containing the
9943 register operands.
9944 BASEREG is the base register to be used in addressing the memory operands,
9945 which are constructed from BASEMEM.
9946 WRITE_BACK specifies whether the generated instruction should include an
9947 update of the base register.
9948 OFFSETP is used to pass an offset to and from this function; this offset
9949 is not used when constructing the address (instead BASEMEM should have an
9950 appropriate offset in its address), it is used only for setting
9951 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
9953 static rtx
9954 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9955 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9957 rtx mems[MAX_LDM_STM_OPS];
9958 HOST_WIDE_INT offset = *offsetp;
9959 int i;
9961 gcc_assert (count <= MAX_LDM_STM_OPS);
9963 if (GET_CODE (basereg) == PLUS)
9964 basereg = XEXP (basereg, 0);
9966 for (i = 0; i < count; i++)
9968 rtx addr = plus_constant (basereg, i * 4);
9969 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9970 offset += 4;
9973 if (write_back)
9974 *offsetp = offset;
9976 if (is_load)
9977 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
9978 write_back ? 4 * count : 0);
9979 else
9980 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
9981 write_back ? 4 * count : 0);
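/* A usage sketch (the variable names and values are illustrative):
starting from BASEMEM that addresses [r1] with *OFFSETP == 0, the call

emit_insn (arm_gen_load_multiple (regs, 4, basereg, TRUE,
basemem, &offset));

builds MEMs at r1, r1+4, r1+8 and r1+12, asks for a base write-back of 16
and leaves *OFFSETP == 16, so that a subsequent call carries on from where
this one stopped.  arm_gen_movmemqi below uses exactly this pattern to
walk through the source and destination blocks.  */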
9984 rtx
9985 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
9986 rtx basemem, HOST_WIDE_INT *offsetp)
9988 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
9989 offsetp);
9992 rtx
9993 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
9994 rtx basemem, HOST_WIDE_INT *offsetp)
9996 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
9997 offsetp);
10000 /* Called from a peephole2 expander to turn a sequence of loads into an
10001 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10002 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10003 is true if we can reorder the registers because they are used commutatively
10004 subsequently.
10005 Returns true iff we could generate a new instruction. */
10007 bool
10008 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10010 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10011 rtx mems[MAX_LDM_STM_OPS];
10012 int i, j, base_reg;
10013 rtx base_reg_rtx;
10014 HOST_WIDE_INT offset;
10015 int write_back = FALSE;
10016 int ldm_case;
10017 rtx addr;
10019 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10020 &base_reg, &offset, !sort_regs);
10022 if (ldm_case == 0)
10023 return false;
10025 if (sort_regs)
10026 for (i = 0; i < nops - 1; i++)
10027 for (j = i + 1; j < nops; j++)
10028 if (regs[i] > regs[j])
10030 int t = regs[i];
10031 regs[i] = regs[j];
10032 regs[j] = t;
10034 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10036 if (TARGET_THUMB1)
10038 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10039 gcc_assert (ldm_case == 1 || ldm_case == 5);
10040 write_back = TRUE;
10043 if (ldm_case == 5)
10045 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10046 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10047 offset = 0;
10048 if (!TARGET_THUMB1)
10050 base_reg = regs[0];
10051 base_reg_rtx = newbase;
10055 for (i = 0; i < nops; i++)
10057 addr = plus_constant (base_reg_rtx, offset + i * 4);
10058 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10059 SImode, addr, 0);
10061 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10062 write_back ? offset + i * 4 : 0));
10063 return true;
10066 /* Called from a peephole2 expander to turn a sequence of stores into an
10067 STM instruction. OPERANDS are the operands found by the peephole matcher;
10068 NOPS indicates how many separate stores we are trying to combine.
10069 Returns true iff we could generate a new instruction. */
10071 bool
10072 gen_stm_seq (rtx *operands, int nops)
10074 int i;
10075 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10076 rtx mems[MAX_LDM_STM_OPS];
10077 int base_reg;
10078 rtx base_reg_rtx;
10079 HOST_WIDE_INT offset;
10080 int write_back = FALSE;
10081 int stm_case;
10082 rtx addr;
10083 bool base_reg_dies;
10085 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10086 mem_order, &base_reg, &offset, true);
10088 if (stm_case == 0)
10089 return false;
10091 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10093 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10094 if (TARGET_THUMB1)
10096 gcc_assert (base_reg_dies);
10097 write_back = TRUE;
10100 if (stm_case == 5)
10102 gcc_assert (base_reg_dies);
10103 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10104 offset = 0;
10107 addr = plus_constant (base_reg_rtx, offset);
10109 for (i = 0; i < nops; i++)
10111 addr = plus_constant (base_reg_rtx, offset + i * 4);
10112 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10113 SImode, addr, 0);
10115 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10116 write_back ? offset + i * 4 : 0));
10117 return true;
10120 /* Called from a peephole2 expander to turn a sequence of stores that are
10121 preceded by constant loads into an STM instruction. OPERANDS are the
10122 operands found by the peephole matcher; NOPS indicates how many
10123 separate stores we are trying to combine; there are 2 * NOPS
10124 instructions in the peephole.
10125 Returns true iff we could generate a new instruction. */
10127 bool
10128 gen_const_stm_seq (rtx *operands, int nops)
10130 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10131 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10132 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10133 rtx mems[MAX_LDM_STM_OPS];
10134 int base_reg;
10135 rtx base_reg_rtx;
10136 HOST_WIDE_INT offset;
10137 int write_back = FALSE;
10138 int stm_case;
10139 rtx addr;
10140 bool base_reg_dies;
10141 int i, j;
10142 HARD_REG_SET allocated;
10144 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10145 mem_order, &base_reg, &offset, false);
10147 if (stm_case == 0)
10148 return false;
10150 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10152 /* If the same register is used more than once, try to find a free
10153 register. */
10154 CLEAR_HARD_REG_SET (allocated);
10155 for (i = 0; i < nops; i++)
10157 for (j = i + 1; j < nops; j++)
10158 if (regs[i] == regs[j])
10160 rtx t = peep2_find_free_register (0, nops * 2,
10161 TARGET_THUMB1 ? "l" : "r",
10162 SImode, &allocated);
10163 if (t == NULL_RTX)
10164 return false;
10165 reg_rtxs[i] = t;
10166 regs[i] = REGNO (t);
10170 /* Compute an ordering that maps the register numbers to an ascending
10171 sequence. */
10172 reg_order[0] = 0;
10173 for (i = 0; i < nops; i++)
10174 if (regs[i] < regs[reg_order[0]])
10175 reg_order[0] = i;
10177 for (i = 1; i < nops; i++)
10179 int this_order = reg_order[i - 1];
10180 for (j = 0; j < nops; j++)
10181 if (regs[j] > regs[reg_order[i - 1]]
10182 && (this_order == reg_order[i - 1]
10183 || regs[j] < regs[this_order]))
10184 this_order = j;
10185 reg_order[i] = this_order;
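/* A worked example of the ordering loop above (register numbers are
hypothetical): with regs == {5, 2, 7}, the initial scan picks
reg_order[0] = 1 (register 2 is the smallest); the next iteration looks
for the smallest register greater than 2 and sets reg_order[1] = 0
(register 5); the last sets reg_order[2] = 2 (register 7).
regs[reg_order[i]] is therefore ascending in i.  */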
10188 /* Ensure that registers that must be live after the instruction end
10189 up with the correct value. */
10190 for (i = 0; i < nops; i++)
10192 int this_order = reg_order[i];
10193 if ((this_order != mem_order[i]
10194 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10195 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10196 return false;
10199 /* Load the constants. */
10200 for (i = 0; i < nops; i++)
10202 rtx op = operands[2 * nops + mem_order[i]];
10203 sorted_regs[i] = regs[reg_order[i]];
10204 emit_move_insn (reg_rtxs[reg_order[i]], op);
10207 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10209 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10210 if (TARGET_THUMB1)
10212 gcc_assert (base_reg_dies);
10213 write_back = TRUE;
10216 if (stm_case == 5)
10218 gcc_assert (base_reg_dies);
10219 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10220 offset = 0;
10223 addr = plus_constant (base_reg_rtx, offset);
10225 for (i = 0; i < nops; i++)
10227 addr = plus_constant (base_reg_rtx, offset + i * 4);
10228 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10229 SImode, addr, 0);
10231 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10232 write_back ? offset + i * 4 : 0));
10233 return true;
10236 int
10237 arm_gen_movmemqi (rtx *operands)
10239 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10240 HOST_WIDE_INT srcoffset, dstoffset;
10241 int i;
10242 rtx src, dst, srcbase, dstbase;
10243 rtx part_bytes_reg = NULL;
10244 rtx mem;
10246 if (GET_CODE (operands[2]) != CONST_INT
10247 || GET_CODE (operands[3]) != CONST_INT
10248 || INTVAL (operands[2]) > 64
10249 || INTVAL (operands[3]) & 3)
10250 return 0;
10252 dstbase = operands[0];
10253 srcbase = operands[1];
10255 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10256 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10258 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10259 out_words_to_go = INTVAL (operands[2]) / 4;
10260 last_bytes = INTVAL (operands[2]) & 3;
10261 dstoffset = srcoffset = 0;
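/* A worked example of the bookkeeping above: for a 14-byte copy,
INTVAL (operands[2]) == 14, so in_words_to_go == 4 (whole words that must
be read), out_words_to_go == 3 (whole words written) and last_bytes == 2;
the fourth word read provides the two trailing bytes.  */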
10263 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10264 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10266 for (i = 0; in_words_to_go >= 2; i+=4)
10268 if (in_words_to_go > 4)
10269 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10270 TRUE, srcbase, &srcoffset));
10271 else
10272 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10273 src, FALSE, srcbase,
10274 &srcoffset));
10276 if (out_words_to_go)
10278 if (out_words_to_go > 4)
10279 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10280 TRUE, dstbase, &dstoffset));
10281 else if (out_words_to_go != 1)
10282 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10283 out_words_to_go, dst,
10284 (last_bytes == 0
10285 ? FALSE : TRUE),
10286 dstbase, &dstoffset));
10287 else
10289 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10290 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10291 if (last_bytes != 0)
10293 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10294 dstoffset += 4;
10299 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10300 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10303 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10304 if (out_words_to_go)
10306 rtx sreg;
10308 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10309 sreg = copy_to_reg (mem);
10311 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10312 emit_move_insn (mem, sreg);
10313 in_words_to_go--;
10315 gcc_assert (!in_words_to_go); /* Sanity check */
10318 if (in_words_to_go)
10320 gcc_assert (in_words_to_go > 0);
10322 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10323 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10326 gcc_assert (!last_bytes || part_bytes_reg);
10328 if (BYTES_BIG_ENDIAN && last_bytes)
10330 rtx tmp = gen_reg_rtx (SImode);
10332 /* The bytes we want are in the top end of the word. */
10333 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10334 GEN_INT (8 * (4 - last_bytes))));
10335 part_bytes_reg = tmp;
10337 while (last_bytes)
10339 mem = adjust_automodify_address (dstbase, QImode,
10340 plus_constant (dst, last_bytes - 1),
10341 dstoffset + last_bytes - 1);
10342 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10344 if (--last_bytes)
10346 tmp = gen_reg_rtx (SImode);
10347 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10348 part_bytes_reg = tmp;
10353 else
10355 if (last_bytes > 1)
10357 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10358 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10359 last_bytes -= 2;
10360 if (last_bytes)
10362 rtx tmp = gen_reg_rtx (SImode);
10363 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10364 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10365 part_bytes_reg = tmp;
10366 dstoffset += 2;
10370 if (last_bytes)
10372 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10373 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10377 return 1;
10380 /* Select a dominance comparison mode if possible for a test of the general
10381 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10382 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10383 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10384 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10385 In all cases OP will be either EQ or NE, but we don't need to know which
10386 here. If we are unable to support a dominance comparison we return
10387 CC mode. This will then fail to match for the RTL expressions that
10388 generate this call. */
10389 enum machine_mode
10390 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10392 enum rtx_code cond1, cond2;
10393 int swapped = 0;
10395 /* Currently we will probably get the wrong result if the individual
10396 comparisons are not simple. This also ensures that it is safe to
10397 reverse a comparison if necessary. */
10398 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10399 != CCmode)
10400 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10401 != CCmode))
10402 return CCmode;
10404 /* The if_then_else variant of this tests the second condition if the
10405 first passes, but is true if the first fails. Reverse the first
10406 condition to get a true "inclusive-or" expression. */
10407 if (cond_or == DOM_CC_NX_OR_Y)
10408 cond1 = reverse_condition (cond1);
10410 /* If the comparisons are not equal, and one doesn't dominate the other,
10411 then we can't do this. */
10412 if (cond1 != cond2
10413 && !comparison_dominates_p (cond1, cond2)
10414 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10415 return CCmode;
10417 if (swapped)
10419 enum rtx_code temp = cond1;
10420 cond1 = cond2;
10421 cond2 = temp;
10424 switch (cond1)
10426 case EQ:
10427 if (cond_or == DOM_CC_X_AND_Y)
10428 return CC_DEQmode;
10430 switch (cond2)
10432 case EQ: return CC_DEQmode;
10433 case LE: return CC_DLEmode;
10434 case LEU: return CC_DLEUmode;
10435 case GE: return CC_DGEmode;
10436 case GEU: return CC_DGEUmode;
10437 default: gcc_unreachable ();
10440 case LT:
10441 if (cond_or == DOM_CC_X_AND_Y)
10442 return CC_DLTmode;
10444 switch (cond2)
10446 case LT:
10447 return CC_DLTmode;
10448 case LE:
10449 return CC_DLEmode;
10450 case NE:
10451 return CC_DNEmode;
10452 default:
10453 gcc_unreachable ();
10456 case GT:
10457 if (cond_or == DOM_CC_X_AND_Y)
10458 return CC_DGTmode;
10460 switch (cond2)
10462 case GT:
10463 return CC_DGTmode;
10464 case GE:
10465 return CC_DGEmode;
10466 case NE:
10467 return CC_DNEmode;
10468 default:
10469 gcc_unreachable ();
10472 case LTU:
10473 if (cond_or == DOM_CC_X_AND_Y)
10474 return CC_DLTUmode;
10476 switch (cond2)
10478 case LTU:
10479 return CC_DLTUmode;
10480 case LEU:
10481 return CC_DLEUmode;
10482 case NE:
10483 return CC_DNEmode;
10484 default:
10485 gcc_unreachable ();
10488 case GTU:
10489 if (cond_or == DOM_CC_X_AND_Y)
10490 return CC_DGTUmode;
10492 switch (cond2)
10494 case GTU:
10495 return CC_DGTUmode;
10496 case GEU:
10497 return CC_DGEUmode;
10498 case NE:
10499 return CC_DNEmode;
10500 default:
10501 gcc_unreachable ();
10504 /* The remaining cases only occur when both comparisons are the
10505 same. */
10506 case NE:
10507 gcc_assert (cond1 == cond2);
10508 return CC_DNEmode;
10510 case LE:
10511 gcc_assert (cond1 == cond2);
10512 return CC_DLEmode;
10514 case GE:
10515 gcc_assert (cond1 == cond2);
10516 return CC_DGEmode;
10518 case LEU:
10519 gcc_assert (cond1 == cond2);
10520 return CC_DLEUmode;
10522 case GEU:
10523 gcc_assert (cond1 == cond2);
10524 return CC_DGEUmode;
10526 default:
10527 gcc_unreachable ();
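/* As a concrete instance of the mapping above: a test such as
(x == 0 && y == 0), once combined into a single conditional expression,
reaches this function as DOM_CC_X_AND_Y with cond1 == cond2 == EQ and is
given CC_DEQmode, which lets the pair be evaluated as a compare followed
by a conditional compare with a single branch on "eq".  (This description
of the emitted sequence is only a sketch; the exact instructions come
from the conditional-compare patterns in the machine description.)  */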
10531 enum machine_mode
10532 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10534 /* All floating point compares return CCFP if it is an equality
10535 comparison, and CCFPE otherwise. */
10536 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10538 switch (op)
10540 case EQ:
10541 case NE:
10542 case UNORDERED:
10543 case ORDERED:
10544 case UNLT:
10545 case UNLE:
10546 case UNGT:
10547 case UNGE:
10548 case UNEQ:
10549 case LTGT:
10550 return CCFPmode;
10552 case LT:
10553 case LE:
10554 case GT:
10555 case GE:
10556 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10557 return CCFPmode;
10558 return CCFPEmode;
10560 default:
10561 gcc_unreachable ();
10565 /* A compare with a shifted operand. Because of canonicalization, the
10566 comparison will have to be swapped when we emit the assembler. */
10567 if (GET_MODE (y) == SImode
10568 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10569 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10570 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10571 || GET_CODE (x) == ROTATERT))
10572 return CC_SWPmode;
10574 /* This operation is performed swapped, but since we only rely on the Z
10575 flag we don't need an additional mode. */
10576 if (GET_MODE (y) == SImode
10577 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10578 && GET_CODE (x) == NEG
10579 && (op == EQ || op == NE))
10580 return CC_Zmode;
10582 /* This is a special case that is used by combine to allow a
10583 comparison of a shifted byte load to be split into a zero-extend
10584 followed by a comparison of the shifted integer (only valid for
10585 equalities and unsigned inequalities). */
10586 if (GET_MODE (x) == SImode
10587 && GET_CODE (x) == ASHIFT
10588 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10589 && GET_CODE (XEXP (x, 0)) == SUBREG
10590 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10591 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10592 && (op == EQ || op == NE
10593 || op == GEU || op == GTU || op == LTU || op == LEU)
10594 && GET_CODE (y) == CONST_INT)
10595 return CC_Zmode;
10597 /* A construct for a conditional compare: if the false arm contains
10598 0, then both conditions must be true; otherwise either condition
10599 must be true. Not all conditions are possible, so CCmode is
10600 returned if it can't be done. */
10601 if (GET_CODE (x) == IF_THEN_ELSE
10602 && (XEXP (x, 2) == const0_rtx
10603 || XEXP (x, 2) == const1_rtx)
10604 && COMPARISON_P (XEXP (x, 0))
10605 && COMPARISON_P (XEXP (x, 1)))
10606 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10607 INTVAL (XEXP (x, 2)));
10609 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10610 if (GET_CODE (x) == AND
10611 && (op == EQ || op == NE)
10612 && COMPARISON_P (XEXP (x, 0))
10613 && COMPARISON_P (XEXP (x, 1)))
10614 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10615 DOM_CC_X_AND_Y);
10617 if (GET_CODE (x) == IOR
10618 && (op == EQ || op == NE)
10619 && COMPARISON_P (XEXP (x, 0))
10620 && COMPARISON_P (XEXP (x, 1)))
10621 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10622 DOM_CC_X_OR_Y);
10624 /* An operation (on Thumb) where we want to test for a single bit.
10625 This is done by shifting that bit up into the top bit of a
10626 scratch register; we can then branch on the sign bit. */
10627 if (TARGET_THUMB1
10628 && GET_MODE (x) == SImode
10629 && (op == EQ || op == NE)
10630 && GET_CODE (x) == ZERO_EXTRACT
10631 && XEXP (x, 1) == const1_rtx)
10632 return CC_Nmode;
10634 /* For an operation that sets the condition codes as a side-effect, the
10635 V flag is not set correctly, so we can only use comparisons where
10636 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10637 instead.) */
10638 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10639 if (GET_MODE (x) == SImode
10640 && y == const0_rtx
10641 && (op == EQ || op == NE || op == LT || op == GE)
10642 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10643 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10644 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10645 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10646 || GET_CODE (x) == LSHIFTRT
10647 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10648 || GET_CODE (x) == ROTATERT
10649 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10650 return CC_NOOVmode;
10652 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10653 return CC_Zmode;
10655 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10656 && GET_CODE (x) == PLUS
10657 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10658 return CC_Cmode;
10660 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10662 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10663 available. */
10664 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10665 return CCmode;
10667 switch (op)
10669 case EQ:
10670 case NE:
10671 /* A DImode comparison against zero can be implemented by
10672 or'ing the two halves together. */
10673 if (y == const0_rtx)
10674 return CC_Zmode;
10676 /* We can do an equality test in three Thumb instructions. */
10677 if (!TARGET_ARM)
10678 return CC_Zmode;
10680 /* FALLTHROUGH */
10682 case LTU:
10683 case LEU:
10684 case GTU:
10685 case GEU:
10686 /* DImode unsigned comparisons can be implemented by cmp +
10687 cmpeq without a scratch register. Not worth doing in
10688 Thumb-2. */
10689 if (TARGET_ARM)
10690 return CC_CZmode;
10692 /* FALLTHROUGH */
10694 case LT:
10695 case LE:
10696 case GT:
10697 case GE:
10698 /* DImode signed and unsigned comparisons can be implemented
10699 by cmp + sbcs with a scratch register, but that does not
10700 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10701 gcc_assert (op != EQ && op != NE);
10702 return CC_NCVmode;
10704 default:
10705 gcc_unreachable ();
10709 return CCmode;
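/* One example of the special cases handled above: an unsigned overflow
test written as (a + b) < a compares a PLUS against one of its own
operands with LTU, so CC_Cmode is selected and only the carry flag has to
be valid after the addition.  */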
10712 /* X and Y are two things to compare using CODE. Emit the compare insn and
10713 return the rtx for register 0 in the proper mode. FP means this is a
10714 floating point compare: I don't think that it is needed on the arm. */
10715 rtx
10716 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10718 enum machine_mode mode;
10719 rtx cc_reg;
10720 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10722 /* We might have X as a constant, Y as a register because of the predicates
10723 used for cmpdi. If so, force X to a register here. */
10724 if (dimode_comparison && !REG_P (x))
10725 x = force_reg (DImode, x);
10727 mode = SELECT_CC_MODE (code, x, y);
10728 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10730 if (dimode_comparison
10731 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10732 && mode != CC_CZmode)
10734 rtx clobber, set;
10736 /* To compare two non-zero values for equality, XOR them and
10737 then compare against zero. Not used for ARM mode; there
10738 CC_CZmode is cheaper. */
10739 if (mode == CC_Zmode && y != const0_rtx)
10741 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10742 y = const0_rtx;
10744 /* A scratch register is required. */
10745 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10746 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10747 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10749 else
10750 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10752 return cc_reg;
10755 /* Generate a sequence of insns that will generate the correct return
10756 address mask depending on the physical architecture that the program
10757 is running on. */
10758 rtx
10759 arm_gen_return_addr_mask (void)
10761 rtx reg = gen_reg_rtx (Pmode);
10763 emit_insn (gen_return_addr_mask (reg));
10764 return reg;
10767 void
10768 arm_reload_in_hi (rtx *operands)
10770 rtx ref = operands[1];
10771 rtx base, scratch;
10772 HOST_WIDE_INT offset = 0;
10774 if (GET_CODE (ref) == SUBREG)
10776 offset = SUBREG_BYTE (ref);
10777 ref = SUBREG_REG (ref);
10780 if (GET_CODE (ref) == REG)
10782 /* We have a pseudo which has been spilled onto the stack; there
10783 are two cases here: the first where there is a simple
10784 stack-slot replacement and a second where the stack-slot is
10785 out of range, or is used as a subreg. */
10786 if (reg_equiv_mem (REGNO (ref)))
10788 ref = reg_equiv_mem (REGNO (ref));
10789 base = find_replacement (&XEXP (ref, 0));
10791 else
10792 /* The slot is out of range, or was dressed up in a SUBREG. */
10793 base = reg_equiv_address (REGNO (ref));
10795 else
10796 base = find_replacement (&XEXP (ref, 0));
10798 /* Handle the case where the address is too complex to be offset by 1. */
10799 if (GET_CODE (base) == MINUS
10800 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10802 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10804 emit_set_insn (base_plus, base);
10805 base = base_plus;
10807 else if (GET_CODE (base) == PLUS)
10809 /* The addend must be CONST_INT, or we would have dealt with it above. */
10810 HOST_WIDE_INT hi, lo;
10812 offset += INTVAL (XEXP (base, 1));
10813 base = XEXP (base, 0);
10815 /* Rework the address into a legal sequence of insns. */
10816 /* Valid range for lo is -4095 -> 4095 */
10817 lo = (offset >= 0
10818 ? (offset & 0xfff)
10819 : -((-offset) & 0xfff));
10821 /* Corner case, if lo is the max offset then we would be out of range
10822 once we have added the additional 1 below, so bump the msb into the
10823 pre-loading insn(s). */
10824 if (lo == 4095)
10825 lo &= 0x7ff;
10827 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10828 ^ (HOST_WIDE_INT) 0x80000000)
10829 - (HOST_WIDE_INT) 0x80000000);
10831 gcc_assert (hi + lo == offset);
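/* A worked example of the split above: for offset == 0x1234, lo becomes
0x234 and hi becomes 0x1000; the base is first advanced by 0x1000 (addsi3
can synthesize that in one or more insns) and the two byte loads below
then use offsets 0x234 and 0x235, both inside the +/-4095 range of
ldrb.  */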
10833 if (hi != 0)
10835 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10837 /* Get the base address; addsi3 knows how to handle constants
10838 that require more than one insn. */
10839 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10840 base = base_plus;
10841 offset = lo;
10845 /* Operands[2] may overlap operands[0] (though it won't overlap
10846 operands[1]); that's why we asked for a DImode reg -- so we can
10847 use the bit that does not overlap. */
10848 if (REGNO (operands[2]) == REGNO (operands[0]))
10849 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10850 else
10851 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10853 emit_insn (gen_zero_extendqisi2 (scratch,
10854 gen_rtx_MEM (QImode,
10855 plus_constant (base,
10856 offset))));
10857 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10858 gen_rtx_MEM (QImode,
10859 plus_constant (base,
10860 offset + 1))));
10861 if (!BYTES_BIG_ENDIAN)
10862 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10863 gen_rtx_IOR (SImode,
10864 gen_rtx_ASHIFT
10865 (SImode,
10866 gen_rtx_SUBREG (SImode, operands[0], 0),
10867 GEN_INT (8)),
10868 scratch));
10869 else
10870 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10871 gen_rtx_IOR (SImode,
10872 gen_rtx_ASHIFT (SImode, scratch,
10873 GEN_INT (8)),
10874 gen_rtx_SUBREG (SImode, operands[0], 0)));
10877 /* Handle storing a half-word to memory during reload by synthesizing as two
10878 byte stores. Take care not to clobber the input values until after we
10879 have moved them somewhere safe. This code assumes that if the DImode
10880 scratch in operands[2] overlaps either the input value or output address
10881 in some way, then that value must die in this insn (we absolutely need
10882 two scratch registers for some corner cases). */
10883 void
10884 arm_reload_out_hi (rtx *operands)
10886 rtx ref = operands[0];
10887 rtx outval = operands[1];
10888 rtx base, scratch;
10889 HOST_WIDE_INT offset = 0;
10891 if (GET_CODE (ref) == SUBREG)
10893 offset = SUBREG_BYTE (ref);
10894 ref = SUBREG_REG (ref);
10897 if (GET_CODE (ref) == REG)
10899 /* We have a pseudo which has been spilled onto the stack; there
10900 are two cases here: the first where there is a simple
10901 stack-slot replacement and a second where the stack-slot is
10902 out of range, or is used as a subreg. */
10903 if (reg_equiv_mem (REGNO (ref)))
10905 ref = reg_equiv_mem (REGNO (ref));
10906 base = find_replacement (&XEXP (ref, 0));
10908 else
10909 /* The slot is out of range, or was dressed up in a SUBREG. */
10910 base = reg_equiv_address (REGNO (ref));
10912 else
10913 base = find_replacement (&XEXP (ref, 0));
10915 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10917 /* Handle the case where the address is too complex to be offset by 1. */
10918 if (GET_CODE (base) == MINUS
10919 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10921 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10923 /* Be careful not to destroy OUTVAL. */
10924 if (reg_overlap_mentioned_p (base_plus, outval))
10926 /* Updating base_plus might destroy outval, see if we can
10927 swap the scratch and base_plus. */
10928 if (!reg_overlap_mentioned_p (scratch, outval))
10930 rtx tmp = scratch;
10931 scratch = base_plus;
10932 base_plus = tmp;
10934 else
10936 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10938 /* Be conservative and copy OUTVAL into the scratch now,
10939 this should only be necessary if outval is a subreg
10940 of something larger than a word. */
10941 /* XXX Might this clobber base? I can't see how it can,
10942 since scratch is known to overlap with OUTVAL, and
10943 must be wider than a word. */
10944 emit_insn (gen_movhi (scratch_hi, outval));
10945 outval = scratch_hi;
10949 emit_set_insn (base_plus, base);
10950 base = base_plus;
10952 else if (GET_CODE (base) == PLUS)
10954 /* The addend must be CONST_INT, or we would have dealt with it above. */
10955 HOST_WIDE_INT hi, lo;
10957 offset += INTVAL (XEXP (base, 1));
10958 base = XEXP (base, 0);
10960 /* Rework the address into a legal sequence of insns. */
10961 /* Valid range for lo is -4095 -> 4095 */
10962 lo = (offset >= 0
10963 ? (offset & 0xfff)
10964 : -((-offset) & 0xfff));
10966 /* Corner case, if lo is the max offset then we would be out of range
10967 once we have added the additional 1 below, so bump the msb into the
10968 pre-loading insn(s). */
10969 if (lo == 4095)
10970 lo &= 0x7ff;
10972 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10973 ^ (HOST_WIDE_INT) 0x80000000)
10974 - (HOST_WIDE_INT) 0x80000000);
10976 gcc_assert (hi + lo == offset);
10978 if (hi != 0)
10980 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10982 /* Be careful not to destroy OUTVAL. */
10983 if (reg_overlap_mentioned_p (base_plus, outval))
10985 /* Updating base_plus might destroy outval, see if we
10986 can swap the scratch and base_plus. */
10987 if (!reg_overlap_mentioned_p (scratch, outval))
10989 rtx tmp = scratch;
10990 scratch = base_plus;
10991 base_plus = tmp;
10993 else
10995 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10997 /* Be conservative and copy outval into scratch now,
10998 this should only be necessary if outval is a
10999 subreg of something larger than a word. */
11000 /* XXX Might this clobber base? I can't see how it
11001 can, since scratch is known to overlap with
11002 outval. */
11003 emit_insn (gen_movhi (scratch_hi, outval));
11004 outval = scratch_hi;
11008 /* Get the base address; addsi3 knows how to handle constants
11009 that require more than one insn. */
11010 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11011 base = base_plus;
11012 offset = lo;
11016 if (BYTES_BIG_ENDIAN)
11018 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11019 plus_constant (base, offset + 1)),
11020 gen_lowpart (QImode, outval)));
11021 emit_insn (gen_lshrsi3 (scratch,
11022 gen_rtx_SUBREG (SImode, outval, 0),
11023 GEN_INT (8)));
11024 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11025 gen_lowpart (QImode, scratch)));
11027 else
11029 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11030 gen_lowpart (QImode, outval)));
11031 emit_insn (gen_lshrsi3 (scratch,
11032 gen_rtx_SUBREG (SImode, outval, 0),
11033 GEN_INT (8)));
11034 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11035 plus_constant (base, offset + 1)),
11036 gen_lowpart (QImode, scratch)));
11040 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11041 (padded to the size of a word) should be passed in a register. */
11043 static bool
11044 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11046 if (TARGET_AAPCS_BASED)
11047 return must_pass_in_stack_var_size (mode, type);
11048 else
11049 return must_pass_in_stack_var_size_or_pad (mode, type);
11053 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11054 Return true if an argument passed on the stack should be padded upwards,
11055 i.e. if the least-significant byte has useful data.
11056 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11057 aggregate types are placed in the lowest memory address. */
11059 bool
11060 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11062 if (!TARGET_AAPCS_BASED)
11063 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11065 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11066 return false;
11068 return true;
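/* For example, on a big-endian AAPCS target a char argument (an integral
type) is padded downwards, i.e. this function returns false, while a
three-byte structure is padded upwards and returns true.  */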
11072 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11073 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11074 byte of the register has useful data, and return the opposite if the
11075 most significant byte does.
11076 For AAPCS, small aggregates and small complex types are always padded
11077 upwards. */
11079 bool
11080 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11081 tree type, int first ATTRIBUTE_UNUSED)
11083 if (TARGET_AAPCS_BASED
11084 && BYTES_BIG_ENDIAN
11085 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11086 && int_size_in_bytes (type) <= 4)
11087 return true;
11089 /* Otherwise, use default padding. */
11090 return !BYTES_BIG_ENDIAN;
11094 /* Print a symbolic form of X to the debug file, F. */
11095 static void
11096 arm_print_value (FILE *f, rtx x)
11098 switch (GET_CODE (x))
11100 case CONST_INT:
11101 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11102 return;
11104 case CONST_DOUBLE:
11105 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11106 return;
11108 case CONST_VECTOR:
11110 int i;
11112 fprintf (f, "<");
11113 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11115 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11116 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11117 fputc (',', f);
11119 fprintf (f, ">");
11121 return;
11123 case CONST_STRING:
11124 fprintf (f, "\"%s\"", XSTR (x, 0));
11125 return;
11127 case SYMBOL_REF:
11128 fprintf (f, "`%s'", XSTR (x, 0));
11129 return;
11131 case LABEL_REF:
11132 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11133 return;
11135 case CONST:
11136 arm_print_value (f, XEXP (x, 0));
11137 return;
11139 case PLUS:
11140 arm_print_value (f, XEXP (x, 0));
11141 fprintf (f, "+");
11142 arm_print_value (f, XEXP (x, 1));
11143 return;
11145 case PC:
11146 fprintf (f, "pc");
11147 return;
11149 default:
11150 fprintf (f, "????");
11151 return;
11155 /* Routines for manipulation of the constant pool. */
11157 /* Arm instructions cannot load a large constant directly into a
11158 register; they have to come from a pc relative load. The constant
11159 must therefore be placed in the addressable range of the pc
11160 relative load. Depending on the precise pc relative load
11161 instruction the range is somewhere between 256 bytes and 4k. This
11162 means that we often have to dump a constant inside a function, and
11163 generate code to branch around it.
11165 It is important to minimize this, since the branches will slow
11166 things down and make the code larger.
11168 Normally we can hide the table after an existing unconditional
11169 branch so that there is no interruption of the flow, but in the
11170 worst case the code looks like this:
11172 ldr rn, L1
11174 b L2
11175 align
11176 L1: .long value
11180 ldr rn, L3
11182 b L4
11183 align
11184 L3: .long value
11188 We fix this by performing a scan after scheduling, which notices
11189 which instructions need to have their operands fetched from the
11190 constant table and builds the table.
11192 The algorithm starts by building a table of all the constants that
11193 need fixing up and all the natural barriers in the function (places
11194 where a constant table can be dropped without breaking the flow).
11195 For each fixup we note how far the pc-relative replacement will be
11196 able to reach and the offset of the instruction into the function.
11198 Having built the table we then group the fixes together to form
11199 tables that are as large as possible (subject to addressing
11200 constraints) and emit each table of constants after the last
11201 barrier that is within range of all the instructions in the group.
11202 If a group does not contain a barrier, then we forcibly create one
11203 by inserting a jump instruction into the flow. Once the table has
11204 been inserted, the insns are then modified to reference the
11205 relevant entry in the pool.
11207 Possible enhancements to the algorithm (not implemented) are:
11209 1) For some processors and object formats, there may be benefit in
11210 aligning the pools to the start of cache lines; this alignment
11211 would need to be taken into account when calculating addressability
11212 of a pool. */
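/* A small illustration of the scheme described above (addresses are
hypothetical): suppose the insns at offsets 0x100 and 0x180 each need a
constant and their pc-relative loads can reach about 4K forwards.  The
two fixes are grouped together, the pool is emitted after the last
barrier (for example an existing unconditional branch) that both insns
can still reach, and each insn is then rewritten to load its value from
that pool.  */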
11214 /* These typedefs are located at the start of this file, so that
11215 they can be used in the prototypes there. This comment is to
11216 remind readers of that fact so that the following structures
11217 can be understood more easily.
11219 typedef struct minipool_node Mnode;
11220 typedef struct minipool_fixup Mfix; */
11222 struct minipool_node
11224 /* Doubly linked chain of entries. */
11225 Mnode * next;
11226 Mnode * prev;
11227 /* The maximum offset into the code that this entry can be placed. While
11228 pushing fixes for forward references, all entries are sorted in order
11229 of increasing max_address. */
11230 HOST_WIDE_INT max_address;
11231 /* Similarly for an entry inserted for a backwards ref. */
11232 HOST_WIDE_INT min_address;
11233 /* The number of fixes referencing this entry. This can become zero
11234 if we "unpush" an entry. In this case we ignore the entry when we
11235 come to emit the code. */
11236 int refcount;
11237 /* The offset from the start of the minipool. */
11238 HOST_WIDE_INT offset;
11239 /* The value in the table. */
11240 rtx value;
11241 /* The mode of value. */
11242 enum machine_mode mode;
11243 /* The size of the value. With iWMMXt enabled
11244 sizes > 4 also imply an alignment of 8 bytes. */
11245 int fix_size;
11248 struct minipool_fixup
11250 Mfix * next;
11251 rtx insn;
11252 HOST_WIDE_INT address;
11253 rtx * loc;
11254 enum machine_mode mode;
11255 int fix_size;
11256 rtx value;
11257 Mnode * minipool;
11258 HOST_WIDE_INT forwards;
11259 HOST_WIDE_INT backwards;
11262 /* Fixes less than a word need padding out to a word boundary. */
11263 #define MINIPOOL_FIX_SIZE(mode) \
11264 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
11266 static Mnode * minipool_vector_head;
11267 static Mnode * minipool_vector_tail;
11268 static rtx minipool_vector_label;
11269 static int minipool_pad;
11271 /* The linked list of all minipool fixes required for this function. */
11272 Mfix * minipool_fix_head;
11273 Mfix * minipool_fix_tail;
11274 /* The fix entry for the current minipool, once it has been placed. */
11275 Mfix * minipool_barrier;
11277 /* Determines if INSN is the start of a jump table. Returns the end
11278 of the TABLE or NULL_RTX. */
11279 static rtx
11280 is_jump_table (rtx insn)
11282 rtx table;
11284 if (GET_CODE (insn) == JUMP_INSN
11285 && JUMP_LABEL (insn) != NULL
11286 && ((table = next_real_insn (JUMP_LABEL (insn)))
11287 == next_real_insn (insn))
11288 && table != NULL
11289 && GET_CODE (table) == JUMP_INSN
11290 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11291 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11292 return table;
11294 return NULL_RTX;
11297 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11298 #define JUMP_TABLES_IN_TEXT_SECTION 0
11299 #endif
11301 static HOST_WIDE_INT
11302 get_jump_table_size (rtx insn)
11304 /* ADDR_VECs only take room if read-only data goes into the text
11305 section. */
11306 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11308 rtx body = PATTERN (insn);
11309 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11310 HOST_WIDE_INT size;
11311 HOST_WIDE_INT modesize;
11313 modesize = GET_MODE_SIZE (GET_MODE (body));
11314 size = modesize * XVECLEN (body, elt);
11315 switch (modesize)
11317 case 1:
11318 /* Round up size of TBB table to a halfword boundary. */
11319 size = (size + 1) & ~(HOST_WIDE_INT)1;
11320 break;
11321 case 2:
11322 /* No padding necessary for TBH. */
11323 break;
11324 case 4:
11325 /* Add two bytes for alignment on Thumb. */
11326 if (TARGET_THUMB)
11327 size += 2;
11328 break;
11329 default:
11330 gcc_unreachable ();
11332 return size;
11335 return 0;
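/* For instance, an ADDR_DIFF_VEC in QImode (a Thumb-2 TBB table) with
five entries occupies 5 bytes and is rounded up to 6 above, while a
word-mode table on Thumb is counted with two extra bytes to allow for
alignment padding.  */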
11338 /* Move a minipool fix MP from its current location to before MAX_MP.
11339 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11340 constraints may need updating. */
11341 static Mnode *
11342 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11343 HOST_WIDE_INT max_address)
11345 /* The code below assumes these are different. */
11346 gcc_assert (mp != max_mp);
11348 if (max_mp == NULL)
11350 if (max_address < mp->max_address)
11351 mp->max_address = max_address;
11353 else
11355 if (max_address > max_mp->max_address - mp->fix_size)
11356 mp->max_address = max_mp->max_address - mp->fix_size;
11357 else
11358 mp->max_address = max_address;
11360 /* Unlink MP from its current position. Since max_mp is non-null,
11361 mp->prev must be non-null. */
11362 mp->prev->next = mp->next;
11363 if (mp->next != NULL)
11364 mp->next->prev = mp->prev;
11365 else
11366 minipool_vector_tail = mp->prev;
11368 /* Re-insert it before MAX_MP. */
11369 mp->next = max_mp;
11370 mp->prev = max_mp->prev;
11371 max_mp->prev = mp;
11373 if (mp->prev != NULL)
11374 mp->prev->next = mp;
11375 else
11376 minipool_vector_head = mp;
11379 /* Save the new entry. */
11380 max_mp = mp;
11382 /* Scan over the preceding entries and adjust their addresses as
11383 required. */
11384 while (mp->prev != NULL
11385 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11387 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11388 mp = mp->prev;
11391 return max_mp;
11394 /* Add a constant to the minipool for a forward reference. Returns the
11395 node added or NULL if the constant will not fit in this pool. */
11396 static Mnode *
11397 add_minipool_forward_ref (Mfix *fix)
11399 /* If set, max_mp is the first pool_entry that has a lower
11400 constraint than the one we are trying to add. */
11401 Mnode * max_mp = NULL;
11402 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11403 Mnode * mp;
11405 /* If the minipool starts before the end of FIX->INSN then this FIX
11406 cannot be placed into the current pool. Furthermore, adding the
11407 new constant pool entry may cause the pool to start FIX_SIZE bytes
11408 earlier. */
11409 if (minipool_vector_head &&
11410 (fix->address + get_attr_length (fix->insn)
11411 >= minipool_vector_head->max_address - fix->fix_size))
11412 return NULL;
11414 /* Scan the pool to see if a constant with the same value has
11415 already been added. While we are doing this, also note the
11416 location where we must insert the constant if it doesn't already
11417 exist. */
11418 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11420 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11421 && fix->mode == mp->mode
11422 && (GET_CODE (fix->value) != CODE_LABEL
11423 || (CODE_LABEL_NUMBER (fix->value)
11424 == CODE_LABEL_NUMBER (mp->value)))
11425 && rtx_equal_p (fix->value, mp->value))
11427 /* More than one fix references this entry. */
11428 mp->refcount++;
11429 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11432 /* Note the insertion point if necessary. */
11433 if (max_mp == NULL
11434 && mp->max_address > max_address)
11435 max_mp = mp;
11437 /* If we are inserting an 8-byte aligned quantity and
11438 we have not already found an insertion point, then
11439 make sure that all such 8-byte aligned quantities are
11440 placed at the start of the pool. */
11441 if (ARM_DOUBLEWORD_ALIGN
11442 && max_mp == NULL
11443 && fix->fix_size >= 8
11444 && mp->fix_size < 8)
11446 max_mp = mp;
11447 max_address = mp->max_address;
11451 /* The value is not currently in the minipool, so we need to create
11452 a new entry for it. If MAX_MP is NULL, the entry will be put on
11453 the end of the list since the placement is less constrained than
11454 any existing entry. Otherwise, we insert the new fix before
11455 MAX_MP and, if necessary, adjust the constraints on the other
11456 entries. */
11457 mp = XNEW (Mnode);
11458 mp->fix_size = fix->fix_size;
11459 mp->mode = fix->mode;
11460 mp->value = fix->value;
11461 mp->refcount = 1;
11462 /* Not yet required for a backwards ref. */
11463 mp->min_address = -65536;
11465 if (max_mp == NULL)
11467 mp->max_address = max_address;
11468 mp->next = NULL;
11469 mp->prev = minipool_vector_tail;
11471 if (mp->prev == NULL)
11473 minipool_vector_head = mp;
11474 minipool_vector_label = gen_label_rtx ();
11476 else
11477 mp->prev->next = mp;
11479 minipool_vector_tail = mp;
11481 else
11483 if (max_address > max_mp->max_address - mp->fix_size)
11484 mp->max_address = max_mp->max_address - mp->fix_size;
11485 else
11486 mp->max_address = max_address;
11488 mp->next = max_mp;
11489 mp->prev = max_mp->prev;
11490 max_mp->prev = mp;
11491 if (mp->prev != NULL)
11492 mp->prev->next = mp;
11493 else
11494 minipool_vector_head = mp;
11497 /* Save the new entry. */
11498 max_mp = mp;
11500 /* Scan over the preceding entries and adjust their addresses as
11501 required. */
11502 while (mp->prev != NULL
11503 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11505 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11506 mp = mp->prev;
11509 return max_mp;
11512 static Mnode *
11513 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11514 HOST_WIDE_INT min_address)
11516 HOST_WIDE_INT offset;
11518 /* The code below assumes these are different. */
11519 gcc_assert (mp != min_mp);
11521 if (min_mp == NULL)
11523 if (min_address > mp->min_address)
11524 mp->min_address = min_address;
11526 else
11528 /* We will adjust this below if it is too loose. */
11529 mp->min_address = min_address;
11531 /* Unlink MP from its current position. Since min_mp is non-null,
11532 mp->next must be non-null. */
11533 mp->next->prev = mp->prev;
11534 if (mp->prev != NULL)
11535 mp->prev->next = mp->next;
11536 else
11537 minipool_vector_head = mp->next;
11539 /* Reinsert it after MIN_MP. */
11540 mp->prev = min_mp;
11541 mp->next = min_mp->next;
11542 min_mp->next = mp;
11543 if (mp->next != NULL)
11544 mp->next->prev = mp;
11545 else
11546 minipool_vector_tail = mp;
11549 min_mp = mp;
11551 offset = 0;
11552 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11554 mp->offset = offset;
11555 if (mp->refcount > 0)
11556 offset += mp->fix_size;
11558 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11559 mp->next->min_address = mp->min_address + mp->fix_size;
11562 return min_mp;
11565 /* Add a constant to the minipool for a backward reference. Returns the
11566 node added or NULL if the constant will not fit in this pool.
11568 Note that the code for insertion for a backwards reference can be
11569 somewhat confusing because the calculated offsets for each fix do
11570 not take into account the size of the pool (which is still under
11571 construction). */
11572 static Mnode *
11573 add_minipool_backward_ref (Mfix *fix)
11575 /* If set, min_mp is the last pool_entry that has a lower constraint
11576 than the one we are trying to add. */
11577 Mnode *min_mp = NULL;
11578 /* This can be negative, since it is only a constraint. */
11579 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11580 Mnode *mp;
11582 /* If we can't reach the current pool from this insn, or if we can't
11583 insert this entry at the end of the pool without pushing other
11584 fixes out of range, then we don't try. This ensures that we
11585 can't fail later on. */
11586 if (min_address >= minipool_barrier->address
11587 || (minipool_vector_tail->min_address + fix->fix_size
11588 >= minipool_barrier->address))
11589 return NULL;
11591 /* Scan the pool to see if a constant with the same value has
11592 already been added. While we are doing this, also note the
11593 location where we must insert the constant if it doesn't already
11594 exist. */
11595 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11597 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11598 && fix->mode == mp->mode
11599 && (GET_CODE (fix->value) != CODE_LABEL
11600 || (CODE_LABEL_NUMBER (fix->value)
11601 == CODE_LABEL_NUMBER (mp->value)))
11602 && rtx_equal_p (fix->value, mp->value)
11603 /* Check that there is enough slack to move this entry to the
11604 end of the table (this is conservative). */
11605 && (mp->max_address
11606 > (minipool_barrier->address
11607 + minipool_vector_tail->offset
11608 + minipool_vector_tail->fix_size)))
11610 mp->refcount++;
11611 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11614 if (min_mp != NULL)
11615 mp->min_address += fix->fix_size;
11616 else
11618 /* Note the insertion point if necessary. */
11619 if (mp->min_address < min_address)
11621 /* For now, we do not allow the insertion of nodes requiring 8-byte
11622 alignment anywhere but at the start of the pool. */
11623 if (ARM_DOUBLEWORD_ALIGN
11624 && fix->fix_size >= 8 && mp->fix_size < 8)
11625 return NULL;
11626 else
11627 min_mp = mp;
11629 else if (mp->max_address
11630 < minipool_barrier->address + mp->offset + fix->fix_size)
11632 /* Inserting before this entry would push the fix beyond
11633 its maximum address (which can happen if we have
11634 re-located a forwards fix); force the new fix to come
11635 after it. */
11636 if (ARM_DOUBLEWORD_ALIGN
11637 && fix->fix_size >= 8 && mp->fix_size < 8)
11638 return NULL;
11639 else
11641 min_mp = mp;
11642 min_address = mp->min_address + fix->fix_size;
11645 /* Do not insert a non-8-byte aligned quantity before 8-byte
11646 aligned quantities. */
11647 else if (ARM_DOUBLEWORD_ALIGN
11648 && fix->fix_size < 8
11649 && mp->fix_size >= 8)
11651 min_mp = mp;
11652 min_address = mp->min_address + fix->fix_size;
11657 /* We need to create a new entry. */
11658 mp = XNEW (Mnode);
11659 mp->fix_size = fix->fix_size;
11660 mp->mode = fix->mode;
11661 mp->value = fix->value;
11662 mp->refcount = 1;
11663 mp->max_address = minipool_barrier->address + 65536;
11665 mp->min_address = min_address;
11667 if (min_mp == NULL)
11669 mp->prev = NULL;
11670 mp->next = minipool_vector_head;
11672 if (mp->next == NULL)
11674 minipool_vector_tail = mp;
11675 minipool_vector_label = gen_label_rtx ();
11677 else
11678 mp->next->prev = mp;
11680 minipool_vector_head = mp;
11682 else
11684 mp->next = min_mp->next;
11685 mp->prev = min_mp;
11686 min_mp->next = mp;
11688 if (mp->next != NULL)
11689 mp->next->prev = mp;
11690 else
11691 minipool_vector_tail = mp;
11694 /* Save the new entry. */
11695 min_mp = mp;
11697 if (mp->prev)
11698 mp = mp->prev;
11699 else
11700 mp->offset = 0;
11702 /* Scan over the following entries and adjust their offsets. */
11703 while (mp->next != NULL)
11705 if (mp->next->min_address < mp->min_address + mp->fix_size)
11706 mp->next->min_address = mp->min_address + mp->fix_size;
11708 if (mp->refcount)
11709 mp->next->offset = mp->offset + mp->fix_size;
11710 else
11711 mp->next->offset = mp->offset;
11713 mp = mp->next;
11716 return min_mp;
11719 static void
11720 assign_minipool_offsets (Mfix *barrier)
11722 HOST_WIDE_INT offset = 0;
11723 Mnode *mp;
11725 minipool_barrier = barrier;
11727 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11729 mp->offset = offset;
11731 if (mp->refcount > 0)
11732 offset += mp->fix_size;
11736 /* Output the literal table */
11737 static void
11738 dump_minipool (rtx scan)
11740 Mnode * mp;
11741 Mnode * nmp;
11742 int align64 = 0;
11744 if (ARM_DOUBLEWORD_ALIGN)
11745 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11746 if (mp->refcount > 0 && mp->fix_size >= 8)
11748 align64 = 1;
11749 break;
11752 if (dump_file)
11753 fprintf (dump_file,
11754 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11755 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11757 scan = emit_label_after (gen_label_rtx (), scan);
11758 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11759 scan = emit_label_after (minipool_vector_label, scan);
11761 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11763 if (mp->refcount > 0)
11765 if (dump_file)
11767 fprintf (dump_file,
11768 ";; Offset %u, min %ld, max %ld ",
11769 (unsigned) mp->offset, (unsigned long) mp->min_address,
11770 (unsigned long) mp->max_address);
11771 arm_print_value (dump_file, mp->value);
11772 fputc ('\n', dump_file);
11775 switch (mp->fix_size)
11777 #ifdef HAVE_consttable_1
11778 case 1:
11779 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11780 break;
11782 #endif
11783 #ifdef HAVE_consttable_2
11784 case 2:
11785 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11786 break;
11788 #endif
11789 #ifdef HAVE_consttable_4
11790 case 4:
11791 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11792 break;
11794 #endif
11795 #ifdef HAVE_consttable_8
11796 case 8:
11797 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11798 break;
11800 #endif
11801 #ifdef HAVE_consttable_16
11802 case 16:
11803 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11804 break;
11806 #endif
11807 default:
11808 gcc_unreachable ();
11812 nmp = mp->next;
11813 free (mp);
11816 minipool_vector_head = minipool_vector_tail = NULL;
11817 scan = emit_insn_after (gen_consttable_end (), scan);
11818 scan = emit_barrier_after (scan);
11821 /* Return the cost of forcibly inserting a barrier after INSN. */
11822 static int
11823 arm_barrier_cost (rtx insn)
11825 /* Basing the location of the pool on the loop depth is preferable,
11826 but at the moment, the basic block information seems to be
11827 corrupt by this stage of the compilation. */
11828 int base_cost = 50;
11829 rtx next = next_nonnote_insn (insn);
11831 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11832 base_cost -= 20;
11834 switch (GET_CODE (insn))
11836 case CODE_LABEL:
11837 /* It will always be better to place the table before the label, rather
11838 than after it. */
11839 return 50;
11841 case INSN:
11842 case CALL_INSN:
11843 return base_cost;
11845 case JUMP_INSN:
11846 return base_cost - 10;
11848 default:
11849 return base_cost + 10;
11853 /* Find the best place in the insn stream in the range
11854 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11855 Create the barrier by inserting a jump and add a new fix entry for
11856 it. */
11857 static Mfix *
11858 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11860 HOST_WIDE_INT count = 0;
11861 rtx barrier;
11862 rtx from = fix->insn;
11863 /* The instruction after which we will insert the jump. */
11864 rtx selected = NULL;
11865 int selected_cost;
11866 /* The address at which the jump instruction will be placed. */
11867 HOST_WIDE_INT selected_address;
11868 Mfix * new_fix;
11869 HOST_WIDE_INT max_count = max_address - fix->address;
11870 rtx label = gen_label_rtx ();
11872 selected_cost = arm_barrier_cost (from);
11873 selected_address = fix->address;
11875 while (from && count < max_count)
11877 rtx tmp;
11878 int new_cost;
11880 /* This code shouldn't have been called if there was a natural barrier
11881 within range. */
11882 gcc_assert (GET_CODE (from) != BARRIER);
11884 /* Count the length of this insn. */
11885 count += get_attr_length (from);
11887 /* If there is a jump table, add its length. */
11888 tmp = is_jump_table (from);
11889 if (tmp != NULL)
11891 count += get_jump_table_size (tmp);
11893 /* Jump tables aren't in a basic block, so base the cost on
11894 the dispatch insn. If we select this location, we will
11895 still put the pool after the table. */
11896 new_cost = arm_barrier_cost (from);
11898 if (count < max_count
11899 && (!selected || new_cost <= selected_cost))
11901 selected = tmp;
11902 selected_cost = new_cost;
11903 selected_address = fix->address + count;
11906 /* Continue after the dispatch table. */
11907 from = NEXT_INSN (tmp);
11908 continue;
11911 new_cost = arm_barrier_cost (from);
11913 if (count < max_count
11914 && (!selected || new_cost <= selected_cost))
11916 selected = from;
11917 selected_cost = new_cost;
11918 selected_address = fix->address + count;
11921 from = NEXT_INSN (from);
11924 /* Make sure that we found a place to insert the jump. */
11925 gcc_assert (selected);
11927 /* Make sure we do not split a call and its corresponding
11928 CALL_ARG_LOCATION note. */
11929 if (CALL_P (selected))
11931 rtx next = NEXT_INSN (selected);
11932 if (next && NOTE_P (next)
11933 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
11934 selected = next;
11937 /* Create a new JUMP_INSN that branches around a barrier. */
11938 from = emit_jump_insn_after (gen_jump (label), selected);
11939 JUMP_LABEL (from) = label;
11940 barrier = emit_barrier_after (from);
11941 emit_label_after (label, barrier);
11943 /* Create a minipool barrier entry for the new barrier. */
11944 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11945 new_fix->insn = barrier;
11946 new_fix->address = selected_address;
11947 new_fix->next = fix->next;
11948 fix->next = new_fix;
11950 return new_fix;
11953 /* Record that there is a natural barrier in the insn stream at
11954 ADDRESS. */
11955 static void
11956 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11958 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11960 fix->insn = insn;
11961 fix->address = address;
11963 fix->next = NULL;
11964 if (minipool_fix_head != NULL)
11965 minipool_fix_tail->next = fix;
11966 else
11967 minipool_fix_head = fix;
11969 minipool_fix_tail = fix;
11972 /* Record INSN, which will need fixing up to load a value from the
11973 minipool. ADDRESS is the offset of the insn since the start of the
11974 function; LOC is a pointer to the part of the insn which requires
11975 fixing; VALUE is the constant that must be loaded, which is of type
11976 MODE. */
11977 static void
11978 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11979 enum machine_mode mode, rtx value)
11981 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11983 fix->insn = insn;
11984 fix->address = address;
11985 fix->loc = loc;
11986 fix->mode = mode;
11987 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11988 fix->value = value;
11989 fix->forwards = get_attr_pool_range (insn);
11990 fix->backwards = get_attr_neg_pool_range (insn);
11991 fix->minipool = NULL;
11993 /* If an insn doesn't have a range defined for it, then it isn't
11994 expecting to be reworked by this code. Better to stop now than
11995 to generate duff assembly code. */
11996 gcc_assert (fix->forwards || fix->backwards);
11998 /* If an entry requires 8-byte alignment then assume all constant pools
11999 require 4 bytes of padding. Trying to do this later on a per-pool
12000 basis is awkward because existing pool entries have to be modified. */
12001 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12002 minipool_pad = 4;
12004 if (dump_file)
12006 fprintf (dump_file,
12007 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12008 GET_MODE_NAME (mode),
12009 INSN_UID (insn), (unsigned long) address,
12010 -1 * (long)fix->backwards, (long)fix->forwards);
12011 arm_print_value (dump_file, fix->value);
12012 fprintf (dump_file, "\n");
12015 /* Add it to the chain of fixes. */
12016 fix->next = NULL;
12018 if (minipool_fix_head != NULL)
12019 minipool_fix_tail->next = fix;
12020 else
12021 minipool_fix_head = fix;
12023 minipool_fix_tail = fix;
12026 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12027 Returns the number of insns needed, or 99 if we don't know how to
12028 do it. */
12029 int
12030 arm_const_double_inline_cost (rtx val)
12032 rtx lowpart, highpart;
12033 enum machine_mode mode;
12035 mode = GET_MODE (val);
12037 if (mode == VOIDmode)
12038 mode = DImode;
12040 gcc_assert (GET_MODE_SIZE (mode) == 8);
12042 lowpart = gen_lowpart (SImode, val);
12043 highpart = gen_highpart_mode (SImode, mode, val);
12045 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12046 gcc_assert (GET_CODE (highpart) == CONST_INT);
12048 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12049 NULL_RTX, NULL_RTX, 0, 0)
12050 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12051 NULL_RTX, NULL_RTX, 0, 0));
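/* Illustrative example (values invented): for the 64-bit constant
   0x0000000100000001 each 32-bit half is the valid immediate 1, so the
   sum above is 1 + 1 == 2; halves that need several data-processing
   instructions to synthesize raise the cost accordingly.  */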
12054 /* Return true if it is worthwhile to split a 64-bit constant into two
12055 32-bit operations. This is the case if optimizing for size, or
12056 if we have load delay slots, or if one 32-bit part can be done with
12057 a single data operation. */
12058 bool
12059 arm_const_double_by_parts (rtx val)
12061 enum machine_mode mode = GET_MODE (val);
12062 rtx part;
12064 if (optimize_size || arm_ld_sched)
12065 return true;
12067 if (mode == VOIDmode)
12068 mode = DImode;
12070 part = gen_highpart_mode (SImode, mode, val);
12072 gcc_assert (GET_CODE (part) == CONST_INT);
12074 if (const_ok_for_arm (INTVAL (part))
12075 || const_ok_for_arm (~INTVAL (part)))
12076 return true;
12078 part = gen_lowpart (SImode, val);
12080 gcc_assert (GET_CODE (part) == CONST_INT);
12082 if (const_ok_for_arm (INTVAL (part))
12083 || const_ok_for_arm (~INTVAL (part)))
12084 return true;
12086 return false;
12089 /* Return true if it is possible to inline both the high and low parts
12090 of a 64-bit constant into 32-bit data processing instructions. */
12091 bool
12092 arm_const_double_by_immediates (rtx val)
12094 enum machine_mode mode = GET_MODE (val);
12095 rtx part;
12097 if (mode == VOIDmode)
12098 mode = DImode;
12100 part = gen_highpart_mode (SImode, mode, val);
12102 gcc_assert (GET_CODE (part) == CONST_INT);
12104 if (!const_ok_for_arm (INTVAL (part)))
12105 return false;
12107 part = gen_lowpart (SImode, val);
12109 gcc_assert (GET_CODE (part) == CONST_INT);
12111 if (!const_ok_for_arm (INTVAL (part)))
12112 return false;
12114 return true;
12117 /* Scan INSN and note any of its operands that need fixing.
12118 If DO_PUSHES is false we do not actually push any of the fixups
12119 needed. The function returns TRUE if any fixups were needed/pushed.
12120 This is used by arm_memory_load_p() which needs to know about loads
12121 of constants that will be converted into minipool loads. */
12122 static bool
12123 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12125 bool result = false;
12126 int opno;
12128 extract_insn (insn);
12130 if (!constrain_operands (1))
12131 fatal_insn_not_found (insn);
12133 if (recog_data.n_alternatives == 0)
12134 return false;
12136 /* Fill in recog_op_alt with information about the constraints of
12137 this insn. */
12138 preprocess_constraints ();
12140 for (opno = 0; opno < recog_data.n_operands; opno++)
12142 /* Things we need to fix can only occur in inputs. */
12143 if (recog_data.operand_type[opno] != OP_IN)
12144 continue;
12146 /* If this alternative is a memory reference, then any mention
12147 of constants in this alternative is really to fool reload
12148 into allowing us to accept one there. We need to fix them up
12149 now so that we output the right code. */
12150 if (recog_op_alt[opno][which_alternative].memory_ok)
12152 rtx op = recog_data.operand[opno];
12154 if (CONSTANT_P (op))
12156 if (do_pushes)
12157 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12158 recog_data.operand_mode[opno], op);
12159 result = true;
12161 else if (GET_CODE (op) == MEM
12162 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12163 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12165 if (do_pushes)
12167 rtx cop = avoid_constant_pool_reference (op);
12169 /* Casting the address of something to a mode narrower
12170 than a word can cause avoid_constant_pool_reference()
12171 to return the pool reference itself. That's no good to
12172 us here. Let's just hope that we can use the
12173 constant pool value directly. */
12174 if (op == cop)
12175 cop = get_pool_constant (XEXP (op, 0));
12177 push_minipool_fix (insn, address,
12178 recog_data.operand_loc[opno],
12179 recog_data.operand_mode[opno], cop);
12182 result = true;
12187 return result;
12190 /* Convert instructions to their cc-clobbering variant if possible, since
12191 that allows us to use smaller encodings. */
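/* A sketch of the rewrite (registers invented for the example): when the
   condition codes are dead after the insn, the 32-bit Thumb-2 form

       add.w   r0, r0, r1      @ flags untouched

   can be re-emitted as the flag-setting variant, which has a 16-bit
   encoding:

       adds    r0, r0, r1      @ CC register clobbered
*/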
12193 static void
12194 thumb2_reorg (void)
12196 basic_block bb;
12197 regset_head live;
12199 INIT_REG_SET (&live);
12201 /* We are freeing block_for_insn in the toplev to keep compatibility
12202 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12203 compute_bb_for_insn ();
12204 df_analyze ();
12206 FOR_EACH_BB (bb)
12208 rtx insn;
12210 COPY_REG_SET (&live, DF_LR_OUT (bb));
12211 df_simulate_initialize_backwards (bb, &live);
12212 FOR_BB_INSNS_REVERSE (bb, insn)
12214 if (NONJUMP_INSN_P (insn)
12215 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12217 rtx pat = PATTERN (insn);
12218 if (GET_CODE (pat) == SET
12219 && low_register_operand (XEXP (pat, 0), SImode)
12220 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12221 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12222 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12224 rtx dst = XEXP (pat, 0);
12225 rtx src = XEXP (pat, 1);
12226 rtx op0 = XEXP (src, 0);
12227 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12228 ? XEXP (src, 1) : NULL);
12230 if (rtx_equal_p (dst, op0)
12231 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12233 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12234 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12235 rtvec vec = gen_rtvec (2, pat, clobber);
12237 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12238 INSN_CODE (insn) = -1;
12240 /* We can also handle a commutative operation where the
12241 second operand matches the destination. */
12242 else if (op1 && rtx_equal_p (dst, op1))
12244 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12245 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12246 rtvec vec;
12248 src = copy_rtx (src);
12249 XEXP (src, 0) = op1;
12250 XEXP (src, 1) = op0;
12251 pat = gen_rtx_SET (VOIDmode, dst, src);
12252 vec = gen_rtvec (2, pat, clobber);
12253 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12254 INSN_CODE (insn) = -1;
12259 if (NONDEBUG_INSN_P (insn))
12260 df_simulate_one_insn_backwards (bb, insn, &live);
12264 CLEAR_REG_SET (&live);
12267 /* GCC puts the pool in the wrong place for ARM, since we can only
12268 load addresses a limited distance around the pc. We do some
12269 special munging to move the constant pool values to the correct
12270 point in the code. */
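/* A minimal sketch of the transformation (assembly and labels invented):
   a constant load whose literal would otherwise land out of range, e.g.

       ldr     r0, .LCP        @ .LCP beyond the pc-relative load range

   is redirected at a minipool entry dumped close to the use, with a
   branch placed around the pool when no natural barrier is available:

       ldr     r0, .LPOOL
       b       .Lafter
   .LPOOL:
       .word   <constant>
   .Lafter:
*/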
12271 static void
12272 arm_reorg (void)
12274 rtx insn;
12275 HOST_WIDE_INT address = 0;
12276 Mfix * fix;
12278 if (TARGET_THUMB2)
12279 thumb2_reorg ();
12281 minipool_fix_head = minipool_fix_tail = NULL;
12283 /* The first insn must always be a note, or the code below won't
12284 scan it properly. */
12285 insn = get_insns ();
12286 gcc_assert (GET_CODE (insn) == NOTE);
12287 minipool_pad = 0;
12289 /* Scan all the insns and record the operands that will need fixing. */
12290 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12292 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12293 && (arm_cirrus_insn_p (insn)
12294 || GET_CODE (insn) == JUMP_INSN
12295 || arm_memory_load_p (insn)))
12296 cirrus_reorg (insn);
12298 if (GET_CODE (insn) == BARRIER)
12299 push_minipool_barrier (insn, address);
12300 else if (INSN_P (insn))
12302 rtx table;
12304 note_invalid_constants (insn, address, true);
12305 address += get_attr_length (insn);
12307 /* If the insn is a vector jump, add the size of the table
12308 and skip the table. */
12309 if ((table = is_jump_table (insn)) != NULL)
12311 address += get_jump_table_size (table);
12312 insn = table;
12317 fix = minipool_fix_head;
12319 /* Now scan the fixups and perform the required changes. */
12320 while (fix)
12322 Mfix * ftmp;
12323 Mfix * fdel;
12324 Mfix * last_added_fix;
12325 Mfix * last_barrier = NULL;
12326 Mfix * this_fix;
12328 /* Skip any further barriers before the next fix. */
12329 while (fix && GET_CODE (fix->insn) == BARRIER)
12330 fix = fix->next;
12332 /* No more fixes. */
12333 if (fix == NULL)
12334 break;
12336 last_added_fix = NULL;
12338 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12340 if (GET_CODE (ftmp->insn) == BARRIER)
12342 if (ftmp->address >= minipool_vector_head->max_address)
12343 break;
12345 last_barrier = ftmp;
12347 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12348 break;
12350 last_added_fix = ftmp; /* Keep track of the last fix added. */
12353 /* If we found a barrier, drop back to that; any fixes that we
12354 could have reached but come after the barrier will now go in
12355 the next mini-pool. */
12356 if (last_barrier != NULL)
12358 /* Reduce the refcount for those fixes that won't go into this
12359 pool after all. */
12360 for (fdel = last_barrier->next;
12361 fdel && fdel != ftmp;
12362 fdel = fdel->next)
12364 fdel->minipool->refcount--;
12365 fdel->minipool = NULL;
12368 ftmp = last_barrier;
12370 else
12372 /* ftmp is the first fix that we can't fit into this pool and
12373 there are no natural barriers that we could use. Insert a
12374 new barrier in the code somewhere between the previous
12375 fix and this one, and arrange to jump around it. */
12376 HOST_WIDE_INT max_address;
12378 /* The last item on the list of fixes must be a barrier, so
12379 we can never run off the end of the list of fixes without
12380 last_barrier being set. */
12381 gcc_assert (ftmp);
12383 max_address = minipool_vector_head->max_address;
12384 /* Check that there isn't another fix that is in range that
12385 we couldn't fit into this pool because the pool was
12386 already too large: we need to put the pool before such an
12387 instruction. The pool itself may come just after the
12388 fix because create_fix_barrier also allows space for a
12389 jump instruction. */
12390 if (ftmp->address < max_address)
12391 max_address = ftmp->address + 1;
12393 last_barrier = create_fix_barrier (last_added_fix, max_address);
12396 assign_minipool_offsets (last_barrier);
12398 while (ftmp)
12400 if (GET_CODE (ftmp->insn) != BARRIER
12401 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12402 == NULL))
12403 break;
12405 ftmp = ftmp->next;
12408 /* Scan over the fixes we have identified for this pool, fixing them
12409 up and adding the constants to the pool itself. */
12410 for (this_fix = fix; this_fix && ftmp != this_fix;
12411 this_fix = this_fix->next)
12412 if (GET_CODE (this_fix->insn) != BARRIER)
12414 rtx addr
12415 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12416 minipool_vector_label),
12417 this_fix->minipool->offset);
12418 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12421 dump_minipool (last_barrier->insn);
12422 fix = ftmp;
12425 /* From now on we must synthesize any constants that we can't handle
12426 directly. This can happen if the RTL gets split during final
12427 instruction generation. */
12428 after_arm_reorg = 1;
12430 /* Free the minipool memory. */
12431 obstack_free (&minipool_obstack, minipool_startobj);
12434 /* Routines to output assembly language. */
12436 /* If the rtx is one of the recognized FP constant values then return its string.
12437 In this way we can ensure that valid double constants are generated even
12438 when cross compiling. */
12439 const char *
12440 fp_immediate_constant (rtx x)
12442 REAL_VALUE_TYPE r;
12443 int i;
12445 if (!fp_consts_inited)
12446 init_fp_table ();
12448 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12449 for (i = 0; i < 8; i++)
12450 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12451 return strings_fp[i];
12453 gcc_unreachable ();
12456 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12457 static const char *
12458 fp_const_from_val (REAL_VALUE_TYPE *r)
12460 int i;
12462 if (!fp_consts_inited)
12463 init_fp_table ();
12465 for (i = 0; i < 8; i++)
12466 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12467 return strings_fp[i];
12469 gcc_unreachable ();
12472 /* Output the operands of a LDM/STM instruction to STREAM.
12473 MASK is the ARM register set mask of which only bits 0-15 are important.
12474 REG is the base register, either the frame pointer or the stack pointer,
12475 INSTR is the possibly suffixed load or store instruction.
12476 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12478 static void
12479 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12480 unsigned long mask, int rfe)
12482 unsigned i;
12483 bool not_first = FALSE;
12485 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12486 fputc ('\t', stream);
12487 asm_fprintf (stream, instr, reg);
12488 fputc ('{', stream);
12490 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12491 if (mask & (1 << i))
12493 if (not_first)
12494 fprintf (stream, ", ");
12496 asm_fprintf (stream, "%r", i);
12497 not_first = TRUE;
12500 if (rfe)
12501 fprintf (stream, "}^\n");
12502 else
12503 fprintf (stream, "}\n");
12507 /* Output a FLDMD instruction to STREAM.
12508 BASE is the register containing the address.
12509 REG and COUNT specify the register range.
12510 Extra registers may be added to avoid hardware bugs.
12512 We output FLDMD even for ARMv5 VFP implementations. Although
12513 FLDMD is technically not supported until ARMv6, it is believed
12514 that all VFP implementations support its use in this context. */
12516 static void
12517 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12519 int i;
12521 /* Workaround ARM10 VFPr1 bug. */
12522 if (count == 2 && !arm_arch6)
12524 if (reg == 15)
12525 reg--;
12526 count++;
12529 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12530 load into multiple parts if we have to handle more than 16 registers. */
12531 if (count > 16)
12533 vfp_output_fldmd (stream, base, reg, 16);
12534 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12535 return;
12538 fputc ('\t', stream);
12539 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12541 for (i = reg; i < reg + count; i++)
12543 if (i > reg)
12544 fputs (", ", stream);
12545 asm_fprintf (stream, "d%d", i);
12547 fputs ("}\n", stream);
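/* For example (illustrative register numbers): a call with BASE being the
   stack pointer, REG == 8 and COUNT == 3 prints

       fldmfdd sp!, {d8, d9, d10}

   on an ARMv6 or later core; on earlier cores a two-register range is
   widened by the ARM10 VFPr1 workaround above.  */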
12552 /* Output the assembly for a store multiple. */
12554 const char *
12555 vfp_output_fstmd (rtx * operands)
12557 char pattern[100];
12558 int p;
12559 int base;
12560 int i;
12562 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12563 p = strlen (pattern);
12565 gcc_assert (GET_CODE (operands[1]) == REG);
12567 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12568 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12570 p += sprintf (&pattern[p], ", d%d", base + i);
12572 strcpy (&pattern[p], "}");
12574 output_asm_insn (pattern, operands);
12575 return "";
12579 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12580 number of bytes pushed. */
12582 static int
12583 vfp_emit_fstmd (int base_reg, int count)
12585 rtx par;
12586 rtx dwarf;
12587 rtx tmp, reg;
12588 int i;
12590 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12591 register pairs are stored by a store multiple insn. We avoid this
12592 by pushing an extra pair. */
12593 if (count == 2 && !arm_arch6)
12595 if (base_reg == LAST_VFP_REGNUM - 3)
12596 base_reg -= 2;
12597 count++;
12600 /* FSTMD may not store more than 16 doubleword registers at once. Split
12601 larger stores into multiple parts (up to a maximum of two, in
12602 practice). */
12603 if (count > 16)
12605 int saved;
12606 /* NOTE: base_reg is an internal register number, so each D register
12607 counts as 2. */
12608 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12609 saved += vfp_emit_fstmd (base_reg, 16);
12610 return saved;
12613 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12614 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12616 reg = gen_rtx_REG (DFmode, base_reg);
12617 base_reg += 2;
12619 XVECEXP (par, 0, 0)
12620 = gen_rtx_SET (VOIDmode,
12621 gen_frame_mem
12622 (BLKmode,
12623 gen_rtx_PRE_MODIFY (Pmode,
12624 stack_pointer_rtx,
12625 plus_constant
12626 (stack_pointer_rtx,
12627 - (count * 8)))
12629 gen_rtx_UNSPEC (BLKmode,
12630 gen_rtvec (1, reg),
12631 UNSPEC_PUSH_MULT));
12633 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12634 plus_constant (stack_pointer_rtx, -(count * 8)));
12635 RTX_FRAME_RELATED_P (tmp) = 1;
12636 XVECEXP (dwarf, 0, 0) = tmp;
12638 tmp = gen_rtx_SET (VOIDmode,
12639 gen_frame_mem (DFmode, stack_pointer_rtx),
12640 reg);
12641 RTX_FRAME_RELATED_P (tmp) = 1;
12642 XVECEXP (dwarf, 0, 1) = tmp;
12644 for (i = 1; i < count; i++)
12646 reg = gen_rtx_REG (DFmode, base_reg);
12647 base_reg += 2;
12648 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12650 tmp = gen_rtx_SET (VOIDmode,
12651 gen_frame_mem (DFmode,
12652 plus_constant (stack_pointer_rtx,
12653 i * 8)),
12654 reg);
12655 RTX_FRAME_RELATED_P (tmp) = 1;
12656 XVECEXP (dwarf, 0, i + 1) = tmp;
12659 par = emit_insn (par);
12660 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12661 RTX_FRAME_RELATED_P (par) = 1;
12663 return count * 8;
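/* Illustrative use: describing a push of the two pairs {d8, d9} on an
   ARMv6-or-later VFP returns 2 * 8 == 16, the number of bytes the store
   multiple subtracts from the stack pointer; on older cores the ARM10
   workaround above widens the range to three registers and the result
   becomes 24.  */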
12666 /* Emit a call instruction with pattern PAT. ADDR is the address of
12667 the call target. */
12669 void
12670 arm_emit_call_insn (rtx pat, rtx addr)
12672 rtx insn;
12674 insn = emit_call_insn (pat);
12676 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12677 If the call might use such an entry, add a use of the PIC register
12678 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12679 if (TARGET_VXWORKS_RTP
12680 && flag_pic
12681 && GET_CODE (addr) == SYMBOL_REF
12682 && (SYMBOL_REF_DECL (addr)
12683 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12684 : !SYMBOL_REF_LOCAL_P (addr)))
12686 require_pic_register ();
12687 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12691 /* Output a 'call' insn. */
12692 const char *
12693 output_call (rtx *operands)
12695 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12697 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12698 if (REGNO (operands[0]) == LR_REGNUM)
12700 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12701 output_asm_insn ("mov%?\t%0, %|lr", operands);
12704 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12706 if (TARGET_INTERWORK || arm_arch4t)
12707 output_asm_insn ("bx%?\t%0", operands);
12708 else
12709 output_asm_insn ("mov%?\t%|pc, %0", operands);
12711 return "";
12714 /* Output a 'call' insn that is a reference in memory. This is
12715 disabled for ARMv5, where we prefer a blx instead, because otherwise
12716 there's a significant performance overhead.
12717 const char *
12718 output_call_mem (rtx *operands)
12720 gcc_assert (!arm_arch5);
12721 if (TARGET_INTERWORK)
12723 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12724 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12725 output_asm_insn ("bx%?\t%|ip", operands);
12727 else if (regno_use_in (LR_REGNUM, operands[0]))
12729 /* LR is used in the memory address. We load the address in the
12730 first instruction. It's safe to use IP as the target of the
12731 load since the call will kill it anyway. */
12732 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12733 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12734 if (arm_arch4t)
12735 output_asm_insn ("bx%?\t%|ip", operands);
12736 else
12737 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12739 else
12741 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12742 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12745 return "";
12749 /* Output a move from arm registers to an fpa register.
12750 OPERANDS[0] is an fpa register.
12751 OPERANDS[1] is the first register of an arm register pair. */
12752 const char *
12753 output_mov_long_double_fpa_from_arm (rtx *operands)
12755 int arm_reg0 = REGNO (operands[1]);
12756 rtx ops[3];
12758 gcc_assert (arm_reg0 != IP_REGNUM);
12760 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12761 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12762 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12764 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12765 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12767 return "";
12770 /* Output a move from an fpa register to arm registers.
12771 OPERANDS[0] is the first register of an arm register pair.
12772 OPERANDS[1] is an fpa register. */
12773 const char *
12774 output_mov_long_double_arm_from_fpa (rtx *operands)
12776 int arm_reg0 = REGNO (operands[0]);
12777 rtx ops[3];
12779 gcc_assert (arm_reg0 != IP_REGNUM);
12781 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12782 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12783 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12785 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12786 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12787 return "";
12790 /* Output a move from arm registers to arm registers of a long double
12791 OPERANDS[0] is the destination.
12792 OPERANDS[1] is the source. */
12793 const char *
12794 output_mov_long_double_arm_from_arm (rtx *operands)
12796 /* We have to be careful here because the two might overlap. */
12797 int dest_start = REGNO (operands[0]);
12798 int src_start = REGNO (operands[1]);
12799 rtx ops[2];
12800 int i;
12802 if (dest_start < src_start)
12804 for (i = 0; i < 3; i++)
12806 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12807 ops[1] = gen_rtx_REG (SImode, src_start + i);
12808 output_asm_insn ("mov%?\t%0, %1", ops);
12811 else
12813 for (i = 2; i >= 0; i--)
12815 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12816 ops[1] = gen_rtx_REG (SImode, src_start + i);
12817 output_asm_insn ("mov%?\t%0, %1", ops);
12821 return "";
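/* Example of why the copy direction matters (registers invented): moving
   the triple starting at r1 into the one starting at r2 overlaps, so the
   second loop copies from the top down

       mov     r4, r3
       mov     r3, r2
       mov     r2, r1

   while a destination below the source is copied from the bottom up.  */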
12824 void
12825 arm_emit_movpair (rtx dest, rtx src)
12827 /* If the src is an immediate, simplify it. */
12828 if (CONST_INT_P (src))
12830 HOST_WIDE_INT val = INTVAL (src);
12831 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12832 if ((val >> 16) & 0x0000ffff)
12833 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12834 GEN_INT (16)),
12835 GEN_INT ((val >> 16) & 0x0000ffff));
12836 return;
12838 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12839 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
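/* For instance (illustrative): for the immediate 0x12345678 the two sets
   emitted above are rendered by the movw/movt patterns as

       movw    r0, #0x5678
       movt    r0, #0x1234

   and a constant whose high half is zero needs only the first insn.  */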
12842 /* Output a move from arm registers to an fpa register.
12843 OPERANDS[0] is an fpa register.
12844 OPERANDS[1] is the first register of an arm register pair. */
12845 const char *
12846 output_mov_double_fpa_from_arm (rtx *operands)
12848 int arm_reg0 = REGNO (operands[1]);
12849 rtx ops[2];
12851 gcc_assert (arm_reg0 != IP_REGNUM);
12853 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12854 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12855 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12856 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12857 return "";
12860 /* Output a move from an fpa register to arm registers.
12861 OPERANDS[0] is the first register of an arm register pair.
12862 OPERANDS[1] is an fpa register. */
12863 const char *
12864 output_mov_double_arm_from_fpa (rtx *operands)
12866 int arm_reg0 = REGNO (operands[0]);
12867 rtx ops[2];
12869 gcc_assert (arm_reg0 != IP_REGNUM);
12871 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12872 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12873 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12874 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12875 return "";
12878 /* Output a move between double words. It must be REG<-MEM
12879 or MEM<-REG. */
12880 const char *
12881 output_move_double (rtx *operands)
12883 enum rtx_code code0 = GET_CODE (operands[0]);
12884 enum rtx_code code1 = GET_CODE (operands[1]);
12885 rtx otherops[3];
12887 if (code0 == REG)
12889 unsigned int reg0 = REGNO (operands[0]);
12891 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12893 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12895 switch (GET_CODE (XEXP (operands[1], 0)))
12897 case REG:
12898 if (TARGET_LDRD
12899 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12900 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12901 else
12902 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12903 break;
12905 case PRE_INC:
12906 gcc_assert (TARGET_LDRD);
12907 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12908 break;
12910 case PRE_DEC:
12911 if (TARGET_LDRD)
12912 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12913 else
12914 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12915 break;
12917 case POST_INC:
12918 if (TARGET_LDRD)
12919 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12920 else
12921 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12922 break;
12924 case POST_DEC:
12925 gcc_assert (TARGET_LDRD);
12926 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12927 break;
12929 case PRE_MODIFY:
12930 case POST_MODIFY:
12931 /* Autoincrement addressing modes should never have overlapping
12932 base and destination registers, and overlapping index registers
12933 are already prohibited, so this doesn't need to worry about
12934 fix_cm3_ldrd. */
12935 otherops[0] = operands[0];
12936 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12937 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12939 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12941 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12943 /* Registers overlap so split out the increment. */
12944 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12945 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12947 else
12949 /* Use a single insn if we can.
12950 FIXME: IWMMXT allows offsets larger than ldrd can
12951 handle, fix these up with a pair of ldr. */
12952 if (TARGET_THUMB2
12953 || GET_CODE (otherops[2]) != CONST_INT
12954 || (INTVAL (otherops[2]) > -256
12955 && INTVAL (otherops[2]) < 256))
12956 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12957 else
12959 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12960 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12964 else
12966 /* Use a single insn if we can.
12967 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12968 fix these up with a pair of ldr. */
12969 if (TARGET_THUMB2
12970 || GET_CODE (otherops[2]) != CONST_INT
12971 || (INTVAL (otherops[2]) > -256
12972 && INTVAL (otherops[2]) < 256))
12973 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12974 else
12976 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12977 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12980 break;
12982 case LABEL_REF:
12983 case CONST:
12984 /* We might be able to use ldrd %0, %1 here. However the range is
12985 different to ldr/adr, and it is broken on some ARMv7-M
12986 implementations. */
12987 /* Use the second register of the pair to avoid problematic
12988 overlap. */
12989 otherops[1] = operands[1];
12990 output_asm_insn ("adr%?\t%0, %1", otherops);
12991 operands[1] = otherops[0];
12992 if (TARGET_LDRD)
12993 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12994 else
12995 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12996 break;
12998 /* ??? This needs checking for thumb2. */
12999 default:
13000 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
13001 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
13003 otherops[0] = operands[0];
13004 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
13005 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
13007 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
13009 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13011 switch ((int) INTVAL (otherops[2]))
13013 case -8:
13014 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13015 return "";
13016 case -4:
13017 if (TARGET_THUMB2)
13018 break;
13019 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13020 return "";
13021 case 4:
13022 if (TARGET_THUMB2)
13023 break;
13024 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13025 return "";
13028 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
13029 operands[1] = otherops[0];
13030 if (TARGET_LDRD
13031 && (GET_CODE (otherops[2]) == REG
13032 || TARGET_THUMB2
13033 || (GET_CODE (otherops[2]) == CONST_INT
13034 && INTVAL (otherops[2]) > -256
13035 && INTVAL (otherops[2]) < 256)))
13037 if (reg_overlap_mentioned_p (operands[0],
13038 otherops[2]))
13040 rtx tmp;
13041 /* Swap base and index registers over to
13042 avoid a conflict. */
13043 tmp = otherops[1];
13044 otherops[1] = otherops[2];
13045 otherops[2] = tmp;
13047 /* If both registers conflict, it will usually
13048 have been fixed by a splitter. */
13049 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13050 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13052 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13053 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13055 else
13057 otherops[0] = operands[0];
13058 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13060 return "";
13063 if (GET_CODE (otherops[2]) == CONST_INT)
13065 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13066 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13067 else
13068 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13070 else
13071 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13073 else
13074 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13076 if (TARGET_LDRD)
13077 return "ldr%(d%)\t%0, [%1]";
13079 return "ldm%(ia%)\t%1, %M0";
13081 else
13083 otherops[1] = adjust_address (operands[1], SImode, 4);
13084 /* Take care of overlapping base/data reg. */
13085 if (reg_mentioned_p (operands[0], operands[1]))
13087 output_asm_insn ("ldr%?\t%0, %1", otherops);
13088 output_asm_insn ("ldr%?\t%0, %1", operands);
13090 else
13092 output_asm_insn ("ldr%?\t%0, %1", operands);
13093 output_asm_insn ("ldr%?\t%0, %1", otherops);
13098 else
13100 /* Constraints should ensure this. */
13101 gcc_assert (code0 == MEM && code1 == REG);
13102 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13104 switch (GET_CODE (XEXP (operands[0], 0)))
13106 case REG:
13107 if (TARGET_LDRD)
13108 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13109 else
13110 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13111 break;
13113 case PRE_INC:
13114 gcc_assert (TARGET_LDRD);
13115 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13116 break;
13118 case PRE_DEC:
13119 if (TARGET_LDRD)
13120 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13121 else
13122 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13123 break;
13125 case POST_INC:
13126 if (TARGET_LDRD)
13127 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13128 else
13129 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13130 break;
13132 case POST_DEC:
13133 gcc_assert (TARGET_LDRD);
13134 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13135 break;
13137 case PRE_MODIFY:
13138 case POST_MODIFY:
13139 otherops[0] = operands[1];
13140 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13141 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13143 /* IWMMXT allows offsets larger than strd can handle,
13144 fix these up with a pair of str. */
13145 if (!TARGET_THUMB2
13146 && GET_CODE (otherops[2]) == CONST_INT
13147 && (INTVAL(otherops[2]) <= -256
13148 || INTVAL(otherops[2]) >= 256))
13150 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13152 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13153 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13155 else
13157 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13158 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13161 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13162 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13163 else
13164 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13165 break;
13167 case PLUS:
13168 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13169 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13171 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13173 case -8:
13174 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13175 return "";
13177 case -4:
13178 if (TARGET_THUMB2)
13179 break;
13180 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13181 return "";
13183 case 4:
13184 if (TARGET_THUMB2)
13185 break;
13186 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13187 return "";
13190 if (TARGET_LDRD
13191 && (GET_CODE (otherops[2]) == REG
13192 || TARGET_THUMB2
13193 || (GET_CODE (otherops[2]) == CONST_INT
13194 && INTVAL (otherops[2]) > -256
13195 && INTVAL (otherops[2]) < 256)))
13197 otherops[0] = operands[1];
13198 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13199 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13200 return "";
13202 /* Fall through */
13204 default:
13205 otherops[0] = adjust_address (operands[0], SImode, 4);
13206 otherops[1] = operands[1];
13207 output_asm_insn ("str%?\t%1, %0", operands);
13208 output_asm_insn ("str%?\t%H1, %0", otherops);
13212 return "";
13215 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13216 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13218 const char *
13219 output_move_quad (rtx *operands)
13221 if (REG_P (operands[0]))
13223 /* Load, or reg->reg move. */
13225 if (MEM_P (operands[1]))
13227 switch (GET_CODE (XEXP (operands[1], 0)))
13229 case REG:
13230 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13231 break;
13233 case LABEL_REF:
13234 case CONST:
13235 output_asm_insn ("adr%?\t%0, %1", operands);
13236 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13237 break;
13239 default:
13240 gcc_unreachable ();
13243 else
13245 rtx ops[2];
13246 int dest, src, i;
13248 gcc_assert (REG_P (operands[1]));
13250 dest = REGNO (operands[0]);
13251 src = REGNO (operands[1]);
13253 /* This seems pretty dumb, but hopefully GCC won't try to do it
13254 very often. */
13255 if (dest < src)
13256 for (i = 0; i < 4; i++)
13258 ops[0] = gen_rtx_REG (SImode, dest + i);
13259 ops[1] = gen_rtx_REG (SImode, src + i);
13260 output_asm_insn ("mov%?\t%0, %1", ops);
13262 else
13263 for (i = 3; i >= 0; i--)
13265 ops[0] = gen_rtx_REG (SImode, dest + i);
13266 ops[1] = gen_rtx_REG (SImode, src + i);
13267 output_asm_insn ("mov%?\t%0, %1", ops);
13271 else
13273 gcc_assert (MEM_P (operands[0]));
13274 gcc_assert (REG_P (operands[1]));
13275 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13277 switch (GET_CODE (XEXP (operands[0], 0)))
13279 case REG:
13280 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13281 break;
13283 default:
13284 gcc_unreachable ();
13288 return "";
13291 /* Output a VFP load or store instruction. */
13293 const char *
13294 output_move_vfp (rtx *operands)
13296 rtx reg, mem, addr, ops[2];
13297 int load = REG_P (operands[0]);
13298 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13299 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13300 const char *templ;
13301 char buff[50];
13302 enum machine_mode mode;
13304 reg = operands[!load];
13305 mem = operands[load];
13307 mode = GET_MODE (reg);
13309 gcc_assert (REG_P (reg));
13310 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13311 gcc_assert (mode == SFmode
13312 || mode == DFmode
13313 || mode == SImode
13314 || mode == DImode
13315 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13316 gcc_assert (MEM_P (mem));
13318 addr = XEXP (mem, 0);
13320 switch (GET_CODE (addr))
13322 case PRE_DEC:
13323 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13324 ops[0] = XEXP (addr, 0);
13325 ops[1] = reg;
13326 break;
13328 case POST_INC:
13329 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13330 ops[0] = XEXP (addr, 0);
13331 ops[1] = reg;
13332 break;
13334 default:
13335 templ = "f%s%c%%?\t%%%s0, %%1%s";
13336 ops[0] = reg;
13337 ops[1] = mem;
13338 break;
13341 sprintf (buff, templ,
13342 load ? "ld" : "st",
13343 dp ? 'd' : 's',
13344 dp ? "P" : "",
13345 integer_p ? "\t%@ int" : "");
13346 output_asm_insn (buff, ops);
13348 return "";
13351 /* Output a Neon quad-word load or store, or a load or store for
13352 larger structure modes.
13354 WARNING: The ordering of elements is weird in big-endian mode,
13355 because we use VSTM, as required by the EABI. GCC RTL defines
13356 element ordering based on in-memory order. This can differ
13357 from the architectural ordering of elements within a NEON register.
13358 The intrinsics defined in arm_neon.h use the NEON register element
13359 ordering, not the GCC RTL element ordering.
13361 For example, the in-memory ordering of a big-endian quadword
13362 vector with 16-bit elements when stored from register pair {d0,d1}
13363 will be (lowest address first, d0[N] is NEON register element N):
13365 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13367 When necessary, quadword registers (dN, dN+1) are moved to ARM
13368 registers from rN in the order:
13370 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13372 So that STM/LDM can be used on vectors in ARM registers, and the
13373 same memory layout will result as if VSTM/VLDM were used. */
13375 const char *
13376 output_move_neon (rtx *operands)
13378 rtx reg, mem, addr, ops[2];
13379 int regno, load = REG_P (operands[0]);
13380 const char *templ;
13381 char buff[50];
13382 enum machine_mode mode;
13384 reg = operands[!load];
13385 mem = operands[load];
13387 mode = GET_MODE (reg);
13389 gcc_assert (REG_P (reg));
13390 regno = REGNO (reg);
13391 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13392 || NEON_REGNO_OK_FOR_QUAD (regno));
13393 gcc_assert (VALID_NEON_DREG_MODE (mode)
13394 || VALID_NEON_QREG_MODE (mode)
13395 || VALID_NEON_STRUCT_MODE (mode));
13396 gcc_assert (MEM_P (mem));
13398 addr = XEXP (mem, 0);
13400 /* Strip off const from addresses like (const (plus (...))). */
13401 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13402 addr = XEXP (addr, 0);
13404 switch (GET_CODE (addr))
13406 case POST_INC:
13407 templ = "v%smia%%?\t%%0!, %%h1";
13408 ops[0] = XEXP (addr, 0);
13409 ops[1] = reg;
13410 break;
13412 case PRE_DEC:
13413 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13414 templ = "v%smdb%%?\t%%0!, %%h1";
13415 ops[0] = XEXP (addr, 0);
13416 ops[1] = reg;
13417 break;
13419 case POST_MODIFY:
13420 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13421 gcc_unreachable ();
13423 case LABEL_REF:
13424 case PLUS:
13426 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13427 int i;
13428 int overlap = -1;
13429 for (i = 0; i < nregs; i++)
13431 /* We're only using DImode here because it's a convenient size. */
13432 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13433 ops[1] = adjust_address (mem, DImode, 8 * i);
13434 if (reg_overlap_mentioned_p (ops[0], mem))
13436 gcc_assert (overlap == -1);
13437 overlap = i;
13439 else
13441 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13442 output_asm_insn (buff, ops);
13445 if (overlap != -1)
13447 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13448 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13449 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13450 output_asm_insn (buff, ops);
13453 return "";
13456 default:
13457 templ = "v%smia%%?\t%%m0, %%h1";
13458 ops[0] = mem;
13459 ops[1] = reg;
13462 sprintf (buff, templ, load ? "ld" : "st");
13463 output_asm_insn (buff, ops);
13465 return "";
13468 /* Compute and return the length of neon_mov<mode>, where <mode> is
13469 one of VSTRUCT modes: EI, OI, CI or XI. */
13470 int
13471 arm_attr_length_move_neon (rtx insn)
13473 rtx reg, mem, addr;
13474 int load;
13475 enum machine_mode mode;
13477 extract_insn_cached (insn);
13479 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13481 mode = GET_MODE (recog_data.operand[0]);
13482 switch (mode)
13484 case EImode:
13485 case OImode:
13486 return 8;
13487 case CImode:
13488 return 12;
13489 case XImode:
13490 return 16;
13491 default:
13492 gcc_unreachable ();
13496 load = REG_P (recog_data.operand[0]);
13497 reg = recog_data.operand[!load];
13498 mem = recog_data.operand[load];
13500 gcc_assert (MEM_P (mem));
13502 mode = GET_MODE (reg);
13503 addr = XEXP (mem, 0);
13505 /* Strip off const from addresses like (const (plus (...))). */
13506 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13507 addr = XEXP (addr, 0);
13509 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13511 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13512 return insns * 4;
13514 else
13515 return 4;
13518 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13519 return zero. */
13521 int
13522 arm_address_offset_is_imm (rtx insn)
13524 rtx mem, addr;
13526 extract_insn_cached (insn);
13528 if (REG_P (recog_data.operand[0]))
13529 return 0;
13531 mem = recog_data.operand[0];
13533 gcc_assert (MEM_P (mem));
13535 addr = XEXP (mem, 0);
13537 if (GET_CODE (addr) == REG
13538 || (GET_CODE (addr) == PLUS
13539 && GET_CODE (XEXP (addr, 0)) == REG
13540 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13541 return 1;
13542 else
13543 return 0;
13546 /* Output an ADD r, s, #n where n may be too big for one instruction.
13547 If adding zero to one register, output nothing. */
13548 const char *
13549 output_add_immediate (rtx *operands)
13551 HOST_WIDE_INT n = INTVAL (operands[2]);
13553 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13555 if (n < 0)
13556 output_multi_immediate (operands,
13557 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13558 -n);
13559 else
13560 output_multi_immediate (operands,
13561 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13565 return "";
13568 /* Output a multiple immediate operation.
13569 OPERANDS is the vector of operands referred to in the output patterns.
13570 INSTR1 is the output pattern to use for the first constant.
13571 INSTR2 is the output pattern to use for subsequent constants.
13572 IMMED_OP is the index of the constant slot in OPERANDS.
13573 N is the constant value. */
13574 static const char *
13575 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13576 int immed_op, HOST_WIDE_INT n)
13578 #if HOST_BITS_PER_WIDE_INT > 32
13579 n &= 0xffffffff;
13580 #endif
13582 if (n == 0)
13584 /* Quick and easy output. */
13585 operands[immed_op] = const0_rtx;
13586 output_asm_insn (instr1, operands);
13588 else
13590 int i;
13591 const char * instr = instr1;
13593 /* Note that n is never zero here (which would give no output). */
13594 for (i = 0; i < 32; i += 2)
13596 if (n & (3 << i))
13598 operands[immed_op] = GEN_INT (n & (255 << i));
13599 output_asm_insn (instr, operands);
13600 instr = instr2;
13601 i += 6;
13606 return "";
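/* Worked example (values invented): with N == 0x00ff0f00 the loop above
   first finds the byte at bit 8 and then the byte at bit 16, emitting

       add     r0, r1, #0x0f00
       add     r0, r0, #0x00ff0000

   each constant being an 8-bit value rotated by an even amount, i.e. a
   valid ARM data-processing immediate.  */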
13609 /* Return the name of a shifter operation. */
13610 static const char *
13611 arm_shift_nmem(enum rtx_code code)
13613 switch (code)
13615 case ASHIFT:
13616 return ARM_LSL_NAME;
13618 case ASHIFTRT:
13619 return "asr";
13621 case LSHIFTRT:
13622 return "lsr";
13624 case ROTATERT:
13625 return "ror";
13627 default:
13628 abort();
13632 /* Return the appropriate ARM instruction for the operation code.
13633 The returned result should not be overwritten. OP is the rtx of the
13634 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13635 was shifted. */
13636 const char *
13637 arithmetic_instr (rtx op, int shift_first_arg)
13639 switch (GET_CODE (op))
13641 case PLUS:
13642 return "add";
13644 case MINUS:
13645 return shift_first_arg ? "rsb" : "sub";
13647 case IOR:
13648 return "orr";
13650 case XOR:
13651 return "eor";
13653 case AND:
13654 return "and";
13656 case ASHIFT:
13657 case ASHIFTRT:
13658 case LSHIFTRT:
13659 case ROTATERT:
13660 return arm_shift_nmem(GET_CODE(op));
13662 default:
13663 gcc_unreachable ();
13667 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13668 for the operation code. The returned result should not be overwritten.
13669 OP is the rtx code of the shift.
13670 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise
13671 it will contain the constant shift amount. */
13672 static const char *
13673 shift_op (rtx op, HOST_WIDE_INT *amountp)
13675 const char * mnem;
13676 enum rtx_code code = GET_CODE (op);
13678 switch (GET_CODE (XEXP (op, 1)))
13680 case REG:
13681 case SUBREG:
13682 *amountp = -1;
13683 break;
13685 case CONST_INT:
13686 *amountp = INTVAL (XEXP (op, 1));
13687 break;
13689 default:
13690 gcc_unreachable ();
13693 switch (code)
13695 case ROTATE:
13696 gcc_assert (*amountp != -1);
13697 *amountp = 32 - *amountp;
13698 code = ROTATERT;
13700 /* Fall through. */
13702 case ASHIFT:
13703 case ASHIFTRT:
13704 case LSHIFTRT:
13705 case ROTATERT:
13706 mnem = arm_shift_nmem(code);
13707 break;
13709 case MULT:
13710 /* We never have to worry about the amount being other than a
13711 power of 2, since this case can never be reloaded from a reg. */
13712 gcc_assert (*amountp != -1);
13713 *amountp = int_log2 (*amountp);
13714 return ARM_LSL_NAME;
13716 default:
13717 gcc_unreachable ();
13720 if (*amountp != -1)
13722 /* This is not 100% correct, but follows from the desire to merge
13723 multiplication by a power of 2 with the recognizer for a
13724 shift. >=32 is not a valid shift for "lsl", so we must try to
13725 output a shift that produces the correct arithmetical result.
13726 Using lsr #32 is identical except for the fact that the carry bit
13727 is not set correctly if we set the flags; but we never use the
13728 carry bit from such an operation, so we can ignore that. */
13729 if (code == ROTATERT)
13730 /* Rotate is just modulo 32. */
13731 *amountp &= 31;
13732 else if (*amountp != (*amountp & 31))
13734 if (code == ASHIFT)
13735 mnem = "lsr";
13736 *amountp = 32;
13739 /* Shifts of 0 are no-ops. */
13740 if (*amountp == 0)
13741 return NULL;
13744 return mnem;
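/* For illustration: given (mult X (const_int 8)) this returns
   ARM_LSL_NAME ("lsl") with *AMOUNTP set to 3, while
   (rotate X (const_int 8)) is rewritten as a rotate-right, returning
   "ror" with *AMOUNTP set to 24.  */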
13747 /* Obtain the shift from the POWER of two. */
13749 static HOST_WIDE_INT
13750 int_log2 (HOST_WIDE_INT power)
13752 HOST_WIDE_INT shift = 0;
13754 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13756 gcc_assert (shift <= 31);
13757 shift++;
13760 return shift;
13763 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13764 because /bin/as is horribly restrictive. The judgement about
13765 whether or not each character is 'printable' (and can be output as
13766 is) or not (and must be printed with an octal escape) must be made
13767 with reference to the *host* character set -- the situation is
13768 similar to that discussed in the comments above pp_c_char in
13769 c-pretty-print.c. */
13771 #define MAX_ASCII_LEN 51
13773 void
13774 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13776 int i;
13777 int len_so_far = 0;
13779 fputs ("\t.ascii\t\"", stream);
13781 for (i = 0; i < len; i++)
13783 int c = p[i];
13785 if (len_so_far >= MAX_ASCII_LEN)
13787 fputs ("\"\n\t.ascii\t\"", stream);
13788 len_so_far = 0;
13791 if (ISPRINT (c))
13793 if (c == '\\' || c == '\"')
13795 putc ('\\', stream);
13796 len_so_far++;
13798 putc (c, stream);
13799 len_so_far++;
13801 else
13803 fprintf (stream, "\\%03o", c);
13804 len_so_far += 4;
13808 fputs ("\"\n", stream);
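/* For example, the four bytes 'h', 'i', '\n', '\0' would be emitted as

	.ascii	"hi\012\000"

   with a new .ascii directive started whenever MAX_ASCII_LEN characters
   have already been written on the current line.  */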
13811 /* Compute the register save mask for registers 0 through 12
13812 inclusive. This code is used by arm_compute_save_reg_mask. */
13814 static unsigned long
13815 arm_compute_save_reg0_reg12_mask (void)
13817 unsigned long func_type = arm_current_func_type ();
13818 unsigned long save_reg_mask = 0;
13819 unsigned int reg;
13821 if (IS_INTERRUPT (func_type))
13823 unsigned int max_reg;
13824 /* Interrupt functions must not corrupt any registers,
13825 even call clobbered ones. If this is a leaf function
13826 we can just examine the registers used by the RTL, but
13827 otherwise we have to assume that whatever function is
13828 called might clobber anything, and so we have to save
13829 all the call-clobbered registers as well. */
13830 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13831 /* FIQ handlers have registers r8 - r12 banked, so
13832 we only need to check r0 - r7.  Normal ISRs only
13833 bank r14 and r15, so we must check up to r12.
13834 r13 is the stack pointer which is always preserved,
13835 so we do not need to consider it here. */
13836 max_reg = 7;
13837 else
13838 max_reg = 12;
13840 for (reg = 0; reg <= max_reg; reg++)
13841 if (df_regs_ever_live_p (reg)
13842 || (! current_function_is_leaf && call_used_regs[reg]))
13843 save_reg_mask |= (1 << reg);
13845 /* Also save the pic base register if necessary. */
13846 if (flag_pic
13847 && !TARGET_SINGLE_PIC_BASE
13848 && arm_pic_register != INVALID_REGNUM
13849 && crtl->uses_pic_offset_table)
13850 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13852 else if (IS_VOLATILE(func_type))
13854 /* For noreturn functions we historically omitted register saves
13855 altogether.  However, this really messes up debugging.  As a
13856 compromise, save just the frame pointers.  Combined with the link
13857 register saved elsewhere this should be sufficient to get
13858 a backtrace. */
13859 if (frame_pointer_needed)
13860 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13861 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13862 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13863 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13864 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13866 else
13868 /* In the normal case we only need to save those registers
13869 which are call saved and which are used by this function. */
13870 for (reg = 0; reg <= 11; reg++)
13871 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13872 save_reg_mask |= (1 << reg);
13874 /* Handle the frame pointer as a special case. */
13875 if (frame_pointer_needed)
13876 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13878 /* If we aren't loading the PIC register,
13879 don't stack it even though it may be live. */
13880 if (flag_pic
13881 && !TARGET_SINGLE_PIC_BASE
13882 && arm_pic_register != INVALID_REGNUM
13883 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13884 || crtl->uses_pic_offset_table))
13885 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13887 /* The prologue will copy SP into R0, so save it. */
13888 if (IS_STACKALIGN (func_type))
13889 save_reg_mask |= 1;
13892 /* Save registers so the exception handler can modify them. */
13893 if (crtl->calls_eh_return)
13895 unsigned int i;
13897 for (i = 0; ; i++)
13899 reg = EH_RETURN_DATA_REGNO (i);
13900 if (reg == INVALID_REGNUM)
13901 break;
13902 save_reg_mask |= 1 << reg;
13906 return save_reg_mask;
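/* As an illustration: an ordinary (non-interrupt, non-volatile) function
   that uses r4 and r6 yields a mask of 0x50 here, plus the hard frame
   pointer when one is needed and, for PIC code, the PIC base register.
   Interrupt handlers additionally save any call-clobbered registers that
   a callee might corrupt.  */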
13910 /* Compute the number of bytes used to store the static chain register on the
13911 stack, above the stack frame. We need to know this accurately to get the
13912 alignment of the rest of the stack frame correct. */
13914 static int arm_compute_static_chain_stack_bytes (void)
13916 unsigned long func_type = arm_current_func_type ();
13917 int static_chain_stack_bytes = 0;
13919 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
13920 IS_NESTED (func_type) &&
13921 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13922 static_chain_stack_bytes = 4;
13924 return static_chain_stack_bytes;
13928 /* Compute a bit mask of which registers need to be
13929 saved on the stack for the current function.
13930 This is used by arm_get_frame_offsets, which may add extra registers. */
13932 static unsigned long
13933 arm_compute_save_reg_mask (void)
13935 unsigned int save_reg_mask = 0;
13936 unsigned long func_type = arm_current_func_type ();
13937 unsigned int reg;
13939 if (IS_NAKED (func_type))
13940 /* This should never really happen. */
13941 return 0;
13943 /* If we are creating a stack frame, then we must save the frame pointer,
13944 IP (which will hold the old stack pointer), LR and the PC. */
13945 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13946 save_reg_mask |=
13947 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13948 | (1 << IP_REGNUM)
13949 | (1 << LR_REGNUM)
13950 | (1 << PC_REGNUM);
13952 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13954 /* Decide if we need to save the link register.
13955 Interrupt routines have their own banked link register,
13956 so they never need to save it.
13957 Otherwise if we do not use the link register we do not need to save
13958 it. If we are pushing other registers onto the stack however, we
13959 can save an instruction in the epilogue by pushing the link register
13960 now and then popping it back into the PC. This incurs extra memory
13961 accesses though, so we only do it when optimizing for size, and only
13962 if we know that we will not need a fancy return sequence. */
13963 if (df_regs_ever_live_p (LR_REGNUM)
13964 || (save_reg_mask
13965 && optimize_size
13966 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13967 && !crtl->calls_eh_return))
13968 save_reg_mask |= 1 << LR_REGNUM;
13970 if (cfun->machine->lr_save_eliminated)
13971 save_reg_mask &= ~ (1 << LR_REGNUM);
13973 if (TARGET_REALLY_IWMMXT
13974 && ((bit_count (save_reg_mask)
13975 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13976 arm_compute_static_chain_stack_bytes())
13977 ) % 2) != 0)
13979 /* The total number of registers that are going to be pushed
13980 onto the stack is odd. We need to ensure that the stack
13981 is 64-bit aligned before we start to save iWMMXt registers,
13982 and also before we start to create locals. (A local variable
13983 might be a double or long long which we will load/store using
13984 an iWMMXt instruction). Therefore we need to push another
13985 ARM register, so that the stack will be 64-bit aligned. We
13986 try to avoid using the arg registers (r0 - r3) as they might be
13987 used to pass values in a tail call. */
13988 for (reg = 4; reg <= 12; reg++)
13989 if ((save_reg_mask & (1 << reg)) == 0)
13990 break;
13992 if (reg <= 12)
13993 save_reg_mask |= (1 << reg);
13994 else
13996 cfun->machine->sibcall_blocked = 1;
13997 save_reg_mask |= (1 << 3);
14001 /* We may need to push an additional register for use initializing the
14002 PIC base register. */
14003 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
14004 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
14006 reg = thumb_find_work_register (1 << 4);
14007 if (!call_used_regs[reg])
14008 save_reg_mask |= (1 << reg);
14011 return save_reg_mask;
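/* Example of the iWMMXt alignment fix-up above: if the mask so far covers
   {r4, r5, lr} and there are no pretend args, three words would be pushed,
   so the code adds r6 (the lowest unsaved register in r4-r12) to keep the
   stack 64-bit aligned; only if r4-r12 are all taken does it fall back to
   r3 and block sibling calls.  */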
14015 /* Compute a bit mask of which registers need to be
14016 saved on the stack for the current function. */
14017 static unsigned long
14018 thumb1_compute_save_reg_mask (void)
14020 unsigned long mask;
14021 unsigned reg;
14023 mask = 0;
14024 for (reg = 0; reg < 12; reg ++)
14025 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14026 mask |= 1 << reg;
14028 if (flag_pic
14029 && !TARGET_SINGLE_PIC_BASE
14030 && arm_pic_register != INVALID_REGNUM
14031 && crtl->uses_pic_offset_table)
14032 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14034 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14035 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14036 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14038 /* LR will also be pushed if any lo regs are pushed. */
14039 if (mask & 0xff || thumb_force_lr_save ())
14040 mask |= (1 << LR_REGNUM);
14042 /* Make sure we have a low work register if we need one.
14043 We will need one if we are going to push a high register,
14044 but we are not currently intending to push a low register. */
14045 if ((mask & 0xff) == 0
14046 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14048 /* Use thumb_find_work_register to choose which register
14049 we will use. If the register is live then we will
14050 have to push it. Use LAST_LO_REGNUM as our fallback
14051 choice for the register to select. */
14052 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14053 /* Make sure the register returned by thumb_find_work_register is
14054 not part of the return value. */
14055 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14056 reg = LAST_LO_REGNUM;
14058 if (! call_used_regs[reg])
14059 mask |= 1 << reg;
14062 /* The 504 below is 8 bytes less than 512 because there are two possible
14063 alignment words. We can't tell here if they will be present or not so we
14064 have to play it safe and assume that they are. */
14065 if ((CALLER_INTERWORKING_SLOT_SIZE +
14066 ROUND_UP_WORD (get_frame_size ()) +
14067 crtl->outgoing_args_size) >= 504)
14069 /* This is the same as the code in thumb1_expand_prologue() which
14070 determines which register to use for stack decrement. */
14071 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14072 if (mask & (1 << reg))
14073 break;
14075 if (reg > LAST_LO_REGNUM)
14077 /* Make sure we have a register available for stack decrement. */
14078 mask |= 1 << LAST_LO_REGNUM;
14082 return mask;
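/* Two illustrations of the special cases above: if a high register
   (r8-r11) must be pushed, or TARGET_BACKTRACE is in effect, but no low
   register is in the mask, a low work register is added so the high
   registers can be staged through it; and a frame of, say, 600 bytes
   exceeds the 504-byte limit, so r7 (LAST_LO_REGNUM) is added when no
   register in r4-r7 is already being saved, giving the prologue a
   register to use for the stack decrement.  */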
14086 /* Return the number of bytes required to save VFP registers. */
14087 static int
14088 arm_get_vfp_saved_size (void)
14090 unsigned int regno;
14091 int count;
14092 int saved;
14094 saved = 0;
14095 /* Space for saved VFP registers. */
14096 if (TARGET_HARD_FLOAT && TARGET_VFP)
14098 count = 0;
14099 for (regno = FIRST_VFP_REGNUM;
14100 regno < LAST_VFP_REGNUM;
14101 regno += 2)
14103 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14104 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14106 if (count > 0)
14108 /* Workaround ARM10 VFPr1 bug. */
14109 if (count == 2 && !arm_arch6)
14110 count++;
14111 saved += count * 8;
14113 count = 0;
14115 else
14116 count++;
14118 if (count > 0)
14120 if (count == 2 && !arm_arch6)
14121 count++;
14122 saved += count * 8;
14125 return saved;
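/* The loop above walks the S registers two at a time (one D register per
   step); each contiguous run of live registers contributes 8 bytes per D
   register.  As an example of the ARM10 VFPr1 workaround: a run of exactly
   two D registers (say d8 and d9) on a pre-ARMv6 core is padded to three,
   so 24 bytes are reserved rather than 16.  */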
14129 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14130 everything bar the final return instruction. */
14131 const char *
14132 output_return_instruction (rtx operand, int really_return, int reverse)
14134 char conditional[10];
14135 char instr[100];
14136 unsigned reg;
14137 unsigned long live_regs_mask;
14138 unsigned long func_type;
14139 arm_stack_offsets *offsets;
14141 func_type = arm_current_func_type ();
14143 if (IS_NAKED (func_type))
14144 return "";
14146 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14148 /* If this function was declared non-returning, and we have
14149 found a tail call, then we have to trust that the called
14150 function won't return. */
14151 if (really_return)
14153 rtx ops[2];
14155 /* Otherwise, trap an attempted return by aborting. */
14156 ops[0] = operand;
14157 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14158 : "abort");
14159 assemble_external_libcall (ops[1]);
14160 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14163 return "";
14166 gcc_assert (!cfun->calls_alloca || really_return);
14168 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14170 cfun->machine->return_used_this_function = 1;
14172 offsets = arm_get_frame_offsets ();
14173 live_regs_mask = offsets->saved_regs_mask;
14175 if (live_regs_mask)
14177 const char * return_reg;
14179 /* If we do not have any special requirements for function exit
14180 (e.g. interworking) then we can load the return address
14181 directly into the PC. Otherwise we must load it into LR. */
14182 if (really_return
14183 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14184 return_reg = reg_names[PC_REGNUM];
14185 else
14186 return_reg = reg_names[LR_REGNUM];
14188 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14190 /* There are three possible reasons for the IP register
14191 being saved. 1) a stack frame was created, in which case
14192 IP contains the old stack pointer, or 2) an ISR routine
14193 corrupted it, or 3) it was saved to align the stack on
14194 iWMMXt. In case 1, restore IP into SP, otherwise just
14195 restore IP. */
14196 if (frame_pointer_needed)
14198 live_regs_mask &= ~ (1 << IP_REGNUM);
14199 live_regs_mask |= (1 << SP_REGNUM);
14201 else
14202 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14205 /* On some ARM architectures it is faster to use LDR rather than
14206 LDM to load a single register. On other architectures, the
14207 cost is the same. In 26 bit mode, or for exception handlers,
14208 we have to use LDM to load the PC so that the CPSR is also
14209 restored. */
14210 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14211 if (live_regs_mask == (1U << reg))
14212 break;
14214 if (reg <= LAST_ARM_REGNUM
14215 && (reg != LR_REGNUM
14216 || ! really_return
14217 || ! IS_INTERRUPT (func_type)))
14219 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14220 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14222 else
14224 char *p;
14225 int first = 1;
14227 /* Generate the load multiple instruction to restore the
14228 registers. Note we can get here, even if
14229 frame_pointer_needed is true, but only if sp already
14230 points to the base of the saved core registers. */
14231 if (live_regs_mask & (1 << SP_REGNUM))
14233 unsigned HOST_WIDE_INT stack_adjust;
14235 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14236 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14238 if (stack_adjust && arm_arch5 && TARGET_ARM)
14239 if (TARGET_UNIFIED_ASM)
14240 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14241 else
14242 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14243 else
14245 /* If we can't use ldmib (SA110 bug),
14246 then try to pop r3 instead. */
14247 if (stack_adjust)
14248 live_regs_mask |= 1 << 3;
14250 if (TARGET_UNIFIED_ASM)
14251 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14252 else
14253 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14256 else
14257 if (TARGET_UNIFIED_ASM)
14258 sprintf (instr, "pop%s\t{", conditional);
14259 else
14260 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14262 p = instr + strlen (instr);
14264 for (reg = 0; reg <= SP_REGNUM; reg++)
14265 if (live_regs_mask & (1 << reg))
14267 int l = strlen (reg_names[reg]);
14269 if (first)
14270 first = 0;
14271 else
14273 memcpy (p, ", ", 2);
14274 p += 2;
14277 memcpy (p, "%|", 2);
14278 memcpy (p + 2, reg_names[reg], l);
14279 p += l + 2;
14282 if (live_regs_mask & (1 << LR_REGNUM))
14284 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14285 /* If returning from an interrupt, restore the CPSR. */
14286 if (IS_INTERRUPT (func_type))
14287 strcat (p, "^");
14289 else
14290 strcpy (p, "}");
14293 output_asm_insn (instr, & operand);
14295 /* See if we need to generate an extra instruction to
14296 perform the actual function return. */
14297 if (really_return
14298 && func_type != ARM_FT_INTERWORKED
14299 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14301 /* The return has already been handled
14302 by loading the LR into the PC. */
14303 really_return = 0;
14307 if (really_return)
14309 switch ((int) ARM_FUNC_TYPE (func_type))
14311 case ARM_FT_ISR:
14312 case ARM_FT_FIQ:
14313 /* ??? This is wrong for unified assembly syntax. */
14314 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14315 break;
14317 case ARM_FT_INTERWORKED:
14318 sprintf (instr, "bx%s\t%%|lr", conditional);
14319 break;
14321 case ARM_FT_EXCEPTION:
14322 /* ??? This is wrong for unified assembly syntax. */
14323 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14324 break;
14326 default:
14327 /* Use bx if it's available. */
14328 if (arm_arch5 || arm_arch4t)
14329 sprintf (instr, "bx%s\t%%|lr", conditional);
14330 else
14331 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14332 break;
14335 output_asm_insn (instr, & operand);
14338 return "";
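/* By way of example, a normal ARM-state function that saved {r4, r5, lr}
   and is really returning typically gets

	ldmfd	sp!, {r4, r5, pc}

   (or "pop {r4, r5, pc}" under unified syntax), loading the saved LR
   straight into the PC so that no separate return instruction is
   needed.  */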
14341 /* Write the function name into the code section, directly preceding
14342 the function prologue.
14344 Code will be output similar to this:
14346 .ascii "arm_poke_function_name", 0
14347 .align
14349 .word 0xff000000 + (t1 - t0)
14350 arm_poke_function_name
14351 mov ip, sp
14352 stmfd sp!, {fp, ip, lr, pc}
14353 sub fp, ip, #4
14355 When performing a stack backtrace, code can inspect the value
14356 of 'pc' stored at 'fp' + 0. If the trace function then looks
14357 at location pc - 12 and the top 8 bits are set, then we know
14358 that there is a function name embedded immediately preceding this
14359 location, and that it has length ((pc[-3]) & ~0xff000000).
14361 We assume that pc is declared as a pointer to an unsigned long.
14363 It is of no benefit to output the function name if we are assembling
14364 a leaf function. These function types will not contain a stack
14365 backtrace structure, therefore it is not possible to determine the
14366 function name. */
14367 void
14368 arm_poke_function_name (FILE *stream, const char *name)
14370 unsigned long alignlength;
14371 unsigned long length;
14372 rtx x;
14374 length = strlen (name) + 1;
14375 alignlength = ROUND_UP_WORD (length);
14377 ASM_OUTPUT_ASCII (stream, name, length);
14378 ASM_OUTPUT_ALIGN (stream, 2);
14379 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14380 assemble_aligned_integer (UNITS_PER_WORD, x);
14383 /* Place some comments into the assembler stream
14384 describing the current function. */
14385 static void
14386 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14388 unsigned long func_type;
14390 if (TARGET_THUMB1)
14392 thumb1_output_function_prologue (f, frame_size);
14393 return;
14396 /* Sanity check. */
14397 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14399 func_type = arm_current_func_type ();
14401 switch ((int) ARM_FUNC_TYPE (func_type))
14403 default:
14404 case ARM_FT_NORMAL:
14405 break;
14406 case ARM_FT_INTERWORKED:
14407 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14408 break;
14409 case ARM_FT_ISR:
14410 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14411 break;
14412 case ARM_FT_FIQ:
14413 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14414 break;
14415 case ARM_FT_EXCEPTION:
14416 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14417 break;
14420 if (IS_NAKED (func_type))
14421 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14423 if (IS_VOLATILE (func_type))
14424 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14426 if (IS_NESTED (func_type))
14427 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14428 if (IS_STACKALIGN (func_type))
14429 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14431 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14432 crtl->args.size,
14433 crtl->args.pretend_args_size, frame_size);
14435 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14436 frame_pointer_needed,
14437 cfun->machine->uses_anonymous_args);
14439 if (cfun->machine->lr_save_eliminated)
14440 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14442 if (crtl->calls_eh_return)
14443 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14447 const char *
14448 arm_output_epilogue (rtx sibling)
14450 int reg;
14451 unsigned long saved_regs_mask;
14452 unsigned long func_type;
14453 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14454 frame that is $fp + 4 for a non-variadic function. */
14455 int floats_offset = 0;
14456 rtx operands[3];
14457 FILE * f = asm_out_file;
14458 unsigned int lrm_count = 0;
14459 int really_return = (sibling == NULL);
14460 int start_reg;
14461 arm_stack_offsets *offsets;
14463 /* If we have already generated the return instruction
14464 then it is futile to generate anything else. */
14465 if (use_return_insn (FALSE, sibling) &&
14466 (cfun->machine->return_used_this_function != 0))
14467 return "";
14469 func_type = arm_current_func_type ();
14471 if (IS_NAKED (func_type))
14472 /* Naked functions don't have epilogues. */
14473 return "";
14475 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14477 rtx op;
14479 /* A volatile function should never return. Call abort. */
14480 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14481 assemble_external_libcall (op);
14482 output_asm_insn ("bl\t%a0", &op);
14484 return "";
14487 /* If we are throwing an exception, then we really must be doing a
14488 return, so we can't tail-call. */
14489 gcc_assert (!crtl->calls_eh_return || really_return);
14491 offsets = arm_get_frame_offsets ();
14492 saved_regs_mask = offsets->saved_regs_mask;
14494 if (TARGET_IWMMXT)
14495 lrm_count = bit_count (saved_regs_mask);
14497 floats_offset = offsets->saved_args;
14498 /* Compute how far away the floats will be. */
14499 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14500 if (saved_regs_mask & (1 << reg))
14501 floats_offset += 4;
14503 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14505 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14506 int vfp_offset = offsets->frame;
14508 if (TARGET_FPA_EMU2)
14510 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14511 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14513 floats_offset += 12;
14514 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14515 reg, FP_REGNUM, floats_offset - vfp_offset);
14518 else
14520 start_reg = LAST_FPA_REGNUM;
14522 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14524 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14526 floats_offset += 12;
14528 /* We can't unstack more than four registers at once. */
14529 if (start_reg - reg == 3)
14531 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14532 reg, FP_REGNUM, floats_offset - vfp_offset);
14533 start_reg = reg - 1;
14536 else
14538 if (reg != start_reg)
14539 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14540 reg + 1, start_reg - reg,
14541 FP_REGNUM, floats_offset - vfp_offset);
14542 start_reg = reg - 1;
14546 /* Just in case the last register checked also needs unstacking. */
14547 if (reg != start_reg)
14548 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14549 reg + 1, start_reg - reg,
14550 FP_REGNUM, floats_offset - vfp_offset);
14553 if (TARGET_HARD_FLOAT && TARGET_VFP)
14555 int saved_size;
14557 /* The fldmd insns do not have base+offset addressing
14558 modes, so we use IP to hold the address. */
14559 saved_size = arm_get_vfp_saved_size ();
14561 if (saved_size > 0)
14563 floats_offset += saved_size;
14564 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14565 FP_REGNUM, floats_offset - vfp_offset);
14567 start_reg = FIRST_VFP_REGNUM;
14568 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14570 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14571 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14573 if (start_reg != reg)
14574 vfp_output_fldmd (f, IP_REGNUM,
14575 (start_reg - FIRST_VFP_REGNUM) / 2,
14576 (reg - start_reg) / 2);
14577 start_reg = reg + 2;
14580 if (start_reg != reg)
14581 vfp_output_fldmd (f, IP_REGNUM,
14582 (start_reg - FIRST_VFP_REGNUM) / 2,
14583 (reg - start_reg) / 2);
14586 if (TARGET_IWMMXT)
14588 /* The frame pointer is guaranteed to be non-double-word aligned.
14589 This is because it is set to (old_stack_pointer - 4) and the
14590 old_stack_pointer was double word aligned. Thus the offset to
14591 the iWMMXt registers to be loaded must also be non-double-word
14592 sized, so that the resultant address *is* double-word aligned.
14593 We can ignore floats_offset since that was already included in
14594 the live_regs_mask. */
14595 lrm_count += (lrm_count % 2 ? 2 : 1);
14597 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14598 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14600 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14601 reg, FP_REGNUM, lrm_count * 4);
14602 lrm_count += 2;
14606 /* saved_regs_mask should contain the IP, which at the time of stack
14607 frame generation actually contains the old stack pointer. So a
14608 quick way to unwind the stack is just pop the IP register directly
14609 into the stack pointer. */
14610 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14611 saved_regs_mask &= ~ (1 << IP_REGNUM);
14612 saved_regs_mask |= (1 << SP_REGNUM);
14614 /* There are two registers left in saved_regs_mask - LR and PC. We
14615 only need to restore the LR register (the return address), but to
14616 save time we can load it directly into the PC, unless we need a
14617 special function exit sequence, or we are not really returning. */
14618 if (really_return
14619 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14620 && !crtl->calls_eh_return)
14621 /* Delete the LR from the register mask, so that the LR on
14622 the stack is loaded into the PC in the register mask. */
14623 saved_regs_mask &= ~ (1 << LR_REGNUM);
14624 else
14625 saved_regs_mask &= ~ (1 << PC_REGNUM);
14627 /* We must use SP as the base register, because SP is one of the
14628 registers being restored. If an interrupt or page fault
14629 happens in the ldm instruction, the SP might or might not
14630 have been restored. That would be bad, as then SP will no
14631 longer indicate the safe area of stack, and we can get stack
14632 corruption. Using SP as the base register means that it will
14633 be reset correctly to the original value, should an interrupt
14634 occur. If the stack pointer already points at the right
14635 place, then omit the subtraction. */
14636 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14637 || cfun->calls_alloca)
14638 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14639 4 * bit_count (saved_regs_mask));
14640 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14642 if (IS_INTERRUPT (func_type))
14643 /* Interrupt handlers will have pushed the
14644 IP onto the stack, so restore it now. */
14645 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14647 else
14649 /* This branch is executed for ARM mode (non-apcs frames) and
14650 Thumb-2 mode. Frame layout is essentially the same for those
14651 cases, except that in ARM mode frame pointer points to the
14652 first saved register, while in Thumb-2 mode the frame pointer points
14653 to the last saved register.
14655 It is possible to make frame pointer point to last saved
14656 register in both cases, and remove some conditionals below.
14657 That means that fp setup in prologue would be just "mov fp, sp"
14658 and sp restore in epilogue would be just "mov sp, fp", whereas
14659 now we have to use add/sub in those cases. However, the value
14660 of that would be marginal, as both mov and add/sub are 32-bit
14661 in ARM mode, and it would require extra conditionals
14662 in arm_expand_prologue to distinguish the ARM-apcs-frame case
14663 (where the frame pointer is required to point at the first register)
14664 from the ARM-non-apcs-frame case.  Therefore, such a change is
14665 postponed until a real need arises.  */
14666 unsigned HOST_WIDE_INT amount;
14667 int rfe;
14668 /* Restore stack pointer if necessary. */
14669 if (TARGET_ARM && frame_pointer_needed)
14671 operands[0] = stack_pointer_rtx;
14672 operands[1] = hard_frame_pointer_rtx;
14674 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14675 output_add_immediate (operands);
14677 else
14679 if (frame_pointer_needed)
14681 /* For Thumb-2 restore sp from the frame pointer.
14682 Operand restrictions mean we have to increment FP, then copy
14683 to SP. */
14684 amount = offsets->locals_base - offsets->saved_regs;
14685 operands[0] = hard_frame_pointer_rtx;
14687 else
14689 unsigned long count;
14690 operands[0] = stack_pointer_rtx;
14691 amount = offsets->outgoing_args - offsets->saved_regs;
14692 /* pop call clobbered registers if it avoids a
14693 separate stack adjustment. */
14694 count = offsets->saved_regs - offsets->saved_args;
14695 if (optimize_size
14696 && count != 0
14697 && !crtl->calls_eh_return
14698 && bit_count(saved_regs_mask) * 4 == count
14699 && !IS_INTERRUPT (func_type)
14700 && !crtl->tail_call_emit)
14702 unsigned long mask;
14703 /* Preserve return values, of any size. */
14704 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
14705 mask ^= 0xf;
14706 mask &= ~saved_regs_mask;
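/* For instance, with a 4-byte return value only r0 is protected, leaving
   r1-r3 as candidates; if exactly 12 bytes of stack remain to be released
   and none of r1-r3 is otherwise saved, they are popped (and discarded)
   instead of emitting a separate "add sp, sp, #12".  */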
14707 reg = 0;
14708 while (bit_count (mask) * 4 > amount)
14710 while ((mask & (1 << reg)) == 0)
14711 reg++;
14712 mask &= ~(1 << reg);
14714 if (bit_count (mask) * 4 == amount) {
14715 amount = 0;
14716 saved_regs_mask |= mask;
14721 if (amount)
14723 operands[1] = operands[0];
14724 operands[2] = GEN_INT (amount);
14725 output_add_immediate (operands);
14727 if (frame_pointer_needed)
14728 asm_fprintf (f, "\tmov\t%r, %r\n",
14729 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14732 if (TARGET_FPA_EMU2)
14734 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14735 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14736 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14737 reg, SP_REGNUM);
14739 else
14741 start_reg = FIRST_FPA_REGNUM;
14743 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14745 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14747 if (reg - start_reg == 3)
14749 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14750 start_reg, SP_REGNUM);
14751 start_reg = reg + 1;
14754 else
14756 if (reg != start_reg)
14757 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14758 start_reg, reg - start_reg,
14759 SP_REGNUM);
14761 start_reg = reg + 1;
14765 /* Just in case the last register checked also needs unstacking. */
14766 if (reg != start_reg)
14767 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14768 start_reg, reg - start_reg, SP_REGNUM);
14771 if (TARGET_HARD_FLOAT && TARGET_VFP)
14773 int end_reg = LAST_VFP_REGNUM + 1;
14775 /* Scan the registers in reverse order. We need to match
14776 any groupings made in the prologue and generate matching
14777 pop operations. */
14778 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14780 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14781 && (!df_regs_ever_live_p (reg + 1)
14782 || call_used_regs[reg + 1]))
14784 if (end_reg > reg + 2)
14785 vfp_output_fldmd (f, SP_REGNUM,
14786 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14787 (end_reg - (reg + 2)) / 2);
14788 end_reg = reg;
14791 if (end_reg > reg + 2)
14792 vfp_output_fldmd (f, SP_REGNUM, 0,
14793 (end_reg - (reg + 2)) / 2);
14796 if (TARGET_IWMMXT)
14797 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14798 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14799 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14801 /* If we can, restore the LR into the PC. */
14802 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14803 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14804 && !IS_STACKALIGN (func_type)
14805 && really_return
14806 && crtl->args.pretend_args_size == 0
14807 && saved_regs_mask & (1 << LR_REGNUM)
14808 && !crtl->calls_eh_return)
14810 saved_regs_mask &= ~ (1 << LR_REGNUM);
14811 saved_regs_mask |= (1 << PC_REGNUM);
14812 rfe = IS_INTERRUPT (func_type);
14814 else
14815 rfe = 0;
14817 /* Load the registers off the stack. If we only have one register
14818 to load use the LDR instruction - it is faster. For Thumb-2
14819 always use pop and the assembler will pick the best instruction.  */
14820 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14821 && !IS_INTERRUPT(func_type))
14823 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14825 else if (saved_regs_mask)
14827 if (saved_regs_mask & (1 << SP_REGNUM))
14828 /* Note - write back to the stack register is not enabled
14829 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14830 in the list of registers and if we add writeback the
14831 instruction becomes UNPREDICTABLE. */
14832 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14833 rfe);
14834 else if (TARGET_ARM)
14835 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14836 rfe);
14837 else
14838 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14841 if (crtl->args.pretend_args_size)
14843 /* Unwind the pre-pushed regs. */
14844 operands[0] = operands[1] = stack_pointer_rtx;
14845 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14846 output_add_immediate (operands);
14850 /* We may have already restored PC directly from the stack. */
14851 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14852 return "";
14854 /* Stack adjustment for exception handler. */
14855 if (crtl->calls_eh_return)
14856 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14857 ARM_EH_STACKADJ_REGNUM);
14859 /* Generate the return instruction. */
14860 switch ((int) ARM_FUNC_TYPE (func_type))
14862 case ARM_FT_ISR:
14863 case ARM_FT_FIQ:
14864 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14865 break;
14867 case ARM_FT_EXCEPTION:
14868 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14869 break;
14871 case ARM_FT_INTERWORKED:
14872 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14873 break;
14875 default:
14876 if (IS_STACKALIGN (func_type))
14878 /* See comment in arm_expand_prologue. */
14879 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14881 if (arm_arch5 || arm_arch4t)
14882 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14883 else
14884 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14885 break;
14888 return "";
14891 static void
14892 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14893 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14895 arm_stack_offsets *offsets;
14897 if (TARGET_THUMB1)
14899 int regno;
14901 /* Emit any call-via-reg trampolines that are needed for v4t support
14902 of call_reg and call_value_reg type insns. */
14903 for (regno = 0; regno < LR_REGNUM; regno++)
14905 rtx label = cfun->machine->call_via[regno];
14907 if (label != NULL)
14909 switch_to_section (function_section (current_function_decl));
14910 targetm.asm_out.internal_label (asm_out_file, "L",
14911 CODE_LABEL_NUMBER (label));
14912 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14916 /* ??? Probably not safe to set this here, since it assumes that a
14917 function will be emitted as assembly immediately after we generate
14918 RTL for it. This does not happen for inline functions. */
14919 cfun->machine->return_used_this_function = 0;
14921 else /* TARGET_32BIT */
14923 /* We need to take into account any stack-frame rounding. */
14924 offsets = arm_get_frame_offsets ();
14926 gcc_assert (!use_return_insn (FALSE, NULL)
14927 || (cfun->machine->return_used_this_function != 0)
14928 || offsets->saved_regs == offsets->outgoing_args
14929 || frame_pointer_needed);
14931 /* Reset the ARM-specific per-function variables. */
14932 after_arm_reorg = 0;
14936 /* Generate and emit an insn that we will recognize as a push_multi.
14937 Unfortunately, since this insn does not reflect very well the actual
14938 semantics of the operation, we need to annotate the insn for the benefit
14939 of DWARF2 frame unwind information. */
14940 static rtx
14941 emit_multi_reg_push (unsigned long mask)
14943 int num_regs = 0;
14944 int num_dwarf_regs;
14945 int i, j;
14946 rtx par;
14947 rtx dwarf;
14948 int dwarf_par_index;
14949 rtx tmp, reg;
14951 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14952 if (mask & (1 << i))
14953 num_regs++;
14955 gcc_assert (num_regs && num_regs <= 16);
14957 /* We don't record the PC in the dwarf frame information. */
14958 num_dwarf_regs = num_regs;
14959 if (mask & (1 << PC_REGNUM))
14960 num_dwarf_regs--;
14962 /* For the body of the insn we are going to generate an UNSPEC in
14963 parallel with several USEs. This allows the insn to be recognized
14964 by the push_multi pattern in the arm.md file.
14966 The body of the insn looks something like this:
14968 (parallel [
14969 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14970 (const_int:SI <num>)))
14971 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14972 (use (reg:SI XX))
14973 (use (reg:SI YY))
14977 For the frame note however, we try to be more explicit and actually
14978 show each register being stored into the stack frame, plus a (single)
14979 decrement of the stack pointer. We do it this way in order to be
14980 friendly to the stack unwinding code, which only wants to see a single
14981 stack decrement per instruction. The RTL we generate for the note looks
14982 something like this:
14984 (sequence [
14985 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14986 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14987 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14988 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14992 FIXME:: In an ideal world the PRE_MODIFY would not exist and
14993 instead we'd have a parallel expression detailing all
14994 the stores to the various memory addresses so that debug
14995 information is more up-to-date. Remember however while writing
14996 this to take care of the constraints with the push instruction.
14998 Note also that this has to be taken care of for the VFP registers.
15000 For more see PR43399. */
15002 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15003 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15004 dwarf_par_index = 1;
15006 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15008 if (mask & (1 << i))
15010 reg = gen_rtx_REG (SImode, i);
15012 XVECEXP (par, 0, 0)
15013 = gen_rtx_SET (VOIDmode,
15014 gen_frame_mem
15015 (BLKmode,
15016 gen_rtx_PRE_MODIFY (Pmode,
15017 stack_pointer_rtx,
15018 plus_constant
15019 (stack_pointer_rtx,
15020 -4 * num_regs))
15022 gen_rtx_UNSPEC (BLKmode,
15023 gen_rtvec (1, reg),
15024 UNSPEC_PUSH_MULT));
15026 if (i != PC_REGNUM)
15028 tmp = gen_rtx_SET (VOIDmode,
15029 gen_frame_mem (SImode, stack_pointer_rtx),
15030 reg);
15031 RTX_FRAME_RELATED_P (tmp) = 1;
15032 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15033 dwarf_par_index++;
15036 break;
15040 for (j = 1, i++; j < num_regs; i++)
15042 if (mask & (1 << i))
15044 reg = gen_rtx_REG (SImode, i);
15046 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15048 if (i != PC_REGNUM)
15051 = gen_rtx_SET (VOIDmode,
15052 gen_frame_mem
15053 (SImode,
15054 plus_constant (stack_pointer_rtx,
15055 4 * j)),
15056 reg);
15057 RTX_FRAME_RELATED_P (tmp) = 1;
15058 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15061 j++;
15065 par = emit_insn (par);
15067 tmp = gen_rtx_SET (VOIDmode,
15068 stack_pointer_rtx,
15069 plus_constant (stack_pointer_rtx, -4 * num_regs));
15070 RTX_FRAME_RELATED_P (tmp) = 1;
15071 XVECEXP (dwarf, 0, 0) = tmp;
15073 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15075 return par;
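/* A call such as emit_multi_reg_push ((1 << 4) | (1 << 5) | (1 << LR_REGNUM))
   therefore produces one insn that assembles to something like
   "push {r4, r5, lr}", while the attached REG_FRAME_RELATED_EXPR note
   describes the single 12-byte stack decrement and the three individual
   stores for the unwinder.  */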
15078 /* Calculate the size of the return value that is passed in registers. */
15079 static unsigned
15080 arm_size_return_regs (void)
15082 enum machine_mode mode;
15084 if (crtl->return_rtx != 0)
15085 mode = GET_MODE (crtl->return_rtx);
15086 else
15087 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15089 return GET_MODE_SIZE (mode);
15092 static rtx
15093 emit_sfm (int base_reg, int count)
15095 rtx par;
15096 rtx dwarf;
15097 rtx tmp, reg;
15098 int i;
15100 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15101 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15103 reg = gen_rtx_REG (XFmode, base_reg++);
15105 XVECEXP (par, 0, 0)
15106 = gen_rtx_SET (VOIDmode,
15107 gen_frame_mem
15108 (BLKmode,
15109 gen_rtx_PRE_MODIFY (Pmode,
15110 stack_pointer_rtx,
15111 plus_constant
15112 (stack_pointer_rtx,
15113 -12 * count))
15115 gen_rtx_UNSPEC (BLKmode,
15116 gen_rtvec (1, reg),
15117 UNSPEC_PUSH_MULT));
15118 tmp = gen_rtx_SET (VOIDmode,
15119 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15120 RTX_FRAME_RELATED_P (tmp) = 1;
15121 XVECEXP (dwarf, 0, 1) = tmp;
15123 for (i = 1; i < count; i++)
15125 reg = gen_rtx_REG (XFmode, base_reg++);
15126 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15128 tmp = gen_rtx_SET (VOIDmode,
15129 gen_frame_mem (XFmode,
15130 plus_constant (stack_pointer_rtx,
15131 i * 12)),
15132 reg);
15133 RTX_FRAME_RELATED_P (tmp) = 1;
15134 XVECEXP (dwarf, 0, i + 1) = tmp;
15137 tmp = gen_rtx_SET (VOIDmode,
15138 stack_pointer_rtx,
15139 plus_constant (stack_pointer_rtx, -12 * count));
15141 RTX_FRAME_RELATED_P (tmp) = 1;
15142 XVECEXP (dwarf, 0, 0) = tmp;
15144 par = emit_insn (par);
15145 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15147 return par;
15151 /* Return true if the current function needs to save/restore LR. */
15153 static bool
15154 thumb_force_lr_save (void)
15156 return !cfun->machine->lr_save_eliminated
15157 && (!leaf_function_p ()
15158 || thumb_far_jump_used_p ()
15159 || df_regs_ever_live_p (LR_REGNUM));
15163 /* Return true if r3 is used by any of the tail call insns in the
15164 current function. */
15166 static bool
15167 any_sibcall_uses_r3 (void)
15169 edge_iterator ei;
15170 edge e;
15172 if (!crtl->tail_call_emit)
15173 return false;
15174 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15175 if (e->flags & EDGE_SIBCALL)
15177 rtx call = BB_END (e->src);
15178 if (!CALL_P (call))
15179 call = prev_nonnote_nondebug_insn (call);
15180 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15181 if (find_regno_fusage (call, USE, 3))
15182 return true;
15184 return false;
15188 /* Compute the distance from register FROM to register TO.
15189 These can be the arg pointer (26), the soft frame pointer (25),
15190 the stack pointer (13) or the hard frame pointer (11).
15191 In thumb mode r7 is used as the soft frame pointer, if needed.
15192 Typical stack layout looks like this:
15194 old stack pointer -> | |
15195 ----
15196 | | \
15197 | | saved arguments for
15198 | | vararg functions
15199 | | /
15201 hard FP & arg pointer -> | | \
15202 | | stack
15203 | | frame
15204 | | /
15206 | | \
15207 | | call saved
15208 | | registers
15209 soft frame pointer -> | | /
15211 | | \
15212 | | local
15213 | | variables
15214 locals base pointer -> | | /
15216 | | \
15217 | | outgoing
15218 | | arguments
15219 current stack pointer -> | | /
15222 For a given function some or all of these stack components
15223 may not be needed, giving rise to the possibility of
15224 eliminating some of the registers.
15226 The values returned by this function must reflect the behavior
15227 of arm_expand_prologue() and arm_compute_save_reg_mask().
15229 The sign of the number returned reflects the direction of stack
15230 growth, so the values are positive for all eliminations except
15231 from the soft frame pointer to the hard frame pointer.
15233 SFP may point just inside the local variables block to ensure correct
15234 alignment. */
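/* As a small sanity check of the offsets described above: for a leaf
   function with no locals and nothing pushed (and no caller interworking
   slot), saved_args, saved_regs, soft_frame and outgoing_args all
   coincide, so eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM
   below yields -4, matching the note in
   arm_compute_initial_elimination_offset that a result of -4 is indeed
   correct.  */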
15237 /* Calculate stack offsets. These are used to calculate register elimination
15238 offsets and in prologue/epilogue code. Also calculates which registers
15239 should be saved. */
15241 static arm_stack_offsets *
15242 arm_get_frame_offsets (void)
15244 struct arm_stack_offsets *offsets;
15245 unsigned long func_type;
15246 int leaf;
15247 int saved;
15248 int core_saved;
15249 HOST_WIDE_INT frame_size;
15250 int i;
15252 offsets = &cfun->machine->stack_offsets;
15254 /* We need to know if we are a leaf function. Unfortunately, it
15255 is possible to be called after start_sequence has been called,
15256 which causes get_insns to return the insns for the sequence,
15257 not the function, which will cause leaf_function_p to return
15258 the incorrect result.  However, we only need
15260 to know about leaf functions once reload has completed, and the
15261 frame size cannot be changed after that time, so we can safely
15262 use the cached value. */
15264 if (reload_completed)
15265 return offsets;
15267 /* Initially this is the size of the local variables.  It will be translated
15268 into an offset once we have determined the size of preceding data. */
15269 frame_size = ROUND_UP_WORD (get_frame_size ());
15271 leaf = leaf_function_p ();
15273 /* Space for variadic functions. */
15274 offsets->saved_args = crtl->args.pretend_args_size;
15276 /* In Thumb mode this is incorrect, but never used. */
15277 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15278 arm_compute_static_chain_stack_bytes();
15280 if (TARGET_32BIT)
15282 unsigned int regno;
15284 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15285 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15286 saved = core_saved;
15288 /* We know that SP will be doubleword aligned on entry, and we must
15289 preserve that condition at any subroutine call. We also require the
15290 soft frame pointer to be doubleword aligned. */
15292 if (TARGET_REALLY_IWMMXT)
15294 /* Check for the call-saved iWMMXt registers. */
15295 for (regno = FIRST_IWMMXT_REGNUM;
15296 regno <= LAST_IWMMXT_REGNUM;
15297 regno++)
15298 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15299 saved += 8;
15302 func_type = arm_current_func_type ();
15303 if (! IS_VOLATILE (func_type))
15305 /* Space for saved FPA registers. */
15306 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15307 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15308 saved += 12;
15310 /* Space for saved VFP registers. */
15311 if (TARGET_HARD_FLOAT && TARGET_VFP)
15312 saved += arm_get_vfp_saved_size ();
15315 else /* TARGET_THUMB1 */
15317 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15318 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15319 saved = core_saved;
15320 if (TARGET_BACKTRACE)
15321 saved += 16;
15324 /* Saved registers include the stack frame. */
15325 offsets->saved_regs = offsets->saved_args + saved +
15326 arm_compute_static_chain_stack_bytes();
15327 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15328 /* A leaf function does not need any stack alignment if it has nothing
15329 on the stack. */
15330 if (leaf && frame_size == 0
15331 /* However if it calls alloca(), we have a dynamically allocated
15332 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
15333 && ! cfun->calls_alloca)
15335 offsets->outgoing_args = offsets->soft_frame;
15336 offsets->locals_base = offsets->soft_frame;
15337 return offsets;
15340 /* Ensure SFP has the correct alignment. */
15341 if (ARM_DOUBLEWORD_ALIGN
15342 && (offsets->soft_frame & 7))
15344 offsets->soft_frame += 4;
15345 /* Try to align stack by pushing an extra reg. Don't bother doing this
15346 when there is a stack frame as the alignment will be rolled into
15347 the normal stack adjustment. */
15348 if (frame_size + crtl->outgoing_args_size == 0)
15350 int reg = -1;
15352 /* If it is safe to use r3, then do so. This sometimes
15353 generates better code on Thumb-2 by avoiding the need to
15354 use 32-bit push/pop instructions. */
15355 if (! any_sibcall_uses_r3 ()
15356 && arm_size_return_regs () <= 12
15357 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15359 reg = 3;
15361 else
15362 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15364 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15366 reg = i;
15367 break;
15371 if (reg != -1)
15373 offsets->saved_regs += 4;
15374 offsets->saved_regs_mask |= (1 << reg);
15379 offsets->locals_base = offsets->soft_frame + frame_size;
15380 offsets->outgoing_args = (offsets->locals_base
15381 + crtl->outgoing_args_size);
15383 if (ARM_DOUBLEWORD_ALIGN)
15385 /* Ensure SP remains doubleword aligned. */
15386 if (offsets->outgoing_args & 7)
15387 offsets->outgoing_args += 4;
15388 gcc_assert (!(offsets->outgoing_args & 7));
15391 return offsets;
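/* Example of the doubleword-alignment adjustment above: a function that
   saves only {r4, r5, lr} (no coprocessor saves or pretend args) ends up
   with saved_regs == 12, so soft_frame is bumped by 4; if there are no
   locals or outgoing arguments the slack is absorbed by pushing one extra
   register (r3 when it is safe to do so, otherwise the first free register
   from r4 upwards) rather than by adjusting SP separately.  */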
15395 /* Calculate the relative offsets for the different stack pointers. Positive
15396 offsets are in the direction of stack growth. */
15398 HOST_WIDE_INT
15399 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15401 arm_stack_offsets *offsets;
15403 offsets = arm_get_frame_offsets ();
15405 /* OK, now we have enough information to compute the distances.
15406 There must be an entry in these switch tables for each pair
15407 of registers in ELIMINABLE_REGS, even if some of the entries
15408 seem to be redundant or useless. */
15409 switch (from)
15411 case ARG_POINTER_REGNUM:
15412 switch (to)
15414 case THUMB_HARD_FRAME_POINTER_REGNUM:
15415 return 0;
15417 case FRAME_POINTER_REGNUM:
15418 /* This is the reverse of the soft frame pointer
15419 to hard frame pointer elimination below. */
15420 return offsets->soft_frame - offsets->saved_args;
15422 case ARM_HARD_FRAME_POINTER_REGNUM:
15423 /* This is only non-zero in the case where the static chain register
15424 is stored above the frame. */
15425 return offsets->frame - offsets->saved_args - 4;
15427 case STACK_POINTER_REGNUM:
15428 /* If nothing has been pushed on the stack at all
15429 then this will return -4. This *is* correct! */
15430 return offsets->outgoing_args - (offsets->saved_args + 4);
15432 default:
15433 gcc_unreachable ();
15435 gcc_unreachable ();
15437 case FRAME_POINTER_REGNUM:
15438 switch (to)
15440 case THUMB_HARD_FRAME_POINTER_REGNUM:
15441 return 0;
15443 case ARM_HARD_FRAME_POINTER_REGNUM:
15444 /* The hard frame pointer points to the top entry in the
15445 stack frame. The soft frame pointer to the bottom entry
15446 in the stack frame. If there is no stack frame at all,
15447 then they are identical. */
15449 return offsets->frame - offsets->soft_frame;
15451 case STACK_POINTER_REGNUM:
15452 return offsets->outgoing_args - offsets->soft_frame;
15454 default:
15455 gcc_unreachable ();
15457 gcc_unreachable ();
15459 default:
15460 /* You cannot eliminate from the stack pointer.
15461 In theory you could eliminate from the hard frame
15462 pointer to the stack pointer, but this will never
15463 happen, since if a stack frame is not needed the
15464 hard frame pointer will never be used. */
15465 gcc_unreachable ();
15469 /* Given FROM and TO register numbers, say whether this elimination is
15470 allowed. Frame pointer elimination is automatically handled.
15472 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15473 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15474 pointer, we must eliminate FRAME_POINTER_REGNUM into
15475 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15476 ARG_POINTER_REGNUM. */
15478 bool
15479 arm_can_eliminate (const int from, const int to)
15481 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15482 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15483 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15484 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15485 true);
15488 /* Emit RTL to save coprocessor registers on function entry. Returns the
15489 number of bytes pushed. */
15491 static int
15492 arm_save_coproc_regs(void)
15494 int saved_size = 0;
15495 unsigned reg;
15496 unsigned start_reg;
15497 rtx insn;
15499 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15500 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15502 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15503 insn = gen_rtx_MEM (V2SImode, insn);
15504 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15505 RTX_FRAME_RELATED_P (insn) = 1;
15506 saved_size += 8;
15509 /* Save any floating point call-saved registers used by this
15510 function. */
15511 if (TARGET_FPA_EMU2)
15513 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15514 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15516 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15517 insn = gen_rtx_MEM (XFmode, insn);
15518 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15519 RTX_FRAME_RELATED_P (insn) = 1;
15520 saved_size += 12;
15523 else
15525 start_reg = LAST_FPA_REGNUM;
15527 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15529 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15531 if (start_reg - reg == 3)
15533 insn = emit_sfm (reg, 4);
15534 RTX_FRAME_RELATED_P (insn) = 1;
15535 saved_size += 48;
15536 start_reg = reg - 1;
15539 else
15541 if (start_reg != reg)
15543 insn = emit_sfm (reg + 1, start_reg - reg);
15544 RTX_FRAME_RELATED_P (insn) = 1;
15545 saved_size += (start_reg - reg) * 12;
15547 start_reg = reg - 1;
15551 if (start_reg != reg)
15553 insn = emit_sfm (reg + 1, start_reg - reg);
15554 saved_size += (start_reg - reg) * 12;
15555 RTX_FRAME_RELATED_P (insn) = 1;
15558 if (TARGET_HARD_FLOAT && TARGET_VFP)
15560 start_reg = FIRST_VFP_REGNUM;
15562 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15564 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15565 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15567 if (start_reg != reg)
15568 saved_size += vfp_emit_fstmd (start_reg,
15569 (reg - start_reg) / 2);
15570 start_reg = reg + 2;
15573 if (start_reg != reg)
15574 saved_size += vfp_emit_fstmd (start_reg,
15575 (reg - start_reg) / 2);
15577 return saved_size;
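/* The VFP loop above groups contiguous live D registers into single
   store-multiples: if, say, d8, d9 and d11 are live, vfp_emit_fstmd is
   called once for {d8, d9} and once for {d11}, mirroring the grouping
   that arm_output_epilogue must later match when popping.  */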
15581 /* Set the Thumb frame pointer from the stack pointer. */
15583 static void
15584 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15586 HOST_WIDE_INT amount;
15587 rtx insn, dwarf;
15589 amount = offsets->outgoing_args - offsets->locals_base;
15590 if (amount < 1024)
15591 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15592 stack_pointer_rtx, GEN_INT (amount)));
15593 else
15595 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15596 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15597 expects the first two operands to be the same. */
15598 if (TARGET_THUMB2)
15600 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15601 stack_pointer_rtx,
15602 hard_frame_pointer_rtx));
15604 else
15606 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15607 hard_frame_pointer_rtx,
15608 stack_pointer_rtx));
15610 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15611 plus_constant (stack_pointer_rtx, amount));
15612 RTX_FRAME_RELATED_P (dwarf) = 1;
15613 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15616 RTX_FRAME_RELATED_P (insn) = 1;
15619 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15620 function. */
15621 void
15622 arm_expand_prologue (void)
15624 rtx amount;
15625 rtx insn;
15626 rtx ip_rtx;
15627 unsigned long live_regs_mask;
15628 unsigned long func_type;
15629 int fp_offset = 0;
15630 int saved_pretend_args = 0;
15631 int saved_regs = 0;
15632 unsigned HOST_WIDE_INT args_to_push;
15633 arm_stack_offsets *offsets;
15635 func_type = arm_current_func_type ();
15637 /* Naked functions don't have prologues. */
15638 if (IS_NAKED (func_type))
15639 return;
15641 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15642 args_to_push = crtl->args.pretend_args_size;
15644 /* Compute which register we will have to save onto the stack. */
15645 offsets = arm_get_frame_offsets ();
15646 live_regs_mask = offsets->saved_regs_mask;
15648 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15650 if (IS_STACKALIGN (func_type))
15652 rtx dwarf;
15653 rtx r0;
15654 rtx r1;
15655 /* Handle a word-aligned stack pointer. We generate the following:
15657 mov r0, sp
15658 bic r1, r0, #7
15659 mov sp, r1
15660 <save and restore r0 in normal prologue/epilogue>
15661 mov sp, r0
15662 bx lr
15664 The unwinder doesn't need to know about the stack realignment.
15665 Just tell it we saved SP in r0. */
15666 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15668 r0 = gen_rtx_REG (SImode, 0);
15669 r1 = gen_rtx_REG (SImode, 1);
15670 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15671 compiler won't choke. */
15672 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15673 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15674 insn = gen_movsi (r0, stack_pointer_rtx);
15675 RTX_FRAME_RELATED_P (insn) = 1;
15676 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15677 emit_insn (insn);
15678 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15679 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15682 /* For APCS frames, if the IP register is clobbered when
15683 creating the frame, save that register in a special
15684 way. */
15685 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15687 if (IS_INTERRUPT (func_type))
15689 /* Interrupt functions must not corrupt any registers.
15690 Creating a frame pointer however, corrupts the IP
15691 register, so we must push it first. */
15692 insn = emit_multi_reg_push (1 << IP_REGNUM);
15694 /* Do not set RTX_FRAME_RELATED_P on this insn.
15695 The dwarf stack unwinding code only wants to see one
15696 stack decrement per function, and this is not it. If
15697 this instruction is labeled as being part of the frame
15698 creation sequence then dwarf2out_frame_debug_expr will
15699 die when it encounters the assignment of IP to FP
15700 later on, since the use of SP here establishes SP as
15701 the CFA register and not IP.
15703 Anyway this instruction is not really part of the stack
15704 frame creation although it is part of the prologue. */
15706 else if (IS_NESTED (func_type))
15708 /* The static chain register is the same as the IP register
15709 used as a scratch register during stack frame creation.
15710 To get around this, we need to find somewhere to store IP
15711 whilst the frame is being created. We try the following
15712 places in order:
15714 1. The last argument register.
15715 2. A slot on the stack above the frame. (This only
15716 works if the function is not a varargs function).
15717 3. Register r3, after pushing the argument registers
15718 onto the stack.
15720 Note - we only need to tell the dwarf2 backend about the SP
15721 adjustment in the second variant; the static chain register
15722 doesn't need to be unwound, as it doesn't contain a value
15723 inherited from the caller. */
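/* As a rough illustration, assuming r3 is the last argument register,
the three variants correspond to sequences along the lines of:
mov r3, ip @ 1. stash IP in the last argument register
str ip, [sp, #-4]! @ 2. stash IP in a slot above the frame
stmfd sp!, {r0-r3} @ 3. push the argument registers onto the stack,
mov r3, ip @ then reuse r3 to hold IP. */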
15725 if (df_regs_ever_live_p (3) == false)
15726 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15727 else if (args_to_push == 0)
15729 rtx dwarf;
15731 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15732 saved_regs += 4;
15734 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15735 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15736 fp_offset = 4;
15738 /* Just tell the dwarf backend that we adjusted SP. */
15739 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15740 plus_constant (stack_pointer_rtx,
15741 -fp_offset));
15742 RTX_FRAME_RELATED_P (insn) = 1;
15743 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15745 else
15747 /* Store the args on the stack. */
15748 if (cfun->machine->uses_anonymous_args)
15749 insn = emit_multi_reg_push
15750 ((0xf0 >> (args_to_push / 4)) & 0xf);
15751 else
15752 insn = emit_insn
15753 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15754 GEN_INT (- args_to_push)));
15756 RTX_FRAME_RELATED_P (insn) = 1;
15758 saved_pretend_args = 1;
15759 fp_offset = args_to_push;
15760 args_to_push = 0;
15762 /* Now reuse r3 to preserve IP. */
15763 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15767 insn = emit_set_insn (ip_rtx,
15768 plus_constant (stack_pointer_rtx, fp_offset));
15769 RTX_FRAME_RELATED_P (insn) = 1;
15772 if (args_to_push)
15774 /* Push the argument registers, or reserve space for them. */
15775 if (cfun->machine->uses_anonymous_args)
15776 insn = emit_multi_reg_push
15777 ((0xf0 >> (args_to_push / 4)) & 0xf);
15778 else
15779 insn = emit_insn
15780 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15781 GEN_INT (- args_to_push)));
15782 RTX_FRAME_RELATED_P (insn) = 1;
15785 /* If this is an interrupt service routine, and the link register
15786 is going to be pushed, and we're not generating the extra
15787 push of IP (needed when a frame is needed and the frame layout is APCS),
15788 then subtracting four from LR now will mean that the function return
15789 can be done with a single instruction. */
15790 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15791 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15792 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15793 && TARGET_ARM)
15795 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15797 emit_set_insn (lr, plus_constant (lr, -4));
15800 if (live_regs_mask)
15802 saved_regs += bit_count (live_regs_mask) * 4;
15803 if (optimize_size && !frame_pointer_needed
15804 && saved_regs == offsets->saved_regs - offsets->saved_args)
15806 /* If no coprocessor registers are being pushed and we don't have
15807 to worry about a frame pointer then push extra registers to
15808 create the stack frame. This is done in a way that does not
15809 alter the frame layout, so is independent of the epilogue. */
15810 int n;
15811 int frame;
15812 n = 0;
15813 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15814 n++;
15815 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15816 if (frame && n * 4 >= frame)
15818 n = frame / 4;
15819 live_regs_mask |= (1 << n) - 1;
15820 saved_regs += frame;
15823 insn = emit_multi_reg_push (live_regs_mask);
15824 RTX_FRAME_RELATED_P (insn) = 1;
15827 if (! IS_VOLATILE (func_type))
15828 saved_regs += arm_save_coproc_regs ();
15830 if (frame_pointer_needed && TARGET_ARM)
15832 /* Create the new frame pointer. */
15833 if (TARGET_APCS_FRAME)
15835 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15836 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15837 RTX_FRAME_RELATED_P (insn) = 1;
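/* With no pretend args and no IP spill (fp_offset == 0) the add above
is simply "sub fp, ip, #4", the classic APCS prologue step that puts
the frame pointer four bytes below the value IP captured from the
pre-push stack pointer. */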
15839 if (IS_NESTED (func_type))
15841 /* Recover the static chain register. */
15842 if (!df_regs_ever_live_p (3)
15843 || saved_pretend_args)
15844 insn = gen_rtx_REG (SImode, 3);
15845 else /* if (crtl->args.pretend_args_size == 0) */
15847 insn = plus_constant (hard_frame_pointer_rtx, 4);
15848 insn = gen_frame_mem (SImode, insn);
15850 emit_set_insn (ip_rtx, insn);
15851 /* Add a USE to stop propagate_one_insn() from barfing. */
15852 emit_insn (gen_prologue_use (ip_rtx));
15855 else
15857 insn = GEN_INT (saved_regs - 4);
15858 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15859 stack_pointer_rtx, insn));
15860 RTX_FRAME_RELATED_P (insn) = 1;
15864 if (flag_stack_usage_info)
15865 current_function_static_stack_size
15866 = offsets->outgoing_args - offsets->saved_args;
15868 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15870 /* This add can produce multiple insns for a large constant, so we
15871 need to get tricky. */
15872 rtx last = get_last_insn ();
15874 amount = GEN_INT (offsets->saved_args + saved_regs
15875 - offsets->outgoing_args);
15877 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15878 amount));
15881 last = last ? NEXT_INSN (last) : get_insns ();
15882 RTX_FRAME_RELATED_P (last) = 1;
15884 while (last != insn);
15886 /* If the frame pointer is needed, emit a special barrier that
15887 will prevent the scheduler from moving stores to the frame
15888 before the stack adjustment. */
15889 if (frame_pointer_needed)
15890 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15891 hard_frame_pointer_rtx));
15895 if (frame_pointer_needed && TARGET_THUMB2)
15896 thumb_set_frame_pointer (offsets);
15898 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15900 unsigned long mask;
15902 mask = live_regs_mask;
15903 mask &= THUMB2_WORK_REGS;
15904 if (!IS_NESTED (func_type))
15905 mask |= (1 << IP_REGNUM);
15906 arm_load_pic_register (mask);
15909 /* If we are profiling, make sure no instructions are scheduled before
15910 the call to mcount. Similarly if the user has requested no
15911 scheduling in the prolog. Similarly if we want non-call exceptions
15912 using the EABI unwinder, to prevent faulting instructions from being
15913 swapped with a stack adjustment. */
15914 if (crtl->profile || !TARGET_SCHED_PROLOG
15915 || (arm_except_unwind_info (&global_options) == UI_TARGET
15916 && cfun->can_throw_non_call_exceptions))
15917 emit_insn (gen_blockage ());
15919 /* If the link register is being kept alive, with the return address in it,
15920 then make sure that it does not get reused by the ce2 pass. */
15921 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15922 cfun->machine->lr_save_eliminated = 1;
15925 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15926 static void
15927 arm_print_condition (FILE *stream)
15929 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15931 /* Branch conversion is not implemented for Thumb-2. */
15932 if (TARGET_THUMB)
15934 output_operand_lossage ("predicated Thumb instruction");
15935 return;
15937 if (current_insn_predicate != NULL)
15939 output_operand_lossage
15940 ("predicated instruction in conditional sequence");
15941 return;
15944 fputs (arm_condition_codes[arm_current_cc], stream);
15946 else if (current_insn_predicate)
15948 enum arm_cond_code code;
15950 if (TARGET_THUMB1)
15952 output_operand_lossage ("predicated Thumb instruction");
15953 return;
15956 code = get_arm_condition_code (current_insn_predicate);
15957 fputs (arm_condition_codes[code], stream);
15962 /* If CODE is 'd', then X is a condition operand and the instruction
15963 should only be executed if the condition is true.
15964 If CODE is 'D', then X is a condition operand and the instruction
15965 should only be executed if the condition is false: however, if the mode
15966 of the comparison is CCFPEmode, then always execute the instruction -- we
15967 do this because in these circumstances !GE does not necessarily imply LT;
15968 in these cases the instruction pattern will take care to make sure that
15969 an instruction containing %d will follow, thereby undoing the effects of
15970 doing this instruction unconditionally.
15971 If CODE is 'N' then X is a floating point operand that must be negated
15972 before output.
15973 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15974 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15975 static void
15976 arm_print_operand (FILE *stream, rtx x, int code)
15978 switch (code)
15980 case '@':
15981 fputs (ASM_COMMENT_START, stream);
15982 return;
15984 case '_':
15985 fputs (user_label_prefix, stream);
15986 return;
15988 case '|':
15989 fputs (REGISTER_PREFIX, stream);
15990 return;
15992 case '?':
15993 arm_print_condition (stream);
15994 return;
15996 case '(':
15997 /* Nothing in unified syntax, otherwise the current condition code. */
15998 if (!TARGET_UNIFIED_ASM)
15999 arm_print_condition (stream);
16000 break;
16002 case ')':
16003 /* The current condition code in unified syntax, otherwise nothing. */
16004 if (TARGET_UNIFIED_ASM)
16005 arm_print_condition (stream);
16006 break;
16008 case '.':
16009 /* The current condition code for a condition code setting instruction.
16010 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16011 if (TARGET_UNIFIED_ASM)
16013 fputc('s', stream);
16014 arm_print_condition (stream);
16016 else
16018 arm_print_condition (stream);
16019 fputc('s', stream);
16021 return;
16023 case '!':
16024 /* If the instruction is conditionally executed then print
16025 the current condition code, otherwise print 's'. */
16026 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16027 if (current_insn_predicate)
16028 arm_print_condition (stream);
16029 else
16030 fputc('s', stream);
16031 break;
16033 /* %# is a "break" sequence. It doesn't output anything, but is used to
16034 separate e.g. operand numbers from following text, if that text consists
16035 of further digits which we don't want to be part of the operand
16036 number. */
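/* For instance, writing "%0%#1" in an output template prints operand 0
immediately followed by a literal '1', whereas a plain "%01" would have
been parsed as a (multi-digit) operand number. */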
16037 case '#':
16038 return;
16040 case 'N':
16042 REAL_VALUE_TYPE r;
16043 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16044 r = real_value_negate (&r);
16045 fprintf (stream, "%s", fp_const_from_val (&r));
16047 return;
16049 /* An integer or symbol address without a preceding # sign. */
16050 case 'c':
16051 switch (GET_CODE (x))
16053 case CONST_INT:
16054 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16055 break;
16057 case SYMBOL_REF:
16058 output_addr_const (stream, x);
16059 break;
16061 default:
16062 gcc_unreachable ();
16064 return;
16066 case 'B':
16067 if (GET_CODE (x) == CONST_INT)
16069 HOST_WIDE_INT val;
16070 val = ARM_SIGN_EXTEND (~INTVAL (x));
16071 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16073 else
16075 putc ('~', stream);
16076 output_addr_const (stream, x);
16078 return;
16080 case 'L':
16081 /* The low 16 bits of an immediate constant. */
16082 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16083 return;
16085 case 'i':
16086 fprintf (stream, "%s", arithmetic_instr (x, 1));
16087 return;
16089 /* Truncate Cirrus shift counts. */
16090 case 's':
16091 if (GET_CODE (x) == CONST_INT)
16093 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16094 return;
16096 arm_print_operand (stream, x, 0);
16097 return;
16099 case 'I':
16100 fprintf (stream, "%s", arithmetic_instr (x, 0));
16101 return;
16103 case 'S':
16105 HOST_WIDE_INT val;
16106 const char *shift;
16108 if (!shift_operator (x, SImode))
16110 output_operand_lossage ("invalid shift operand");
16111 break;
16114 shift = shift_op (x, &val);
16116 if (shift)
16118 fprintf (stream, ", %s ", shift);
16119 if (val == -1)
16120 arm_print_operand (stream, XEXP (x, 1), 0);
16121 else
16122 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16125 return;
16127 /* An explanation of the 'Q', 'R' and 'H' register operands:
16129 In a pair of registers containing a DI or DF value the 'Q'
16130 operand returns the register number of the register containing
16131 the least significant part of the value. The 'R' operand returns
16132 the register number of the register containing the most
16133 significant part of the value.
16135 The 'H' operand returns the higher of the two register numbers.
16136 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16137 same as the 'Q' operand, since the most significant part of the
16138 value is held in the lower number register. The reverse is true
16139 on systems where WORDS_BIG_ENDIAN is false.
16141 The purpose of these operands is to distinguish between cases
16142 where the endian-ness of the values is important (for example
16143 when they are added together), and cases where the endian-ness
16144 is irrelevant, but the order of register operations is important.
16145 For example when loading a value from memory into a register
16146 pair, the endian-ness does not matter. Provided that the value
16147 from the lower memory address is put into the lower numbered
16148 register, and the value from the higher address is put into the
16149 higher numbered register, the load will work regardless of whether
16150 the value being loaded is big-wordian or little-wordian. The
16151 order of the two register loads can matter however, if the address
16152 of the memory location is actually held in one of the registers
16153 being overwritten by the load.
16155 The 'Q' and 'R' constraints are also available for 64-bit
16156 constants. */
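/* For example, with a DImode value held in the pair {r0, r1} on a
target where WORDS_BIG_ENDIAN is false, %Q prints r0 (the least
significant word), %R prints r1 (the most significant word) and %H
also prints r1; when WORDS_BIG_ENDIAN is true, %Q and %H both print
r1 while %R prints r0. */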
16157 case 'Q':
16158 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16160 rtx part = gen_lowpart (SImode, x);
16161 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16162 return;
16165 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16167 output_operand_lossage ("invalid operand for code '%c'", code);
16168 return;
16171 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16172 return;
16174 case 'R':
16175 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16177 enum machine_mode mode = GET_MODE (x);
16178 rtx part;
16180 if (mode == VOIDmode)
16181 mode = DImode;
16182 part = gen_highpart_mode (SImode, mode, x);
16183 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16184 return;
16187 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16189 output_operand_lossage ("invalid operand for code '%c'", code);
16190 return;
16193 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16194 return;
16196 case 'H':
16197 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16199 output_operand_lossage ("invalid operand for code '%c'", code);
16200 return;
16203 asm_fprintf (stream, "%r", REGNO (x) + 1);
16204 return;
16206 case 'J':
16207 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16209 output_operand_lossage ("invalid operand for code '%c'", code);
16210 return;
16213 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16214 return;
16216 case 'K':
16217 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16219 output_operand_lossage ("invalid operand for code '%c'", code);
16220 return;
16223 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16224 return;
16226 case 'm':
16227 asm_fprintf (stream, "%r",
16228 GET_CODE (XEXP (x, 0)) == REG
16229 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16230 return;
16232 case 'M':
16233 asm_fprintf (stream, "{%r-%r}",
16234 REGNO (x),
16235 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16236 return;
16238 /* Like 'M', but writing doubleword vector registers, for use by Neon
16239 insns. */
16240 case 'h':
16242 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16243 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16244 if (numregs == 1)
16245 asm_fprintf (stream, "{d%d}", regno);
16246 else
16247 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16249 return;
16251 case 'd':
16252 /* CONST_TRUE_RTX means always -- that's the default. */
16253 if (x == const_true_rtx)
16254 return;
16256 if (!COMPARISON_P (x))
16258 output_operand_lossage ("invalid operand for code '%c'", code);
16259 return;
16262 fputs (arm_condition_codes[get_arm_condition_code (x)],
16263 stream);
16264 return;
16266 case 'D':
16267 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16268 want to do that. */
16269 if (x == const_true_rtx)
16271 output_operand_lossage ("instruction never executed");
16272 return;
16274 if (!COMPARISON_P (x))
16276 output_operand_lossage ("invalid operand for code '%c'", code);
16277 return;
16280 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16281 (get_arm_condition_code (x))],
16282 stream);
16283 return;
16285 /* Cirrus registers can be accessed in a variety of ways:
16286 single floating point (f)
16287 double floating point (d)
16288 32bit integer (fx)
16289 64bit integer (dx). */
16290 case 'W': /* Cirrus register in F mode. */
16291 case 'X': /* Cirrus register in D mode. */
16292 case 'Y': /* Cirrus register in FX mode. */
16293 case 'Z': /* Cirrus register in DX mode. */
16294 gcc_assert (GET_CODE (x) == REG
16295 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16297 fprintf (stream, "mv%s%s",
16298 code == 'W' ? "f"
16299 : code == 'X' ? "d"
16300 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16302 return;
16304 /* Print cirrus register in the mode specified by the register's mode. */
16305 case 'V':
16307 int mode = GET_MODE (x);
16309 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16311 output_operand_lossage ("invalid operand for code '%c'", code);
16312 return;
16315 fprintf (stream, "mv%s%s",
16316 mode == DFmode ? "d"
16317 : mode == SImode ? "fx"
16318 : mode == DImode ? "dx"
16319 : "f", reg_names[REGNO (x)] + 2);
16321 return;
16324 case 'U':
16325 if (GET_CODE (x) != REG
16326 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16327 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16328 /* Bad value for wCG register number. */
16330 output_operand_lossage ("invalid operand for code '%c'", code);
16331 return;
16334 else
16335 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16336 return;
16338 /* Print an iWMMXt control register name. */
16339 case 'w':
16340 if (GET_CODE (x) != CONST_INT
16341 || INTVAL (x) < 0
16342 || INTVAL (x) >= 16)
16343 /* Bad value for wC register number. */
16345 output_operand_lossage ("invalid operand for code '%c'", code);
16346 return;
16349 else
16351 static const char * wc_reg_names [16] =
16353 "wCID", "wCon", "wCSSF", "wCASF",
16354 "wC4", "wC5", "wC6", "wC7",
16355 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16356 "wC12", "wC13", "wC14", "wC15"
16359 fprintf (stream, wc_reg_names [INTVAL (x)]);
16361 return;
16363 /* Print the high single-precision register of a VFP double-precision
16364 register. */
16365 case 'p':
16367 int mode = GET_MODE (x);
16368 int regno;
16370 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16372 output_operand_lossage ("invalid operand for code '%c'", code);
16373 return;
16376 regno = REGNO (x);
16377 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16379 output_operand_lossage ("invalid operand for code '%c'", code);
16380 return;
16383 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16385 return;
16387 /* Print a VFP/Neon double precision or quad precision register name. */
16388 case 'P':
16389 case 'q':
16391 int mode = GET_MODE (x);
16392 int is_quad = (code == 'q');
16393 int regno;
16395 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16397 output_operand_lossage ("invalid operand for code '%c'", code);
16398 return;
16401 if (GET_CODE (x) != REG
16402 || !IS_VFP_REGNUM (REGNO (x)))
16404 output_operand_lossage ("invalid operand for code '%c'", code);
16405 return;
16408 regno = REGNO (x);
16409 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16410 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16412 output_operand_lossage ("invalid operand for code '%c'", code);
16413 return;
16416 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16417 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16419 return;
16421 /* These two codes print the low/high doubleword register of a Neon quad
16422 register, respectively. For pair-structure types, can also print
16423 low/high quadword registers. */
16424 case 'e':
16425 case 'f':
16427 int mode = GET_MODE (x);
16428 int regno;
16430 if ((GET_MODE_SIZE (mode) != 16
16431 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16433 output_operand_lossage ("invalid operand for code '%c'", code);
16434 return;
16437 regno = REGNO (x);
16438 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16440 output_operand_lossage ("invalid operand for code '%c'", code);
16441 return;
16444 if (GET_MODE_SIZE (mode) == 16)
16445 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16446 + (code == 'f' ? 1 : 0));
16447 else
16448 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16449 + (code == 'f' ? 1 : 0));
16451 return;
16453 /* Print a VFPv3 floating-point constant, represented as an integer
16454 index. */
16455 case 'G':
16457 int index = vfp3_const_double_index (x);
16458 gcc_assert (index != -1);
16459 fprintf (stream, "%d", index);
16461 return;
16463 /* Print bits representing opcode features for Neon.
16465 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16466 and polynomials as unsigned.
16468 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16470 Bit 2 is 1 for rounding functions, 0 otherwise. */
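/* For example, a bits value of 5 (binary 101: signed integer with
rounding) makes %T print 's', %F print 'i', %t print 's' and %O print
'r', which is how a mnemonic such as "vrhadd.s16" gets pieced together
by the output templates. */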
16472 /* Identify the type as 's', 'u', 'p' or 'f'. */
16473 case 'T':
16475 HOST_WIDE_INT bits = INTVAL (x);
16476 fputc ("uspf"[bits & 3], stream);
16478 return;
16480 /* Likewise, but signed and unsigned integers are both 'i'. */
16481 case 'F':
16483 HOST_WIDE_INT bits = INTVAL (x);
16484 fputc ("iipf"[bits & 3], stream);
16486 return;
16488 /* As for 'T', but emit 'u' instead of 'p'. */
16489 case 't':
16491 HOST_WIDE_INT bits = INTVAL (x);
16492 fputc ("usuf"[bits & 3], stream);
16494 return;
16496 /* Bit 2: rounding (vs none). */
16497 case 'O':
16499 HOST_WIDE_INT bits = INTVAL (x);
16500 fputs ((bits & 4) != 0 ? "r" : "", stream);
16502 return;
16504 /* Memory operand for vld1/vst1 instruction. */
16505 case 'A':
16507 rtx addr;
16508 bool postinc = FALSE;
16509 unsigned align, memsize, align_bits;
16511 gcc_assert (GET_CODE (x) == MEM);
16512 addr = XEXP (x, 0);
16513 if (GET_CODE (addr) == POST_INC)
16515 postinc = 1;
16516 addr = XEXP (addr, 0);
16518 asm_fprintf (stream, "[%r", REGNO (addr));
16520 /* We know the alignment of this access, so we can emit a hint in the
16521 instruction (for some alignments) as an aid to the memory subsystem
16522 of the target. */
16523 align = MEM_ALIGN (x) >> 3;
16524 memsize = INTVAL (MEM_SIZE (x));
16526 /* Only certain alignment specifiers are supported by the hardware. */
16527 if (memsize == 16 && (align % 32) == 0)
16528 align_bits = 256;
16529 else if ((memsize == 8 || memsize == 16) && (align % 16) == 0)
16530 align_bits = 128;
16531 else if ((align % 8) == 0)
16532 align_bits = 64;
16533 else
16534 align_bits = 0;
16536 if (align_bits != 0)
16537 asm_fprintf (stream, ":%d", align_bits);
16539 asm_fprintf (stream, "]");
16541 if (postinc)
16542 fputs("!", stream);
16544 return;
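/* For instance, the 'A' case above prints a post-incremented 16-byte
access that is known to be 256-bit aligned as "[r0:256]!", so the full
instruction comes out as something like "vld1.64 {d16-d17}, [r0:256]!". */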
16546 case 'C':
16548 rtx addr;
16550 gcc_assert (GET_CODE (x) == MEM);
16551 addr = XEXP (x, 0);
16552 gcc_assert (GET_CODE (addr) == REG);
16553 asm_fprintf (stream, "[%r]", REGNO (addr));
16555 return;
16557 /* Translate an S register number into a D register number and element index. */
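/* For example, with the standard VFP numbering s5 is printed as d2[1]
and s6 as d3[0]. */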
16558 case 'y':
16560 int mode = GET_MODE (x);
16561 int regno;
16563 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16565 output_operand_lossage ("invalid operand for code '%c'", code);
16566 return;
16569 regno = REGNO (x);
16570 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16572 output_operand_lossage ("invalid operand for code '%c'", code);
16573 return;
16576 regno = regno - FIRST_VFP_REGNUM;
16577 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16579 return;
16581 /* Register specifier for vld1.16/vst1.16. Translate the S register
16582 number into a D register number and element index. */
16583 case 'z':
16585 int mode = GET_MODE (x);
16586 int regno;
16588 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16590 output_operand_lossage ("invalid operand for code '%c'", code);
16591 return;
16594 regno = REGNO (x);
16595 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16597 output_operand_lossage ("invalid operand for code '%c'", code);
16598 return;
16601 regno = regno - FIRST_VFP_REGNUM;
16602 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16604 return;
16606 default:
16607 if (x == 0)
16609 output_operand_lossage ("missing operand");
16610 return;
16613 switch (GET_CODE (x))
16615 case REG:
16616 asm_fprintf (stream, "%r", REGNO (x));
16617 break;
16619 case MEM:
16620 output_memory_reference_mode = GET_MODE (x);
16621 output_address (XEXP (x, 0));
16622 break;
16624 case CONST_DOUBLE:
16625 if (TARGET_NEON)
16627 char fpstr[20];
16628 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16629 sizeof (fpstr), 0, 1);
16630 fprintf (stream, "#%s", fpstr);
16632 else
16633 fprintf (stream, "#%s", fp_immediate_constant (x));
16634 break;
16636 default:
16637 gcc_assert (GET_CODE (x) != NEG);
16638 fputc ('#', stream);
16639 if (GET_CODE (x) == HIGH)
16641 fputs (":lower16:", stream);
16642 x = XEXP (x, 0);
16645 output_addr_const (stream, x);
16646 break;
16651 /* Target hook for printing a memory address. */
16652 static void
16653 arm_print_operand_address (FILE *stream, rtx x)
16655 if (TARGET_32BIT)
16657 int is_minus = GET_CODE (x) == MINUS;
16659 if (GET_CODE (x) == REG)
16660 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16661 else if (GET_CODE (x) == PLUS || is_minus)
16663 rtx base = XEXP (x, 0);
16664 rtx index = XEXP (x, 1);
16665 HOST_WIDE_INT offset = 0;
16666 if (GET_CODE (base) != REG
16667 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16669 /* Ensure that BASE is a register. */
16670 /* (one of them must be). */
16671 /* Also ensure that SP is not used as an index register. */
16672 rtx temp = base;
16673 base = index;
16674 index = temp;
16676 switch (GET_CODE (index))
16678 case CONST_INT:
16679 offset = INTVAL (index);
16680 if (is_minus)
16681 offset = -offset;
16682 asm_fprintf (stream, "[%r, #%wd]",
16683 REGNO (base), offset);
16684 break;
16686 case REG:
16687 asm_fprintf (stream, "[%r, %s%r]",
16688 REGNO (base), is_minus ? "-" : "",
16689 REGNO (index));
16690 break;
16692 case MULT:
16693 case ASHIFTRT:
16694 case LSHIFTRT:
16695 case ASHIFT:
16696 case ROTATERT:
16698 asm_fprintf (stream, "[%r, %s%r",
16699 REGNO (base), is_minus ? "-" : "",
16700 REGNO (XEXP (index, 0)));
16701 arm_print_operand (stream, index, 'S');
16702 fputs ("]", stream);
16703 break;
16706 default:
16707 gcc_unreachable ();
16710 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16711 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16713 extern enum machine_mode output_memory_reference_mode;
16715 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16717 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16718 asm_fprintf (stream, "[%r, #%s%d]!",
16719 REGNO (XEXP (x, 0)),
16720 GET_CODE (x) == PRE_DEC ? "-" : "",
16721 GET_MODE_SIZE (output_memory_reference_mode));
16722 else
16723 asm_fprintf (stream, "[%r], #%s%d",
16724 REGNO (XEXP (x, 0)),
16725 GET_CODE (x) == POST_DEC ? "-" : "",
16726 GET_MODE_SIZE (output_memory_reference_mode));
16728 else if (GET_CODE (x) == PRE_MODIFY)
16730 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16731 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16732 asm_fprintf (stream, "#%wd]!",
16733 INTVAL (XEXP (XEXP (x, 1), 1)));
16734 else
16735 asm_fprintf (stream, "%r]!",
16736 REGNO (XEXP (XEXP (x, 1), 1)));
16738 else if (GET_CODE (x) == POST_MODIFY)
16740 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16741 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16742 asm_fprintf (stream, "#%wd",
16743 INTVAL (XEXP (XEXP (x, 1), 1)));
16744 else
16745 asm_fprintf (stream, "%r",
16746 REGNO (XEXP (XEXP (x, 1), 1)));
16748 else output_addr_const (stream, x);
16750 else
16752 if (GET_CODE (x) == REG)
16753 asm_fprintf (stream, "[%r]", REGNO (x));
16754 else if (GET_CODE (x) == POST_INC)
16755 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16756 else if (GET_CODE (x) == PLUS)
16758 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16759 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16760 asm_fprintf (stream, "[%r, #%wd]",
16761 REGNO (XEXP (x, 0)),
16762 INTVAL (XEXP (x, 1)));
16763 else
16764 asm_fprintf (stream, "[%r, %r]",
16765 REGNO (XEXP (x, 0)),
16766 REGNO (XEXP (x, 1)));
16768 else
16769 output_addr_const (stream, x);
16773 /* Target hook for indicating whether a punctuation character for
16774 TARGET_PRINT_OPERAND is valid. */
16775 static bool
16776 arm_print_operand_punct_valid_p (unsigned char code)
16778 return (code == '@' || code == '|' || code == '.'
16779 || code == '(' || code == ')' || code == '#'
16780 || (TARGET_32BIT && (code == '?'))
16781 || (TARGET_THUMB2 && (code == '!'))
16782 || (TARGET_THUMB && (code == '_')));
16785 /* Target hook for assembling integer objects. The ARM version needs to
16786 handle word-sized values specially. */
16787 static bool
16788 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16790 enum machine_mode mode;
16792 if (size == UNITS_PER_WORD && aligned_p)
16794 fputs ("\t.word\t", asm_out_file);
16795 output_addr_const (asm_out_file, x);
16797 /* Mark symbols as position independent. We only do this in the
16798 .text segment, not in the .data segment. */
16799 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16800 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16802 /* See legitimize_pic_address for an explanation of the
16803 TARGET_VXWORKS_RTP check. */
16804 if (TARGET_VXWORKS_RTP
16805 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16806 fputs ("(GOT)", asm_out_file);
16807 else
16808 fputs ("(GOTOFF)", asm_out_file);
16810 fputc ('\n', asm_out_file);
16811 return true;
16814 mode = GET_MODE (x);
16816 if (arm_vector_mode_supported_p (mode))
16818 int i, units;
16820 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16822 units = CONST_VECTOR_NUNITS (x);
16823 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16825 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16826 for (i = 0; i < units; i++)
16828 rtx elt = CONST_VECTOR_ELT (x, i);
16829 assemble_integer
16830 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16832 else
16833 for (i = 0; i < units; i++)
16835 rtx elt = CONST_VECTOR_ELT (x, i);
16836 REAL_VALUE_TYPE rval;
16838 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16840 assemble_real
16841 (rval, GET_MODE_INNER (mode),
16842 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16845 return true;
16848 return default_assemble_integer (x, size, aligned_p);
16851 static void
16852 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16854 section *s;
16856 if (!TARGET_AAPCS_BASED)
16858 (is_ctor ?
16859 default_named_section_asm_out_constructor
16860 : default_named_section_asm_out_destructor) (symbol, priority);
16861 return;
16864 /* Put these in the .init_array section, using a special relocation. */
16865 if (priority != DEFAULT_INIT_PRIORITY)
16867 char buf[18];
16868 sprintf (buf, "%s.%.5u",
16869 is_ctor ? ".init_array" : ".fini_array",
16870 priority);
16871 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16873 else if (is_ctor)
16874 s = ctors_section;
16875 else
16876 s = dtors_section;
16878 switch_to_section (s);
16879 assemble_align (POINTER_SIZE);
16880 fputs ("\t.word\t", asm_out_file);
16881 output_addr_const (asm_out_file, symbol);
16882 fputs ("(target1)\n", asm_out_file);
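/* So, roughly, a constructor "ctor" registered at priority 123 on an
AAPCS target comes out as:
.section .init_array.00123,"aw"
.align 2
.word ctor(target1)
while default-priority constructors go to the plain ctors section. */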
16885 /* Add a function to the list of static constructors. */
16887 static void
16888 arm_elf_asm_constructor (rtx symbol, int priority)
16890 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16893 /* Add a function to the list of static destructors. */
16895 static void
16896 arm_elf_asm_destructor (rtx symbol, int priority)
16898 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16901 /* A finite state machine takes care of noticing whether or not instructions
16902 can be conditionally executed, and thus decrease execution time and code
16903 size by deleting branch instructions. The fsm is controlled by
16904 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16906 /* The states of the fsm controlling condition codes are:
16907 0: normal, do nothing special
16908 1: make ASM_OUTPUT_OPCODE not output this instruction
16909 2: make ASM_OUTPUT_OPCODE not output this instruction
16910 3: make instructions conditional
16911 4: make instructions conditional
16913 State transitions (state->state by whom under condition):
16914 0 -> 1 final_prescan_insn if the `target' is a label
16915 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16916 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16917 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16918 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16919 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16920 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16921 (the target insn is arm_target_insn).
16923 If the jump clobbers the conditions then we use states 2 and 4.
16925 A similar thing can be done with conditional return insns.
16927 XXX In case the `target' is an unconditional branch, this conditionalising
16928 of the instructions always reduces code size, but not always execution
16929 time. But then, I want to reduce the code size to somewhere near what
16930 /bin/cc produces. */
16932 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16933 instructions. When a COND_EXEC instruction is seen the subsequent
16934 instructions are scanned so that multiple conditional instructions can be
16935 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16936 specify the length and true/false mask for the IT block. These will be
16937 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
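/* For example, a sequence such as
cmp r0, #0
bne .L2
add r1, r1, #1
.L2:
can be collapsed by this machinery into
cmp r0, #0
addeq r1, r1, #1
with the branch and its label deleted. */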
16939 /* Returns the index of the ARM condition code string in
16940 `arm_condition_codes'. COMPARISON should be an rtx like
16941 `(eq (...) (...))'. */
16942 static enum arm_cond_code
16943 get_arm_condition_code (rtx comparison)
16945 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16946 enum arm_cond_code code;
16947 enum rtx_code comp_code = GET_CODE (comparison);
16949 if (GET_MODE_CLASS (mode) != MODE_CC)
16950 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16951 XEXP (comparison, 1));
16953 switch (mode)
16955 case CC_DNEmode: code = ARM_NE; goto dominance;
16956 case CC_DEQmode: code = ARM_EQ; goto dominance;
16957 case CC_DGEmode: code = ARM_GE; goto dominance;
16958 case CC_DGTmode: code = ARM_GT; goto dominance;
16959 case CC_DLEmode: code = ARM_LE; goto dominance;
16960 case CC_DLTmode: code = ARM_LT; goto dominance;
16961 case CC_DGEUmode: code = ARM_CS; goto dominance;
16962 case CC_DGTUmode: code = ARM_HI; goto dominance;
16963 case CC_DLEUmode: code = ARM_LS; goto dominance;
16964 case CC_DLTUmode: code = ARM_CC;
16966 dominance:
16967 gcc_assert (comp_code == EQ || comp_code == NE);
16969 if (comp_code == EQ)
16970 return ARM_INVERSE_CONDITION_CODE (code);
16971 return code;
16973 case CC_NOOVmode:
16974 switch (comp_code)
16976 case NE: return ARM_NE;
16977 case EQ: return ARM_EQ;
16978 case GE: return ARM_PL;
16979 case LT: return ARM_MI;
16980 default: gcc_unreachable ();
16983 case CC_Zmode:
16984 switch (comp_code)
16986 case NE: return ARM_NE;
16987 case EQ: return ARM_EQ;
16988 default: gcc_unreachable ();
16991 case CC_Nmode:
16992 switch (comp_code)
16994 case NE: return ARM_MI;
16995 case EQ: return ARM_PL;
16996 default: gcc_unreachable ();
16999 case CCFPEmode:
17000 case CCFPmode:
17001 /* These encodings assume that AC=1 in the FPA system control
17002 byte. This allows us to handle all cases except UNEQ and
17003 LTGT. */
17004 switch (comp_code)
17006 case GE: return ARM_GE;
17007 case GT: return ARM_GT;
17008 case LE: return ARM_LS;
17009 case LT: return ARM_MI;
17010 case NE: return ARM_NE;
17011 case EQ: return ARM_EQ;
17012 case ORDERED: return ARM_VC;
17013 case UNORDERED: return ARM_VS;
17014 case UNLT: return ARM_LT;
17015 case UNLE: return ARM_LE;
17016 case UNGT: return ARM_HI;
17017 case UNGE: return ARM_PL;
17018 /* UNEQ and LTGT do not have a representation. */
17019 case UNEQ: /* Fall through. */
17020 case LTGT: /* Fall through. */
17021 default: gcc_unreachable ();
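/* CC_SWPmode is used when the canonicalized comparison has its operands
swapped relative to the instruction that set the flags, so each
non-symmetric condition is reversed here: GE is printed as LE, GTU as
CC (unsigned lower), and so on. */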
17024 case CC_SWPmode:
17025 switch (comp_code)
17027 case NE: return ARM_NE;
17028 case EQ: return ARM_EQ;
17029 case GE: return ARM_LE;
17030 case GT: return ARM_LT;
17031 case LE: return ARM_GE;
17032 case LT: return ARM_GT;
17033 case GEU: return ARM_LS;
17034 case GTU: return ARM_CC;
17035 case LEU: return ARM_CS;
17036 case LTU: return ARM_HI;
17037 default: gcc_unreachable ();
17040 case CC_Cmode:
17041 switch (comp_code)
17043 case LTU: return ARM_CS;
17044 case GEU: return ARM_CC;
17045 default: gcc_unreachable ();
17048 case CC_CZmode:
17049 switch (comp_code)
17051 case NE: return ARM_NE;
17052 case EQ: return ARM_EQ;
17053 case GEU: return ARM_CS;
17054 case GTU: return ARM_HI;
17055 case LEU: return ARM_LS;
17056 case LTU: return ARM_CC;
17057 default: gcc_unreachable ();
17060 case CC_NCVmode:
17061 switch (comp_code)
17063 case GE: return ARM_GE;
17064 case LT: return ARM_LT;
17065 case GEU: return ARM_CS;
17066 case LTU: return ARM_CC;
17067 default: gcc_unreachable ();
17070 case CCmode:
17071 switch (comp_code)
17073 case NE: return ARM_NE;
17074 case EQ: return ARM_EQ;
17075 case GE: return ARM_GE;
17076 case GT: return ARM_GT;
17077 case LE: return ARM_LE;
17078 case LT: return ARM_LT;
17079 case GEU: return ARM_CS;
17080 case GTU: return ARM_HI;
17081 case LEU: return ARM_LS;
17082 case LTU: return ARM_CC;
17083 default: gcc_unreachable ();
17086 default: gcc_unreachable ();
17090 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17091 instructions. */
17092 void
17093 thumb2_final_prescan_insn (rtx insn)
17095 rtx first_insn = insn;
17096 rtx body = PATTERN (insn);
17097 rtx predicate;
17098 enum arm_cond_code code;
17099 int n;
17100 int mask;
17102 /* Remove the previous insn from the count of insns to be output. */
17103 if (arm_condexec_count)
17104 arm_condexec_count--;
17106 /* Nothing to do if we are already inside a conditional block. */
17107 if (arm_condexec_count)
17108 return;
17110 if (GET_CODE (body) != COND_EXEC)
17111 return;
17113 /* Conditional jumps are implemented directly. */
17114 if (GET_CODE (insn) == JUMP_INSN)
17115 return;
17117 predicate = COND_EXEC_TEST (body);
17118 arm_current_cc = get_arm_condition_code (predicate);
17120 n = get_attr_ce_count (insn);
17121 arm_condexec_count = 1;
17122 arm_condexec_mask = (1 << n) - 1;
17123 arm_condexec_masklen = n;
17124 /* See if subsequent instructions can be combined into the same block. */
17125 for (;;)
17127 insn = next_nonnote_insn (insn);
17129 /* Jumping into the middle of an IT block is illegal, so a label or
17130 barrier terminates the block. */
17131 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17132 break;
17134 body = PATTERN (insn);
17135 /* USE and CLOBBER aren't really insns, so just skip them. */
17136 if (GET_CODE (body) == USE
17137 || GET_CODE (body) == CLOBBER)
17138 continue;
17140 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17141 if (GET_CODE (body) != COND_EXEC)
17142 break;
17143 /* Allow up to 4 conditionally executed instructions in a block. */
17144 n = get_attr_ce_count (insn);
17145 if (arm_condexec_masklen + n > 4)
17146 break;
17148 predicate = COND_EXEC_TEST (body);
17149 code = get_arm_condition_code (predicate);
17150 mask = (1 << n) - 1;
17151 if (arm_current_cc == code)
17152 arm_condexec_mask |= (mask << arm_condexec_masklen);
17153 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17154 break;
17156 arm_condexec_count++;
17157 arm_condexec_masklen += n;
17159 /* A jump must be the last instruction in a conditional block. */
17160 if (GET_CODE(insn) == JUMP_INSN)
17161 break;
17163 /* Restore recog_data (getting the attributes of other insns can
17164 destroy this array, but final.c assumes that it remains intact
17165 across this call). */
17166 extract_constrain_insn_cached (first_insn);
17169 void
17170 arm_final_prescan_insn (rtx insn)
17172 /* BODY will hold the body of INSN. */
17173 rtx body = PATTERN (insn);
17175 /* This will be 1 if trying to repeat the trick, and things need to be
17176 reversed if it appears to fail. */
17177 int reverse = 0;
17179 /* If we start with a return insn, we only succeed if we find another one. */
17180 int seeking_return = 0;
17182 /* START_INSN will hold the insn from where we start looking. This is the
17183 first insn after the following code_label if REVERSE is true. */
17184 rtx start_insn = insn;
17186 /* If in state 4, check if the target branch is reached, in order to
17187 change back to state 0. */
17188 if (arm_ccfsm_state == 4)
17190 if (insn == arm_target_insn)
17192 arm_target_insn = NULL;
17193 arm_ccfsm_state = 0;
17195 return;
17198 /* If in state 3, it is possible to repeat the trick, if this insn is an
17199 unconditional branch to a label, and immediately following this branch
17200 is the previous target label which is only used once, and the label this
17201 branch jumps to is not too far off. */
17202 if (arm_ccfsm_state == 3)
17204 if (simplejump_p (insn))
17206 start_insn = next_nonnote_insn (start_insn);
17207 if (GET_CODE (start_insn) == BARRIER)
17209 /* XXX Isn't this always a barrier? */
17210 start_insn = next_nonnote_insn (start_insn);
17212 if (GET_CODE (start_insn) == CODE_LABEL
17213 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17214 && LABEL_NUSES (start_insn) == 1)
17215 reverse = TRUE;
17216 else
17217 return;
17219 else if (GET_CODE (body) == RETURN)
17221 start_insn = next_nonnote_insn (start_insn);
17222 if (GET_CODE (start_insn) == BARRIER)
17223 start_insn = next_nonnote_insn (start_insn);
17224 if (GET_CODE (start_insn) == CODE_LABEL
17225 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17226 && LABEL_NUSES (start_insn) == 1)
17228 reverse = TRUE;
17229 seeking_return = 1;
17231 else
17232 return;
17234 else
17235 return;
17238 gcc_assert (!arm_ccfsm_state || reverse);
17239 if (GET_CODE (insn) != JUMP_INSN)
17240 return;
17242 /* This jump might be paralleled with a clobber of the condition codes;
17243 the jump should always come first. */
17244 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17245 body = XVECEXP (body, 0, 0);
17247 if (reverse
17248 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17249 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17251 int insns_skipped;
17252 int fail = FALSE, succeed = FALSE;
17253 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17254 int then_not_else = TRUE;
17255 rtx this_insn = start_insn, label = 0;
17257 /* Register the insn jumped to. */
17258 if (reverse)
17260 if (!seeking_return)
17261 label = XEXP (SET_SRC (body), 0);
17263 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17264 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17265 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17267 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17268 then_not_else = FALSE;
17270 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17271 seeking_return = 1;
17272 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17274 seeking_return = 1;
17275 then_not_else = FALSE;
17277 else
17278 gcc_unreachable ();
17280 /* See how many insns this branch skips, and what kind of insns. If all
17281 insns are okay, and the label or unconditional branch to the same
17282 label is not too far away, succeed. */
17283 for (insns_skipped = 0;
17284 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17286 rtx scanbody;
17288 this_insn = next_nonnote_insn (this_insn);
17289 if (!this_insn)
17290 break;
17292 switch (GET_CODE (this_insn))
17294 case CODE_LABEL:
17295 /* Succeed if it is the target label, otherwise fail since
17296 control falls in from somewhere else. */
17297 if (this_insn == label)
17299 arm_ccfsm_state = 1;
17300 succeed = TRUE;
17302 else
17303 fail = TRUE;
17304 break;
17306 case BARRIER:
17307 /* Succeed if the following insn is the target label.
17308 Otherwise fail.
17309 If return insns are used then the last insn in a function
17310 will be a barrier. */
17311 this_insn = next_nonnote_insn (this_insn);
17312 if (this_insn && this_insn == label)
17314 arm_ccfsm_state = 1;
17315 succeed = TRUE;
17317 else
17318 fail = TRUE;
17319 break;
17321 case CALL_INSN:
17322 /* The AAPCS says that conditional calls should not be
17323 used since they make interworking inefficient (the
17324 linker can't transform BL<cond> into BLX). That's
17325 only a problem if the machine has BLX. */
17326 if (arm_arch5)
17328 fail = TRUE;
17329 break;
17332 /* Succeed if the following insn is the target label, or
17333 if the following two insns are a barrier and the
17334 target label. */
17335 this_insn = next_nonnote_insn (this_insn);
17336 if (this_insn && GET_CODE (this_insn) == BARRIER)
17337 this_insn = next_nonnote_insn (this_insn);
17339 if (this_insn && this_insn == label
17340 && insns_skipped < max_insns_skipped)
17342 arm_ccfsm_state = 1;
17343 succeed = TRUE;
17345 else
17346 fail = TRUE;
17347 break;
17349 case JUMP_INSN:
17350 /* If this is an unconditional branch to the same label, succeed.
17351 If it is to another label, do nothing. If it is conditional,
17352 fail. */
17353 /* XXX Probably, the tests for SET and the PC are
17354 unnecessary. */
17356 scanbody = PATTERN (this_insn);
17357 if (GET_CODE (scanbody) == SET
17358 && GET_CODE (SET_DEST (scanbody)) == PC)
17360 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17361 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17363 arm_ccfsm_state = 2;
17364 succeed = TRUE;
17366 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17367 fail = TRUE;
17369 /* Fail if a conditional return is undesirable (e.g. on a
17370 StrongARM), but still allow this if optimizing for size. */
17371 else if (GET_CODE (scanbody) == RETURN
17372 && !use_return_insn (TRUE, NULL)
17373 && !optimize_size)
17374 fail = TRUE;
17375 else if (GET_CODE (scanbody) == RETURN
17376 && seeking_return)
17378 arm_ccfsm_state = 2;
17379 succeed = TRUE;
17381 else if (GET_CODE (scanbody) == PARALLEL)
17383 switch (get_attr_conds (this_insn))
17385 case CONDS_NOCOND:
17386 break;
17387 default:
17388 fail = TRUE;
17389 break;
17392 else
17393 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17395 break;
17397 case INSN:
17398 /* Instructions using or affecting the condition codes make it
17399 fail. */
17400 scanbody = PATTERN (this_insn);
17401 if (!(GET_CODE (scanbody) == SET
17402 || GET_CODE (scanbody) == PARALLEL)
17403 || get_attr_conds (this_insn) != CONDS_NOCOND)
17404 fail = TRUE;
17406 /* A conditional cirrus instruction must be followed by
17407 a non Cirrus instruction. However, since we
17408 conditionalize instructions in this function and by
17409 the time we get here we can't add instructions
17410 (nops), because shorten_branches() has already been
17411 called, we will disable conditionalizing Cirrus
17412 instructions to be safe. */
17413 if (GET_CODE (scanbody) != USE
17414 && GET_CODE (scanbody) != CLOBBER
17415 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17416 fail = TRUE;
17417 break;
17419 default:
17420 break;
17423 if (succeed)
17425 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17426 arm_target_label = CODE_LABEL_NUMBER (label);
17427 else
17429 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17431 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17433 this_insn = next_nonnote_insn (this_insn);
17434 gcc_assert (!this_insn
17435 || (GET_CODE (this_insn) != BARRIER
17436 && GET_CODE (this_insn) != CODE_LABEL));
17438 if (!this_insn)
17440 /* Oh, dear! We ran off the end... give up. */
17441 extract_constrain_insn_cached (insn);
17442 arm_ccfsm_state = 0;
17443 arm_target_insn = NULL;
17444 return;
17446 arm_target_insn = this_insn;
17449 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17450 what it was. */
17451 if (!reverse)
17452 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17454 if (reverse || then_not_else)
17455 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17458 /* Restore recog_data (getting the attributes of other insns can
17459 destroy this array, but final.c assumes that it remains intact
17460 across this call). */
17461 extract_constrain_insn_cached (insn);
17465 /* Output IT instructions. */
17466 void
17467 thumb2_asm_output_opcode (FILE * stream)
17469 char buff[5];
17470 int n;
17472 if (arm_condexec_mask)
17474 for (n = 0; n < arm_condexec_masklen; n++)
17475 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17476 buff[n] = 0;
17477 asm_fprintf(stream, "i%s\t%s\n\t", buff,
17478 arm_condition_codes[arm_current_cc]);
17479 arm_condexec_mask = 0;
17483 /* Returns true if REGNO is a valid register
17484 for holding a quantity of type MODE. */
17485 int
17486 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17488 if (GET_MODE_CLASS (mode) == MODE_CC)
17489 return (regno == CC_REGNUM
17490 || (TARGET_HARD_FLOAT && TARGET_VFP
17491 && regno == VFPCC_REGNUM));
17493 if (TARGET_THUMB1)
17494 /* For the Thumb we only allow values bigger than SImode in
17495 registers 0 - 6, so that there is always a second low
17496 register available to hold the upper part of the value.
17497 We probably ought to ensure that the register is the
17498 start of an even numbered register pair. */
17499 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17501 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17502 && IS_CIRRUS_REGNUM (regno))
17503 /* We have outlawed SI values in Cirrus registers because they
17504 reside in the lower 32 bits, but SF values reside in the
17505 upper 32 bits. This causes gcc all sorts of grief. We can't
17506 even split the registers into pairs because Cirrus SI values
17507 get sign extended to 64bits-- aldyh. */
17508 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17510 if (TARGET_HARD_FLOAT && TARGET_VFP
17511 && IS_VFP_REGNUM (regno))
17513 if (mode == SFmode || mode == SImode)
17514 return VFP_REGNO_OK_FOR_SINGLE (regno);
17516 if (mode == DFmode)
17517 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17519 /* VFP registers can hold HFmode values, but there is no point in
17520 putting them there unless we have hardware conversion insns. */
17521 if (mode == HFmode)
17522 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17524 if (TARGET_NEON)
17525 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17526 || (VALID_NEON_QREG_MODE (mode)
17527 && NEON_REGNO_OK_FOR_QUAD (regno))
17528 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17529 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17530 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17531 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17532 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17534 return FALSE;
17537 if (TARGET_REALLY_IWMMXT)
17539 if (IS_IWMMXT_GR_REGNUM (regno))
17540 return mode == SImode;
17542 if (IS_IWMMXT_REGNUM (regno))
17543 return VALID_IWMMXT_REG_MODE (mode);
17546 /* We allow almost any value to be stored in the general registers.
17547 Restrict doubleword quantities to even register pairs so that we can
17548 use ldrd. Do not allow very large Neon structure opaque modes in
17549 general registers; they would use too many. */
17550 if (regno <= LAST_ARM_REGNUM)
17551 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17552 && ARM_NUM_REGS (mode) <= 4;
17554 if (regno == FRAME_POINTER_REGNUM
17555 || regno == ARG_POINTER_REGNUM)
17556 /* We only allow integers in the fake hard registers. */
17557 return GET_MODE_CLASS (mode) == MODE_INT;
17559 /* The only registers left are the FPA registers
17560 which we only allow to hold FP values. */
17561 return (TARGET_HARD_FLOAT && TARGET_FPA
17562 && GET_MODE_CLASS (mode) == MODE_FLOAT
17563 && regno >= FIRST_FPA_REGNUM
17564 && regno <= LAST_FPA_REGNUM);
17567 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17568 not used in ARM mode. */
17570 enum reg_class
17571 arm_regno_class (int regno)
17573 if (TARGET_THUMB1)
17575 if (regno == STACK_POINTER_REGNUM)
17576 return STACK_REG;
17577 if (regno == CC_REGNUM)
17578 return CC_REG;
17579 if (regno < 8)
17580 return LO_REGS;
17581 return HI_REGS;
17584 if (TARGET_THUMB2 && regno < 8)
17585 return LO_REGS;
17587 if ( regno <= LAST_ARM_REGNUM
17588 || regno == FRAME_POINTER_REGNUM
17589 || regno == ARG_POINTER_REGNUM)
17590 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17592 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17593 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17595 if (IS_CIRRUS_REGNUM (regno))
17596 return CIRRUS_REGS;
17598 if (IS_VFP_REGNUM (regno))
17600 if (regno <= D7_VFP_REGNUM)
17601 return VFP_D0_D7_REGS;
17602 else if (regno <= LAST_LO_VFP_REGNUM)
17603 return VFP_LO_REGS;
17604 else
17605 return VFP_HI_REGS;
17608 if (IS_IWMMXT_REGNUM (regno))
17609 return IWMMXT_REGS;
17611 if (IS_IWMMXT_GR_REGNUM (regno))
17612 return IWMMXT_GR_REGS;
17614 return FPA_REGS;
17617 /* Handle a special case when computing the offset
17618 of an argument from the frame pointer. */
17619 int
17620 arm_debugger_arg_offset (int value, rtx addr)
17622 rtx insn;
17624 /* We are only interested if dbxout_parms() failed to compute the offset. */
17625 if (value != 0)
17626 return 0;
17628 /* We can only cope with the case where the address is held in a register. */
17629 if (GET_CODE (addr) != REG)
17630 return 0;
17632 /* If we are using the frame pointer to point at the argument, then
17633 an offset of 0 is correct. */
17634 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17635 return 0;
17637 /* If we are using the stack pointer to point at the
17638 argument, then an offset of 0 is correct. */
17639 /* ??? Check this is consistent with thumb2 frame layout. */
17640 if ((TARGET_THUMB || !frame_pointer_needed)
17641 && REGNO (addr) == SP_REGNUM)
17642 return 0;
17644 /* Oh dear. The argument is pointed to by a register rather
17645 than being held in a register, or being stored at a known
17646 offset from the frame pointer. Since GDB only understands
17647 those two kinds of argument we must translate the address
17648 held in the register into an offset from the frame pointer.
17649 We do this by searching through the insns for the function
17650 looking to see where this register gets its value. If the
17651 register is initialized from the frame pointer plus an offset
17652 then we are in luck and we can continue, otherwise we give up.
17654 This code is exercised by producing debugging information
17655 for a function with arguments like this:
17657 double func (double a, double b, int c, double d) {return d;}
17659 Without this code the stab for parameter 'd' will be set to
17660 an offset of 0 from the frame pointer, rather than 8. */
17662 /* The if() statement says:
17664 If the insn is a normal instruction
17665 and if the insn is setting the value in a register
17666 and if the register being set is the register holding the address of the argument
17667 and if the address is computed by an addition
17668 that involves adding to a register
17669 which is the frame pointer
17670 a constant integer
17672 then... */
17674 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17676 if ( GET_CODE (insn) == INSN
17677 && GET_CODE (PATTERN (insn)) == SET
17678 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17679 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17680 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17681 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17682 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17685 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17687 break;
17691 if (value == 0)
17693 debug_rtx (addr);
17694 warning (0, "unable to compute real location of stacked parameter");
17695 value = 8; /* XXX magic hack */
17698 return value;
17701 typedef enum {
17702 T_V8QI,
17703 T_V4HI,
17704 T_V2SI,
17705 T_V2SF,
17706 T_DI,
17707 T_V16QI,
17708 T_V8HI,
17709 T_V4SI,
17710 T_V4SF,
17711 T_V2DI,
17712 T_TI,
17713 T_EI,
17714 T_OI,
17715 T_MAX /* Size of enum. Keep last. */
17716 } neon_builtin_type_mode;
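/* Editorial note (not part of the original file): T_MAX must stay in sync
   with the modenames[] table in arm_init_neon_builtins, which verifies the
   correspondence with a gcc_assert.  */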
17718 #define TYPE_MODE_BIT(X) (1 << (X))
17720 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
17721 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
17722 | TYPE_MODE_BIT (T_DI))
17723 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
17724 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
17725 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
17727 #define v8qi_UP T_V8QI
17728 #define v4hi_UP T_V4HI
17729 #define v2si_UP T_V2SI
17730 #define v2sf_UP T_V2SF
17731 #define di_UP T_DI
17732 #define v16qi_UP T_V16QI
17733 #define v8hi_UP T_V8HI
17734 #define v4si_UP T_V4SI
17735 #define v4sf_UP T_V4SF
17736 #define v2di_UP T_V2DI
17737 #define ti_UP T_TI
17738 #define ei_UP T_EI
17739 #define oi_UP T_OI
17741 #define UP(X) X##_UP
17743 typedef enum {
17744 NEON_BINOP,
17745 NEON_TERNOP,
17746 NEON_UNOP,
17747 NEON_GETLANE,
17748 NEON_SETLANE,
17749 NEON_CREATE,
17750 NEON_DUP,
17751 NEON_DUPLANE,
17752 NEON_COMBINE,
17753 NEON_SPLIT,
17754 NEON_LANEMUL,
17755 NEON_LANEMULL,
17756 NEON_LANEMULH,
17757 NEON_LANEMAC,
17758 NEON_SCALARMUL,
17759 NEON_SCALARMULL,
17760 NEON_SCALARMULH,
17761 NEON_SCALARMAC,
17762 NEON_CONVERT,
17763 NEON_FIXCONV,
17764 NEON_SELECT,
17765 NEON_RESULTPAIR,
17766 NEON_REINTERP,
17767 NEON_VTBL,
17768 NEON_VTBX,
17769 NEON_LOAD1,
17770 NEON_LOAD1LANE,
17771 NEON_STORE1,
17772 NEON_STORE1LANE,
17773 NEON_LOADSTRUCT,
17774 NEON_LOADSTRUCTLANE,
17775 NEON_STORESTRUCT,
17776 NEON_STORESTRUCTLANE,
17777 NEON_LOGICBINOP,
17778 NEON_SHIFTINSERT,
17779 NEON_SHIFTIMM,
17780 NEON_SHIFTACC
17781 } neon_itype;
17783 typedef struct {
17784 const char *name;
17785 const neon_itype itype;
17786 const neon_builtin_type_mode mode;
17787 const enum insn_code code;
17788 unsigned int fcode;
17789 } neon_builtin_datum;
17791 #define CF(N,X) CODE_FOR_neon_##N##X
17793 #define VAR1(T, N, A) \
17794 {#N, NEON_##T, UP (A), CF (N, A), 0}
17795 #define VAR2(T, N, A, B) \
17796 VAR1 (T, N, A), \
17797 {#N, NEON_##T, UP (B), CF (N, B), 0}
17798 #define VAR3(T, N, A, B, C) \
17799 VAR2 (T, N, A, B), \
17800 {#N, NEON_##T, UP (C), CF (N, C), 0}
17801 #define VAR4(T, N, A, B, C, D) \
17802 VAR3 (T, N, A, B, C), \
17803 {#N, NEON_##T, UP (D), CF (N, D), 0}
17804 #define VAR5(T, N, A, B, C, D, E) \
17805 VAR4 (T, N, A, B, C, D), \
17806 {#N, NEON_##T, UP (E), CF (N, E), 0}
17807 #define VAR6(T, N, A, B, C, D, E, F) \
17808 VAR5 (T, N, A, B, C, D, E), \
17809 {#N, NEON_##T, UP (F), CF (N, F), 0}
17810 #define VAR7(T, N, A, B, C, D, E, F, G) \
17811 VAR6 (T, N, A, B, C, D, E, F), \
17812 {#N, NEON_##T, UP (G), CF (N, G), 0}
17813 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17814 VAR7 (T, N, A, B, C, D, E, F, G), \
17815 {#N, NEON_##T, UP (H), CF (N, H), 0}
17816 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17817 VAR8 (T, N, A, B, C, D, E, F, G, H), \
17818 {#N, NEON_##T, UP (I), CF (N, I), 0}
17819 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17820 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
17821 {#N, NEON_##T, UP (J), CF (N, J), 0}
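/* Editorial sketch (not part of the original file) of how these macros
   expand.  Given the UP and CF definitions above, the table entry
       VAR2 (BINOP, vqdmull, v4hi, v2si)
   becomes the two initializers
       {"vqdmull", NEON_BINOP, T_V4HI, CODE_FOR_neon_vqdmullv4hi, 0},
       {"vqdmull", NEON_BINOP, T_V2SI, CODE_FOR_neon_vqdmullv2si, 0},
   i.e. one neon_builtin_datum per element mode.  */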
17823 /* The mode entries in the following table correspond to the "key" type of the
17824 instruction variant, i.e. equivalent to that which would be specified after
17825 the assembler mnemonic, which usually refers to the last vector operand.
17826 (Signed/unsigned/polynomial types are not differentiated, though; they are
17827 all mapped onto the same mode for a given element size.) The modes
17828 listed per instruction should be the same as those defined for that
17829 instruction's pattern in neon.md. */
17831 static neon_builtin_datum neon_builtin_data[] =
17833 VAR10 (BINOP, vadd,
17834 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17835 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
17836 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
17837 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17838 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17839 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
17840 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17841 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17842 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
17843 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17844 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
17845 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
17846 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
17847 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
17848 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
17849 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
17850 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
17851 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
17852 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
17853 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
17854 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
17855 VAR2 (BINOP, vqdmull, v4hi, v2si),
17856 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17857 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17858 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17859 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
17860 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
17861 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
17862 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17863 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17864 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17865 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
17866 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17867 VAR10 (BINOP, vsub,
17868 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17869 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
17870 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
17871 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17872 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17873 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
17874 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17875 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17876 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17877 VAR2 (BINOP, vcage, v2sf, v4sf),
17878 VAR2 (BINOP, vcagt, v2sf, v4sf),
17879 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17880 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17881 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
17882 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17883 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
17884 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17885 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17886 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
17887 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17888 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17889 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
17890 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
17891 VAR2 (BINOP, vrecps, v2sf, v4sf),
17892 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
17893 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17894 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17895 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17896 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17897 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17898 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17899 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17900 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17901 VAR2 (UNOP, vcnt, v8qi, v16qi),
17902 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
17903 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
17904 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17905 /* FIXME: vget_lane supports more variants than this! */
17906 VAR10 (GETLANE, vget_lane,
17907 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17908 VAR10 (SETLANE, vset_lane,
17909 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17910 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
17911 VAR10 (DUP, vdup_n,
17912 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17913 VAR10 (DUPLANE, vdup_lane,
17914 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17915 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
17916 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
17917 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
17918 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
17919 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
17920 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
17921 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
17922 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17923 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17924 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
17925 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
17926 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17927 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
17928 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
17929 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17930 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17931 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
17932 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
17933 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17934 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
17935 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
17936 VAR10 (BINOP, vext,
17937 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17938 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17939 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
17940 VAR2 (UNOP, vrev16, v8qi, v16qi),
17941 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
17942 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
17943 VAR10 (SELECT, vbsl,
17944 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17945 VAR1 (VTBL, vtbl1, v8qi),
17946 VAR1 (VTBL, vtbl2, v8qi),
17947 VAR1 (VTBL, vtbl3, v8qi),
17948 VAR1 (VTBL, vtbl4, v8qi),
17949 VAR1 (VTBX, vtbx1, v8qi),
17950 VAR1 (VTBX, vtbx2, v8qi),
17951 VAR1 (VTBX, vtbx3, v8qi),
17952 VAR1 (VTBX, vtbx4, v8qi),
17953 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17954 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17955 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17956 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
17957 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
17958 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
17959 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
17960 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
17961 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
17962 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
17963 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
17964 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
17965 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
17966 VAR10 (LOAD1, vld1,
17967 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17968 VAR10 (LOAD1LANE, vld1_lane,
17969 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17970 VAR10 (LOAD1, vld1_dup,
17971 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17972 VAR10 (STORE1, vst1,
17973 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17974 VAR10 (STORE1LANE, vst1_lane,
17975 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17976 VAR9 (LOADSTRUCT,
17977 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17978 VAR7 (LOADSTRUCTLANE, vld2_lane,
17979 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17980 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
17981 VAR9 (STORESTRUCT, vst2,
17982 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17983 VAR7 (STORESTRUCTLANE, vst2_lane,
17984 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17985 VAR9 (LOADSTRUCT,
17986 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17987 VAR7 (LOADSTRUCTLANE, vld3_lane,
17988 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17989 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
17990 VAR9 (STORESTRUCT, vst3,
17991 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17992 VAR7 (STORESTRUCTLANE, vst3_lane,
17993 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17994 VAR9 (LOADSTRUCT, vld4,
17995 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17996 VAR7 (LOADSTRUCTLANE, vld4_lane,
17997 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17998 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
17999 VAR9 (STORESTRUCT, vst4,
18000 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18001 VAR7 (STORESTRUCTLANE, vst4_lane,
18002 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18003 VAR10 (LOGICBINOP, vand,
18004 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18005 VAR10 (LOGICBINOP, vorr,
18006 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18007 VAR10 (BINOP, veor,
18008 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18009 VAR10 (LOGICBINOP, vbic,
18010 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18011 VAR10 (LOGICBINOP, vorn,
18012 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
18015 #undef CF
18016 #undef VAR1
18017 #undef VAR2
18018 #undef VAR3
18019 #undef VAR4
18020 #undef VAR5
18021 #undef VAR6
18022 #undef VAR7
18023 #undef VAR8
18024 #undef VAR9
18025 #undef VAR10
18027 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
18028 symbolic names defined here (which would require too much duplication).
18029 FIXME? */
18030 enum arm_builtins
18032 ARM_BUILTIN_GETWCX,
18033 ARM_BUILTIN_SETWCX,
18035 ARM_BUILTIN_WZERO,
18037 ARM_BUILTIN_WAVG2BR,
18038 ARM_BUILTIN_WAVG2HR,
18039 ARM_BUILTIN_WAVG2B,
18040 ARM_BUILTIN_WAVG2H,
18042 ARM_BUILTIN_WACCB,
18043 ARM_BUILTIN_WACCH,
18044 ARM_BUILTIN_WACCW,
18046 ARM_BUILTIN_WMACS,
18047 ARM_BUILTIN_WMACSZ,
18048 ARM_BUILTIN_WMACU,
18049 ARM_BUILTIN_WMACUZ,
18051 ARM_BUILTIN_WSADB,
18052 ARM_BUILTIN_WSADBZ,
18053 ARM_BUILTIN_WSADH,
18054 ARM_BUILTIN_WSADHZ,
18056 ARM_BUILTIN_WALIGN,
18058 ARM_BUILTIN_TMIA,
18059 ARM_BUILTIN_TMIAPH,
18060 ARM_BUILTIN_TMIABB,
18061 ARM_BUILTIN_TMIABT,
18062 ARM_BUILTIN_TMIATB,
18063 ARM_BUILTIN_TMIATT,
18065 ARM_BUILTIN_TMOVMSKB,
18066 ARM_BUILTIN_TMOVMSKH,
18067 ARM_BUILTIN_TMOVMSKW,
18069 ARM_BUILTIN_TBCSTB,
18070 ARM_BUILTIN_TBCSTH,
18071 ARM_BUILTIN_TBCSTW,
18073 ARM_BUILTIN_WMADDS,
18074 ARM_BUILTIN_WMADDU,
18076 ARM_BUILTIN_WPACKHSS,
18077 ARM_BUILTIN_WPACKWSS,
18078 ARM_BUILTIN_WPACKDSS,
18079 ARM_BUILTIN_WPACKHUS,
18080 ARM_BUILTIN_WPACKWUS,
18081 ARM_BUILTIN_WPACKDUS,
18083 ARM_BUILTIN_WADDB,
18084 ARM_BUILTIN_WADDH,
18085 ARM_BUILTIN_WADDW,
18086 ARM_BUILTIN_WADDSSB,
18087 ARM_BUILTIN_WADDSSH,
18088 ARM_BUILTIN_WADDSSW,
18089 ARM_BUILTIN_WADDUSB,
18090 ARM_BUILTIN_WADDUSH,
18091 ARM_BUILTIN_WADDUSW,
18092 ARM_BUILTIN_WSUBB,
18093 ARM_BUILTIN_WSUBH,
18094 ARM_BUILTIN_WSUBW,
18095 ARM_BUILTIN_WSUBSSB,
18096 ARM_BUILTIN_WSUBSSH,
18097 ARM_BUILTIN_WSUBSSW,
18098 ARM_BUILTIN_WSUBUSB,
18099 ARM_BUILTIN_WSUBUSH,
18100 ARM_BUILTIN_WSUBUSW,
18102 ARM_BUILTIN_WAND,
18103 ARM_BUILTIN_WANDN,
18104 ARM_BUILTIN_WOR,
18105 ARM_BUILTIN_WXOR,
18107 ARM_BUILTIN_WCMPEQB,
18108 ARM_BUILTIN_WCMPEQH,
18109 ARM_BUILTIN_WCMPEQW,
18110 ARM_BUILTIN_WCMPGTUB,
18111 ARM_BUILTIN_WCMPGTUH,
18112 ARM_BUILTIN_WCMPGTUW,
18113 ARM_BUILTIN_WCMPGTSB,
18114 ARM_BUILTIN_WCMPGTSH,
18115 ARM_BUILTIN_WCMPGTSW,
18117 ARM_BUILTIN_TEXTRMSB,
18118 ARM_BUILTIN_TEXTRMSH,
18119 ARM_BUILTIN_TEXTRMSW,
18120 ARM_BUILTIN_TEXTRMUB,
18121 ARM_BUILTIN_TEXTRMUH,
18122 ARM_BUILTIN_TEXTRMUW,
18123 ARM_BUILTIN_TINSRB,
18124 ARM_BUILTIN_TINSRH,
18125 ARM_BUILTIN_TINSRW,
18127 ARM_BUILTIN_WMAXSW,
18128 ARM_BUILTIN_WMAXSH,
18129 ARM_BUILTIN_WMAXSB,
18130 ARM_BUILTIN_WMAXUW,
18131 ARM_BUILTIN_WMAXUH,
18132 ARM_BUILTIN_WMAXUB,
18133 ARM_BUILTIN_WMINSW,
18134 ARM_BUILTIN_WMINSH,
18135 ARM_BUILTIN_WMINSB,
18136 ARM_BUILTIN_WMINUW,
18137 ARM_BUILTIN_WMINUH,
18138 ARM_BUILTIN_WMINUB,
18140 ARM_BUILTIN_WMULUM,
18141 ARM_BUILTIN_WMULSM,
18142 ARM_BUILTIN_WMULUL,
18144 ARM_BUILTIN_PSADBH,
18145 ARM_BUILTIN_WSHUFH,
18147 ARM_BUILTIN_WSLLH,
18148 ARM_BUILTIN_WSLLW,
18149 ARM_BUILTIN_WSLLD,
18150 ARM_BUILTIN_WSRAH,
18151 ARM_BUILTIN_WSRAW,
18152 ARM_BUILTIN_WSRAD,
18153 ARM_BUILTIN_WSRLH,
18154 ARM_BUILTIN_WSRLW,
18155 ARM_BUILTIN_WSRLD,
18156 ARM_BUILTIN_WRORH,
18157 ARM_BUILTIN_WRORW,
18158 ARM_BUILTIN_WRORD,
18159 ARM_BUILTIN_WSLLHI,
18160 ARM_BUILTIN_WSLLWI,
18161 ARM_BUILTIN_WSLLDI,
18162 ARM_BUILTIN_WSRAHI,
18163 ARM_BUILTIN_WSRAWI,
18164 ARM_BUILTIN_WSRADI,
18165 ARM_BUILTIN_WSRLHI,
18166 ARM_BUILTIN_WSRLWI,
18167 ARM_BUILTIN_WSRLDI,
18168 ARM_BUILTIN_WRORHI,
18169 ARM_BUILTIN_WRORWI,
18170 ARM_BUILTIN_WRORDI,
18172 ARM_BUILTIN_WUNPCKIHB,
18173 ARM_BUILTIN_WUNPCKIHH,
18174 ARM_BUILTIN_WUNPCKIHW,
18175 ARM_BUILTIN_WUNPCKILB,
18176 ARM_BUILTIN_WUNPCKILH,
18177 ARM_BUILTIN_WUNPCKILW,
18179 ARM_BUILTIN_WUNPCKEHSB,
18180 ARM_BUILTIN_WUNPCKEHSH,
18181 ARM_BUILTIN_WUNPCKEHSW,
18182 ARM_BUILTIN_WUNPCKEHUB,
18183 ARM_BUILTIN_WUNPCKEHUH,
18184 ARM_BUILTIN_WUNPCKEHUW,
18185 ARM_BUILTIN_WUNPCKELSB,
18186 ARM_BUILTIN_WUNPCKELSH,
18187 ARM_BUILTIN_WUNPCKELSW,
18188 ARM_BUILTIN_WUNPCKELUB,
18189 ARM_BUILTIN_WUNPCKELUH,
18190 ARM_BUILTIN_WUNPCKELUW,
18192 ARM_BUILTIN_THREAD_POINTER,
18194 ARM_BUILTIN_NEON_BASE,
18196 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
18199 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
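/* Editorial note (not part of the original file): arm_init_neon_builtins
   below assigns the I-th entry of neon_builtin_data the function code
   ARM_BUILTIN_NEON_BASE + I, so ARM_BUILTIN_MAX is one past the last Neon
   builtin and sizes arm_builtin_decls exactly.  */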
18201 static void
18202 arm_init_neon_builtins (void)
18204 unsigned int i, fcode;
18205 tree decl;
18207 tree neon_intQI_type_node;
18208 tree neon_intHI_type_node;
18209 tree neon_polyQI_type_node;
18210 tree neon_polyHI_type_node;
18211 tree neon_intSI_type_node;
18212 tree neon_intDI_type_node;
18213 tree neon_float_type_node;
18215 tree intQI_pointer_node;
18216 tree intHI_pointer_node;
18217 tree intSI_pointer_node;
18218 tree intDI_pointer_node;
18219 tree float_pointer_node;
18221 tree const_intQI_node;
18222 tree const_intHI_node;
18223 tree const_intSI_node;
18224 tree const_intDI_node;
18225 tree const_float_node;
18227 tree const_intQI_pointer_node;
18228 tree const_intHI_pointer_node;
18229 tree const_intSI_pointer_node;
18230 tree const_intDI_pointer_node;
18231 tree const_float_pointer_node;
18233 tree V8QI_type_node;
18234 tree V4HI_type_node;
18235 tree V2SI_type_node;
18236 tree V2SF_type_node;
18237 tree V16QI_type_node;
18238 tree V8HI_type_node;
18239 tree V4SI_type_node;
18240 tree V4SF_type_node;
18241 tree V2DI_type_node;
18243 tree intUQI_type_node;
18244 tree intUHI_type_node;
18245 tree intUSI_type_node;
18246 tree intUDI_type_node;
18248 tree intEI_type_node;
18249 tree intOI_type_node;
18250 tree intCI_type_node;
18251 tree intXI_type_node;
18253 tree V8QI_pointer_node;
18254 tree V4HI_pointer_node;
18255 tree V2SI_pointer_node;
18256 tree V2SF_pointer_node;
18257 tree V16QI_pointer_node;
18258 tree V8HI_pointer_node;
18259 tree V4SI_pointer_node;
18260 tree V4SF_pointer_node;
18261 tree V2DI_pointer_node;
18263 tree void_ftype_pv8qi_v8qi_v8qi;
18264 tree void_ftype_pv4hi_v4hi_v4hi;
18265 tree void_ftype_pv2si_v2si_v2si;
18266 tree void_ftype_pv2sf_v2sf_v2sf;
18267 tree void_ftype_pdi_di_di;
18268 tree void_ftype_pv16qi_v16qi_v16qi;
18269 tree void_ftype_pv8hi_v8hi_v8hi;
18270 tree void_ftype_pv4si_v4si_v4si;
18271 tree void_ftype_pv4sf_v4sf_v4sf;
18272 tree void_ftype_pv2di_v2di_v2di;
18274 tree reinterp_ftype_dreg[5][5];
18275 tree reinterp_ftype_qreg[5][5];
18276 tree dreg_types[5], qreg_types[5];
18278 /* Create distinguished type nodes for NEON vector element types,
18279 and pointers to values of such types, so we can detect them later. */
18280 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18281 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18282 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18283 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18284 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18285 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18286 neon_float_type_node = make_node (REAL_TYPE);
18287 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18288 layout_type (neon_float_type_node);
18290 /* Define typedefs which exactly correspond to the modes we are basing vector
18291 types on. If you change these names you'll need to change
18292 the table used by arm_mangle_type too. */
18293 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18294 "__builtin_neon_qi");
18295 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18296 "__builtin_neon_hi");
18297 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18298 "__builtin_neon_si");
18299 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18300 "__builtin_neon_sf");
18301 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18302 "__builtin_neon_di");
18303 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18304 "__builtin_neon_poly8");
18305 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18306 "__builtin_neon_poly16");
18308 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18309 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18310 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18311 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18312 float_pointer_node = build_pointer_type (neon_float_type_node);
18314 /* Next create constant-qualified versions of the above types. */
18315 const_intQI_node = build_qualified_type (neon_intQI_type_node,
18316 TYPE_QUAL_CONST);
18317 const_intHI_node = build_qualified_type (neon_intHI_type_node,
18318 TYPE_QUAL_CONST);
18319 const_intSI_node = build_qualified_type (neon_intSI_type_node,
18320 TYPE_QUAL_CONST);
18321 const_intDI_node = build_qualified_type (neon_intDI_type_node,
18322 TYPE_QUAL_CONST);
18323 const_float_node = build_qualified_type (neon_float_type_node,
18324 TYPE_QUAL_CONST);
18326 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18327 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18328 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18329 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18330 const_float_pointer_node = build_pointer_type (const_float_node);
18332 /* Now create vector types based on our NEON element types. */
18333 /* 64-bit vectors. */
18334 V8QI_type_node =
18335 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18336 V4HI_type_node =
18337 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18338 V2SI_type_node =
18339 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18340 V2SF_type_node =
18341 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
18342 /* 128-bit vectors. */
18343 V16QI_type_node =
18344 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
18345 V8HI_type_node =
18346 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
18347 V4SI_type_node =
18348 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
18349 V4SF_type_node =
18350 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
18351 V2DI_type_node =
18352 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18354 /* Unsigned integer types for various mode sizes. */
18355 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18356 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18357 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18358 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18360 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18361 "__builtin_neon_uqi");
18362 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18363 "__builtin_neon_uhi");
18364 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18365 "__builtin_neon_usi");
18366 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18367 "__builtin_neon_udi");
18369 /* Opaque integer types for structures of vectors. */
18370 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18371 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18372 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18373 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18375 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18376 "__builtin_neon_ti");
18377 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18378 "__builtin_neon_ei");
18379 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18380 "__builtin_neon_oi");
18381 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18382 "__builtin_neon_ci");
18383 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18384 "__builtin_neon_xi");
18386 /* Pointers to vector types. */
18387 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18388 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18389 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18390 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18391 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18392 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18393 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18394 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18395 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18397 /* Operations which return results as pairs. */
18398 void_ftype_pv8qi_v8qi_v8qi =
18399 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18400 V8QI_type_node, NULL);
18401 void_ftype_pv4hi_v4hi_v4hi =
18402 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18403 V4HI_type_node, NULL);
18404 void_ftype_pv2si_v2si_v2si =
18405 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18406 V2SI_type_node, NULL);
18407 void_ftype_pv2sf_v2sf_v2sf =
18408 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18409 V2SF_type_node, NULL);
18410 void_ftype_pdi_di_di =
18411 build_function_type_list (void_type_node, intDI_pointer_node,
18412 neon_intDI_type_node, neon_intDI_type_node, NULL);
18413 void_ftype_pv16qi_v16qi_v16qi =
18414 build_function_type_list (void_type_node, V16QI_pointer_node,
18415 V16QI_type_node, V16QI_type_node, NULL);
18416 void_ftype_pv8hi_v8hi_v8hi =
18417 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18418 V8HI_type_node, NULL);
18419 void_ftype_pv4si_v4si_v4si =
18420 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18421 V4SI_type_node, NULL);
18422 void_ftype_pv4sf_v4sf_v4sf =
18423 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18424 V4SF_type_node, NULL);
18425 void_ftype_pv2di_v2di_v2di =
18426 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18427 V2DI_type_node, NULL);
18429 dreg_types[0] = V8QI_type_node;
18430 dreg_types[1] = V4HI_type_node;
18431 dreg_types[2] = V2SI_type_node;
18432 dreg_types[3] = V2SF_type_node;
18433 dreg_types[4] = neon_intDI_type_node;
18435 qreg_types[0] = V16QI_type_node;
18436 qreg_types[1] = V8HI_type_node;
18437 qreg_types[2] = V4SI_type_node;
18438 qreg_types[3] = V4SF_type_node;
18439 qreg_types[4] = V2DI_type_node;
18441 for (i = 0; i < 5; i++)
18443 int j;
18444 for (j = 0; j < 5; j++)
18446 reinterp_ftype_dreg[i][j]
18447 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18448 reinterp_ftype_qreg[i][j]
18449 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
18453 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
18454 i < ARRAY_SIZE (neon_builtin_data);
18455 i++, fcode++)
18457 neon_builtin_datum *d = &neon_builtin_data[i];
18459 const char* const modenames[] = {
18460 "v8qi", "v4hi", "v2si", "v2sf", "di",
18461 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
18462 "ti", "ei", "oi"
18464 char namebuf[60];
18465 tree ftype = NULL;
18466 int is_load = 0, is_store = 0;
18468 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
18470 d->fcode = fcode;
18472 switch (d->itype)
18474 case NEON_LOAD1:
18475 case NEON_LOAD1LANE:
18476 case NEON_LOADSTRUCT:
18477 case NEON_LOADSTRUCTLANE:
18478 is_load = 1;
18479 /* Fall through. */
18480 case NEON_STORE1:
18481 case NEON_STORE1LANE:
18482 case NEON_STORESTRUCT:
18483 case NEON_STORESTRUCTLANE:
18484 if (!is_load)
18485 is_store = 1;
18486 /* Fall through. */
18487 case NEON_UNOP:
18488 case NEON_BINOP:
18489 case NEON_LOGICBINOP:
18490 case NEON_SHIFTINSERT:
18491 case NEON_TERNOP:
18492 case NEON_GETLANE:
18493 case NEON_SETLANE:
18494 case NEON_CREATE:
18495 case NEON_DUP:
18496 case NEON_DUPLANE:
18497 case NEON_SHIFTIMM:
18498 case NEON_SHIFTACC:
18499 case NEON_COMBINE:
18500 case NEON_SPLIT:
18501 case NEON_CONVERT:
18502 case NEON_FIXCONV:
18503 case NEON_LANEMUL:
18504 case NEON_LANEMULL:
18505 case NEON_LANEMULH:
18506 case NEON_LANEMAC:
18507 case NEON_SCALARMUL:
18508 case NEON_SCALARMULL:
18509 case NEON_SCALARMULH:
18510 case NEON_SCALARMAC:
18511 case NEON_SELECT:
18512 case NEON_VTBL:
18513 case NEON_VTBX:
18515 int k;
18516 tree return_type = void_type_node, args = void_list_node;
18518 /* Build a function type directly from the insn_data for
18519 this builtin. The build_function_type() function takes
18520 care of removing duplicates for us. */
18521 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
18523 tree eltype;
18525 if (is_load && k == 1)
18527 /* Neon load patterns always have the memory
18528 operand in the operand 1 position. */
18529 gcc_assert (insn_data[d->code].operand[k].predicate
18530 == neon_struct_operand);
18532 switch (d->mode)
18534 case T_V8QI:
18535 case T_V16QI:
18536 eltype = const_intQI_pointer_node;
18537 break;
18539 case T_V4HI:
18540 case T_V8HI:
18541 eltype = const_intHI_pointer_node;
18542 break;
18544 case T_V2SI:
18545 case T_V4SI:
18546 eltype = const_intSI_pointer_node;
18547 break;
18549 case T_V2SF:
18550 case T_V4SF:
18551 eltype = const_float_pointer_node;
18552 break;
18554 case T_DI:
18555 case T_V2DI:
18556 eltype = const_intDI_pointer_node;
18557 break;
18559 default: gcc_unreachable ();
18562 else if (is_store && k == 0)
18564 /* Similarly, Neon store patterns use operand 0 as
18565 the memory location to store to. */
18566 gcc_assert (insn_data[d->code].operand[k].predicate
18567 == neon_struct_operand);
18569 switch (d->mode)
18571 case T_V8QI:
18572 case T_V16QI:
18573 eltype = intQI_pointer_node;
18574 break;
18576 case T_V4HI:
18577 case T_V8HI:
18578 eltype = intHI_pointer_node;
18579 break;
18581 case T_V2SI:
18582 case T_V4SI:
18583 eltype = intSI_pointer_node;
18584 break;
18586 case T_V2SF:
18587 case T_V4SF:
18588 eltype = float_pointer_node;
18589 break;
18591 case T_DI:
18592 case T_V2DI:
18593 eltype = intDI_pointer_node;
18594 break;
18596 default: gcc_unreachable ();
18599 else
18601 switch (insn_data[d->code].operand[k].mode)
18603 case VOIDmode: eltype = void_type_node; break;
18604 /* Scalars. */
18605 case QImode: eltype = neon_intQI_type_node; break;
18606 case HImode: eltype = neon_intHI_type_node; break;
18607 case SImode: eltype = neon_intSI_type_node; break;
18608 case SFmode: eltype = neon_float_type_node; break;
18609 case DImode: eltype = neon_intDI_type_node; break;
18610 case TImode: eltype = intTI_type_node; break;
18611 case EImode: eltype = intEI_type_node; break;
18612 case OImode: eltype = intOI_type_node; break;
18613 case CImode: eltype = intCI_type_node; break;
18614 case XImode: eltype = intXI_type_node; break;
18615 /* 64-bit vectors. */
18616 case V8QImode: eltype = V8QI_type_node; break;
18617 case V4HImode: eltype = V4HI_type_node; break;
18618 case V2SImode: eltype = V2SI_type_node; break;
18619 case V2SFmode: eltype = V2SF_type_node; break;
18620 /* 128-bit vectors. */
18621 case V16QImode: eltype = V16QI_type_node; break;
18622 case V8HImode: eltype = V8HI_type_node; break;
18623 case V4SImode: eltype = V4SI_type_node; break;
18624 case V4SFmode: eltype = V4SF_type_node; break;
18625 case V2DImode: eltype = V2DI_type_node; break;
18626 default: gcc_unreachable ();
18630 if (k == 0 && !is_store)
18631 return_type = eltype;
18632 else
18633 args = tree_cons (NULL_TREE, eltype, args);
18636 ftype = build_function_type (return_type, args);
18638 break;
18640 case NEON_RESULTPAIR:
18642 switch (insn_data[d->code].operand[1].mode)
18644 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
18645 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
18646 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
18647 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
18648 case DImode: ftype = void_ftype_pdi_di_di; break;
18649 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
18650 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
18651 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
18652 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
18653 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
18654 default: gcc_unreachable ();
18657 break;
18659 case NEON_REINTERP:
18661 /* We iterate over 5 doubleword types, then 5 quadword
18662 types. */
18663 int rhs = d->mode % 5;
18664 switch (insn_data[d->code].operand[0].mode)
18666 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
18667 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
18668 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
18669 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
18670 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
18671 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
18672 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
18673 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
18674 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
18675 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
18676 default: gcc_unreachable ();
18679 break;
18681 default:
18682 gcc_unreachable ();
18685 gcc_assert (ftype != NULL);
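/* Editorial note (not part of the original file): the name built here is
   the instruction name with its key mode appended; for instance the "vadd"
   entry in V8QImode produces "__builtin_neon_vaddv8qi".  */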
18687 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
18689 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
18690 NULL_TREE);
18691 arm_builtin_decls[fcode] = decl;
18695 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
18696 do \
18698 if ((MASK) & insn_flags) \
18700 tree bdecl; \
18701 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
18702 BUILT_IN_MD, NULL, NULL_TREE); \
18703 arm_builtin_decls[CODE] = bdecl; \
18706 while (0)
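/* Editorial note (not part of the original file): the builtin is created
   only when the feature bit in MASK is present in insn_flags, so builtins
   registered through this macro with FL_IWMMXT appear only on iWMMXt
   targets.  */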
18708 struct builtin_description
18710 const unsigned int mask;
18711 const enum insn_code icode;
18712 const char * const name;
18713 const enum arm_builtins code;
18714 const enum rtx_code comparison;
18715 const unsigned int flag;
18718 static const struct builtin_description bdesc_2arg[] =
18720 #define IWMMXT_BUILTIN(code, string, builtin) \
18721 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
18722 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
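/* Editorial sketch (not part of the original file): each line below yields
   one builtin_description entry; for instance
       IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   expands to
       { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
         ARM_BUILTIN_WADDB, UNKNOWN, 0 },  */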
18724 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
18725 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
18726 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
18727 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
18728 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
18729 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
18730 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
18731 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
18732 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
18733 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
18734 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
18735 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
18736 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
18737 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
18738 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
18739 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
18740 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
18741 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
18742 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
18743 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
18744 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
18745 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
18746 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
18747 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
18748 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
18749 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
18750 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
18751 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
18752 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
18753 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
18754 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
18755 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
18756 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
18757 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
18758 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
18759 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
18760 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
18761 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
18762 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
18763 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
18764 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
18765 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
18766 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
18767 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
18768 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
18769 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
18770 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
18771 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
18772 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
18773 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
18774 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
18775 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
18776 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
18777 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
18778 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
18779 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
18780 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
18781 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
18783 #define IWMMXT_BUILTIN2(code, builtin) \
18784 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
18786 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
18787 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
18788 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
18789 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
18790 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
18791 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
18792 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
18793 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
18794 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
18795 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
18796 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
18797 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
18798 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
18799 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
18800 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
18801 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
18802 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
18803 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
18804 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
18805 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
18806 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
18807 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
18808 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
18809 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
18810 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
18811 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
18812 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
18813 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
18814 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
18815 IWMMXT_BUILTIN2 (rordi3, WRORDI)
18816 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
18817 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
18820 static const struct builtin_description bdesc_1arg[] =
18822 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
18823 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
18824 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
18825 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
18826 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
18827 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
18828 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
18829 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
18830 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
18831 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
18832 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
18833 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
18834 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
18835 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
18836 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
18837 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
18838 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
18839 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
18842 /* Set up all the iWMMXt builtins. This is not called if
18843 TARGET_IWMMXT is zero. */
18845 static void
18846 arm_init_iwmmxt_builtins (void)
18848 const struct builtin_description * d;
18849 size_t i;
18851 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18852 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18853 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
18855 tree int_ftype_int
18856 = build_function_type_list (integer_type_node,
18857 integer_type_node, NULL_TREE);
18858 tree v8qi_ftype_v8qi_v8qi_int
18859 = build_function_type_list (V8QI_type_node,
18860 V8QI_type_node, V8QI_type_node,
18861 integer_type_node, NULL_TREE);
18862 tree v4hi_ftype_v4hi_int
18863 = build_function_type_list (V4HI_type_node,
18864 V4HI_type_node, integer_type_node, NULL_TREE);
18865 tree v2si_ftype_v2si_int
18866 = build_function_type_list (V2SI_type_node,
18867 V2SI_type_node, integer_type_node, NULL_TREE);
18868 tree v2si_ftype_di_di
18869 = build_function_type_list (V2SI_type_node,
18870 long_long_integer_type_node,
18871 long_long_integer_type_node,
18872 NULL_TREE);
18873 tree di_ftype_di_int
18874 = build_function_type_list (long_long_integer_type_node,
18875 long_long_integer_type_node,
18876 integer_type_node, NULL_TREE);
18877 tree di_ftype_di_int_int
18878 = build_function_type_list (long_long_integer_type_node,
18879 long_long_integer_type_node,
18880 integer_type_node,
18881 integer_type_node, NULL_TREE);
18882 tree int_ftype_v8qi
18883 = build_function_type_list (integer_type_node,
18884 V8QI_type_node, NULL_TREE);
18885 tree int_ftype_v4hi
18886 = build_function_type_list (integer_type_node,
18887 V4HI_type_node, NULL_TREE);
18888 tree int_ftype_v2si
18889 = build_function_type_list (integer_type_node,
18890 V2SI_type_node, NULL_TREE);
18891 tree int_ftype_v8qi_int
18892 = build_function_type_list (integer_type_node,
18893 V8QI_type_node, integer_type_node, NULL_TREE);
18894 tree int_ftype_v4hi_int
18895 = build_function_type_list (integer_type_node,
18896 V4HI_type_node, integer_type_node, NULL_TREE);
18897 tree int_ftype_v2si_int
18898 = build_function_type_list (integer_type_node,
18899 V2SI_type_node, integer_type_node, NULL_TREE);
18900 tree v8qi_ftype_v8qi_int_int
18901 = build_function_type_list (V8QI_type_node,
18902 V8QI_type_node, integer_type_node,
18903 integer_type_node, NULL_TREE);
18904 tree v4hi_ftype_v4hi_int_int
18905 = build_function_type_list (V4HI_type_node,
18906 V4HI_type_node, integer_type_node,
18907 integer_type_node, NULL_TREE);
18908 tree v2si_ftype_v2si_int_int
18909 = build_function_type_list (V2SI_type_node,
18910 V2SI_type_node, integer_type_node,
18911 integer_type_node, NULL_TREE);
18912 /* Miscellaneous. */
18913 tree v8qi_ftype_v4hi_v4hi
18914 = build_function_type_list (V8QI_type_node,
18915 V4HI_type_node, V4HI_type_node, NULL_TREE);
18916 tree v4hi_ftype_v2si_v2si
18917 = build_function_type_list (V4HI_type_node,
18918 V2SI_type_node, V2SI_type_node, NULL_TREE);
18919 tree v2si_ftype_v4hi_v4hi
18920 = build_function_type_list (V2SI_type_node,
18921 V4HI_type_node, V4HI_type_node, NULL_TREE);
18922 tree v2si_ftype_v8qi_v8qi
18923 = build_function_type_list (V2SI_type_node,
18924 V8QI_type_node, V8QI_type_node, NULL_TREE);
18925 tree v4hi_ftype_v4hi_di
18926 = build_function_type_list (V4HI_type_node,
18927 V4HI_type_node, long_long_integer_type_node,
18928 NULL_TREE);
18929 tree v2si_ftype_v2si_di
18930 = build_function_type_list (V2SI_type_node,
18931 V2SI_type_node, long_long_integer_type_node,
18932 NULL_TREE);
18933 tree void_ftype_int_int
18934 = build_function_type_list (void_type_node,
18935 integer_type_node, integer_type_node,
18936 NULL_TREE);
18937 tree di_ftype_void
18938 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
18939 tree di_ftype_v8qi
18940 = build_function_type_list (long_long_integer_type_node,
18941 V8QI_type_node, NULL_TREE);
18942 tree di_ftype_v4hi
18943 = build_function_type_list (long_long_integer_type_node,
18944 V4HI_type_node, NULL_TREE);
18945 tree di_ftype_v2si
18946 = build_function_type_list (long_long_integer_type_node,
18947 V2SI_type_node, NULL_TREE);
18948 tree v2si_ftype_v4hi
18949 = build_function_type_list (V2SI_type_node,
18950 V4HI_type_node, NULL_TREE);
18951 tree v4hi_ftype_v8qi
18952 = build_function_type_list (V4HI_type_node,
18953 V8QI_type_node, NULL_TREE);
18955 tree di_ftype_di_v4hi_v4hi
18956 = build_function_type_list (long_long_unsigned_type_node,
18957 long_long_unsigned_type_node,
18958 V4HI_type_node, V4HI_type_node,
18959 NULL_TREE);
18961 tree di_ftype_v4hi_v4hi
18962 = build_function_type_list (long_long_unsigned_type_node,
18963 V4HI_type_node,V4HI_type_node,
18964 NULL_TREE);
18966 /* Normal vector binops. */
18967 tree v8qi_ftype_v8qi_v8qi
18968 = build_function_type_list (V8QI_type_node,
18969 V8QI_type_node, V8QI_type_node, NULL_TREE);
18970 tree v4hi_ftype_v4hi_v4hi
18971 = build_function_type_list (V4HI_type_node,
18972 V4HI_type_node,V4HI_type_node, NULL_TREE);
18973 tree v2si_ftype_v2si_v2si
18974 = build_function_type_list (V2SI_type_node,
18975 V2SI_type_node, V2SI_type_node, NULL_TREE);
18976 tree di_ftype_di_di
18977 = build_function_type_list (long_long_unsigned_type_node,
18978 long_long_unsigned_type_node,
18979 long_long_unsigned_type_node,
18980 NULL_TREE);
18982 /* Add all builtins that are more or less simple operations on two
18983 operands. */
18984 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18986 /* Use one of the operands; the target can have a different mode for
18987 mask-generating compares. */
18988 enum machine_mode mode;
18989 tree type;
18991 if (d->name == 0)
18992 continue;
18994 mode = insn_data[d->icode].operand[1].mode;
18996 switch (mode)
18998 case V8QImode:
18999 type = v8qi_ftype_v8qi_v8qi;
19000 break;
19001 case V4HImode:
19002 type = v4hi_ftype_v4hi_v4hi;
19003 break;
19004 case V2SImode:
19005 type = v2si_ftype_v2si_v2si;
19006 break;
19007 case DImode:
19008 type = di_ftype_di_di;
19009 break;
19011 default:
19012 gcc_unreachable ();
19015 def_mbuiltin (d->mask, d->name, type, d->code);
19018 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
19019 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
19020 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
19021 ARM_BUILTIN_ ## CODE)
19023 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
19024 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
19025 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
19027 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
19028 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
19029 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
19030 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
19031 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
19032 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
19034 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
19035 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
19036 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
19037 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
19038 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
19039 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
19041 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
19042 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
19043 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
19044 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
19045 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
19046 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
19048 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
19049 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
19050 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
19051 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
19052 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
19053 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
19055 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
19057 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
19058 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
19059 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
19060 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
19062 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
19063 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
19064 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
19065 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
19066 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
19067 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
19068 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
19069 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
19070 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
19072 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
19073 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
19074 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
19076 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
19077 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
19078 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
19080 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
19081 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
19082 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
19083 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
19084 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
19085 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
19087 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
19088 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
19089 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
19090 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
19091 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
19092 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
19093 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
19094 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
19095 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
19096 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
19097 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
19098 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
19100 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
19101 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
19102 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
19103 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
19105 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
19106 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
19107 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
19108 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
19109 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
19110 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
19111 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
19113 #undef iwmmx_mbuiltin
19116 static void
19117 arm_init_tls_builtins (void)
19119 tree ftype, decl;
19121 ftype = build_function_type (ptr_type_node, void_list_node);
19122 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
19123 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
19124 NULL, NULL_TREE);
19125 TREE_NOTHROW (decl) = 1;
19126 TREE_READONLY (decl) = 1;
19127 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
19130 static void
19131 arm_init_fp16_builtins (void)
19133 tree fp16_type = make_node (REAL_TYPE);
19134 TYPE_PRECISION (fp16_type) = 16;
19135 layout_type (fp16_type);
19136 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
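/* Editorial note (not part of the original file): once this type is
   registered, __fp16 is usable as a scalar type in C source, e.g.
   "__fp16 x = 1.0f;", provided a half-precision format has been selected
   (for instance with the -mfp16-format option).  */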
19139 static void
19140 arm_init_builtins (void)
19142 arm_init_tls_builtins ();
19144 if (TARGET_REALLY_IWMMXT)
19145 arm_init_iwmmxt_builtins ();
19147 if (TARGET_NEON)
19148 arm_init_neon_builtins ();
19150 if (arm_fp16_format)
19151 arm_init_fp16_builtins ();
19154 /* Return the ARM builtin for CODE. */
19156 static tree
19157 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
19159 if (code >= ARM_BUILTIN_MAX)
19160 return error_mark_node;
19162 return arm_builtin_decls[code];
19165 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19167 static const char *
19168 arm_invalid_parameter_type (const_tree t)
19170 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19171 return N_("function parameters cannot have __fp16 type");
19172 return NULL;
19175 /* Implement TARGET_INVALID_RETURN_TYPE. */
19177 static const char *
19178 arm_invalid_return_type (const_tree t)
19180 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19181 return N_("functions cannot return __fp16 type");
19182 return NULL;
19185 /* Implement TARGET_PROMOTED_TYPE. */
19187 static tree
19188 arm_promoted_type (const_tree t)
19190 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19191 return float_type_node;
19192 return NULL_TREE;
19195 /* Implement TARGET_CONVERT_TO_TYPE.
19196 Specifically, this hook implements the peculiarity of the ARM
19197 half-precision floating-point C semantics that requires conversions between
19198 __fp16 and double to go through an intermediate conversion to float. */
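/* Worked example (editorial, not part of the original file): given
       __fp16 h; double d;
   the conversion in "d = h" is performed as if written "(double)(float) h",
   and "h = d" likewise narrows through float first.  */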
19200 static tree
19201 arm_convert_to_type (tree type, tree expr)
19203 tree fromtype = TREE_TYPE (expr);
19204 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
19205 return NULL_TREE;
19206 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
19207 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
19208 return convert (type, convert (float_type_node, expr));
19209 return NULL_TREE;
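/* Illustrative example (not part of the original source): at the C level
   the hook above makes conversions that skip the float step behave as if
   an explicit intermediate cast had been written, e.g. on a target where
   __fp16 is enabled (-mfp16-format=ieee or alternative):

       __fp16 h = 1.0;
       double d = h;        // handled as (double) (float) h
       __fp16 g = 2.0 * d;  // narrowing handled as (__fp16) (float) (2.0 * d)
*/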
19212 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19213 This simply adds HFmode as a supported mode; even though we don't
19214 implement arithmetic on this type directly, it's supported by
19215 optabs conversions, much the way the double-word arithmetic is
19216 special-cased in the default hook. */
19218 static bool
19219 arm_scalar_mode_supported_p (enum machine_mode mode)
19221 if (mode == HFmode)
19222 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
19223 else
19224 return default_scalar_mode_supported_p (mode);
19227 /* Errors in the source file can cause expand_expr to return const0_rtx
19228 where we expect a vector. To avoid crashing, use one of the vector
19229 clear instructions. */
19231 static rtx
19232 safe_vector_operand (rtx x, enum machine_mode mode)
19234 if (x != const0_rtx)
19235 return x;
19236 x = gen_reg_rtx (mode);
19238 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
19239 : gen_rtx_SUBREG (DImode, x, 0)));
19240 return x;
19243 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19245 static rtx
19246 arm_expand_binop_builtin (enum insn_code icode,
19247 tree exp, rtx target)
19249 rtx pat;
19250 tree arg0 = CALL_EXPR_ARG (exp, 0);
19251 tree arg1 = CALL_EXPR_ARG (exp, 1);
19252 rtx op0 = expand_normal (arg0);
19253 rtx op1 = expand_normal (arg1);
19254 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19255 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19256 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19258 if (VECTOR_MODE_P (mode0))
19259 op0 = safe_vector_operand (op0, mode0);
19260 if (VECTOR_MODE_P (mode1))
19261 op1 = safe_vector_operand (op1, mode1);
19263 if (! target
19264 || GET_MODE (target) != tmode
19265 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19266 target = gen_reg_rtx (tmode);
19268 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
19270 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19271 op0 = copy_to_mode_reg (mode0, op0);
19272 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19273 op1 = copy_to_mode_reg (mode1, op1);
19275 pat = GEN_FCN (icode) (target, op0, op1);
19276 if (! pat)
19277 return 0;
19278 emit_insn (pat);
19279 return target;
19282 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19284 static rtx
19285 arm_expand_unop_builtin (enum insn_code icode,
19286 tree exp, rtx target, int do_load)
19288 rtx pat;
19289 tree arg0 = CALL_EXPR_ARG (exp, 0);
19290 rtx op0 = expand_normal (arg0);
19291 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19292 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19294 if (! target
19295 || GET_MODE (target) != tmode
19296 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19297 target = gen_reg_rtx (tmode);
19298 if (do_load)
19299 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19300 else
19302 if (VECTOR_MODE_P (mode0))
19303 op0 = safe_vector_operand (op0, mode0);
19305 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19306 op0 = copy_to_mode_reg (mode0, op0);
19309 pat = GEN_FCN (icode) (target, op0);
19310 if (! pat)
19311 return 0;
19312 emit_insn (pat);
19313 return target;
19316 typedef enum {
19317 NEON_ARG_COPY_TO_REG,
19318 NEON_ARG_CONSTANT,
19319 NEON_ARG_MEMORY,
19320 NEON_ARG_STOP
19321 } builtin_arg;
19323 #define NEON_MAX_BUILTIN_ARGS 5
19325 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
19326 and return an expression for the accessed memory.
19328 The intrinsic function operates on a block of registers that has
19329 mode REG_MODE. This block contains vectors of type TYPE_MODE.
19330 The function references the memory at EXP in mode MEM_MODE;
19331 this mode may be BLKmode if no more suitable mode is available. */
19333 static tree
19334 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
19335 enum machine_mode reg_mode,
19336 neon_builtin_type_mode type_mode)
19338 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
19339 tree elem_type, upper_bound, array_type;
19341 /* Work out the size of the register block in bytes. */
19342 reg_size = GET_MODE_SIZE (reg_mode);
19344 /* Work out the size of each vector in bytes. */
19345 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
19346 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
19348 /* Work out how many vectors there are. */
19349 gcc_assert (reg_size % vector_size == 0);
19350 nvectors = reg_size / vector_size;
19352 /* Work out how many elements are being loaded or stored.
19353 MEM_MODE == REG_MODE implies a one-to-one mapping between register
19354 and memory elements; anything else implies a lane load or store. */
19355 if (mem_mode == reg_mode)
19356 nelems = vector_size * nvectors;
19357 else
19358 nelems = nvectors;
19360 /* Work out the type of each element. */
19361 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
19362 elem_type = TREE_TYPE (TREE_TYPE (exp));
19364 /* Create a type that describes the full access. */
19365 upper_bound = build_int_cst (size_type_node, nelems - 1);
19366 array_type = build_array_type (elem_type, build_index_type (upper_bound));
19368 /* Dereference EXP using that type. */
19369 exp = convert (build_pointer_type (array_type), exp);
19370 return fold_build2 (MEM_REF, array_type, exp,
19371 build_int_cst (TREE_TYPE (exp), 0));
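/* Worked example (tracing the code above, for illustration only): for a
   full-width access involving a single quad register, REG_MODE is 16 bytes
   wide and equals MEM_MODE, so vector_size = 16, nvectors = 1 and
   nelems = 16; the access is then described as an array of 16 elements of
   the pointer's target type starting at EXP.  For a lane operation
   MEM_MODE differs from REG_MODE and only nvectors elements are
   described.  */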
19374 /* Expand a Neon builtin. */
19375 static rtx
19376 arm_expand_neon_args (rtx target, int icode, int have_retval,
19377 neon_builtin_type_mode type_mode,
19378 tree exp, ...)
19380 va_list ap;
19381 rtx pat;
19382 tree arg[NEON_MAX_BUILTIN_ARGS];
19383 rtx op[NEON_MAX_BUILTIN_ARGS];
19384 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19385 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
19386 enum machine_mode other_mode;
19387 int argc = 0;
19388 int opno;
19390 if (have_retval
19391 && (!target
19392 || GET_MODE (target) != tmode
19393 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
19394 target = gen_reg_rtx (tmode);
19396 va_start (ap, exp);
19398 for (;;)
19400 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
19402 if (thisarg == NEON_ARG_STOP)
19403 break;
19404 else
19406 opno = argc + have_retval;
19407 mode[argc] = insn_data[icode].operand[opno].mode;
19408 arg[argc] = CALL_EXPR_ARG (exp, argc);
19409 if (thisarg == NEON_ARG_MEMORY)
19411 other_mode = insn_data[icode].operand[1 - opno].mode;
19412 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
19413 other_mode, type_mode);
19415 op[argc] = expand_normal (arg[argc]);
19417 switch (thisarg)
19419 case NEON_ARG_COPY_TO_REG:
19420 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19421 if (!(*insn_data[icode].operand[opno].predicate)
19422 (op[argc], mode[argc]))
19423 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
19424 break;
19426 case NEON_ARG_CONSTANT:
19427 /* FIXME: This error message is somewhat unhelpful. */
19428 if (!(*insn_data[icode].operand[opno].predicate)
19429 (op[argc], mode[argc]))
19430 error ("argument must be a constant");
19431 break;
19433 case NEON_ARG_MEMORY:
19434 gcc_assert (MEM_P (op[argc]));
19435 PUT_MODE (op[argc], mode[argc]);
19436 /* ??? arm_neon.h uses the same built-in functions for signed
19437 and unsigned accesses, casting where necessary. This isn't
19438 alias safe. */
19439 set_mem_alias_set (op[argc], 0);
19440 if (!(*insn_data[icode].operand[opno].predicate)
19441 (op[argc], mode[argc]))
19442 op[argc] = (replace_equiv_address
19443 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
19444 break;
19446 case NEON_ARG_STOP:
19447 gcc_unreachable ();
19450 argc++;
19454 va_end (ap);
19456 if (have_retval)
19457 switch (argc)
19459 case 1:
19460 pat = GEN_FCN (icode) (target, op[0]);
19461 break;
19463 case 2:
19464 pat = GEN_FCN (icode) (target, op[0], op[1]);
19465 break;
19467 case 3:
19468 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
19469 break;
19471 case 4:
19472 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19473 break;
19475 case 5:
19476 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19477 break;
19479 default:
19480 gcc_unreachable ();
19482 else
19483 switch (argc)
19485 case 1:
19486 pat = GEN_FCN (icode) (op[0]);
19487 break;
19489 case 2:
19490 pat = GEN_FCN (icode) (op[0], op[1]);
19491 break;
19493 case 3:
19494 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19495 break;
19497 case 4:
19498 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19499 break;
19501 case 5:
19502 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19503 break;
19505 default:
19506 gcc_unreachable ();
19509 if (!pat)
19510 return 0;
19512 emit_insn (pat);
19514 return target;
19517 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19518 constants defined per-instruction or per instruction-variant. Instead, the
19519 required info is looked up in the table neon_builtin_data. */
19520 static rtx
19521 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
19523 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
19524 neon_itype itype = d->itype;
19525 enum insn_code icode = d->code;
19526 neon_builtin_type_mode type_mode = d->mode;
19528 switch (itype)
19530 case NEON_UNOP:
19531 case NEON_CONVERT:
19532 case NEON_DUPLANE:
19533 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19534 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19536 case NEON_BINOP:
19537 case NEON_SETLANE:
19538 case NEON_SCALARMUL:
19539 case NEON_SCALARMULL:
19540 case NEON_SCALARMULH:
19541 case NEON_SHIFTINSERT:
19542 case NEON_LOGICBINOP:
19543 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19544 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19545 NEON_ARG_STOP);
19547 case NEON_TERNOP:
19548 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19549 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19550 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19552 case NEON_GETLANE:
19553 case NEON_FIXCONV:
19554 case NEON_SHIFTIMM:
19555 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19556 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19557 NEON_ARG_STOP);
19559 case NEON_CREATE:
19560 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19561 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19563 case NEON_DUP:
19564 case NEON_SPLIT:
19565 case NEON_REINTERP:
19566 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19567 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19569 case NEON_COMBINE:
19570 case NEON_VTBL:
19571 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19572 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19574 case NEON_RESULTPAIR:
19575 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19576 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19577 NEON_ARG_STOP);
19579 case NEON_LANEMUL:
19580 case NEON_LANEMULL:
19581 case NEON_LANEMULH:
19582 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19583 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19584 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19586 case NEON_LANEMAC:
19587 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19588 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19589 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19591 case NEON_SHIFTACC:
19592 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19593 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19594 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19596 case NEON_SCALARMAC:
19597 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19598 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19599 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19601 case NEON_SELECT:
19602 case NEON_VTBX:
19603 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19604 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19605 NEON_ARG_STOP);
19607 case NEON_LOAD1:
19608 case NEON_LOADSTRUCT:
19609 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19610 NEON_ARG_MEMORY, NEON_ARG_STOP);
19612 case NEON_LOAD1LANE:
19613 case NEON_LOADSTRUCTLANE:
19614 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19615 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19616 NEON_ARG_STOP);
19618 case NEON_STORE1:
19619 case NEON_STORESTRUCT:
19620 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19621 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19623 case NEON_STORE1LANE:
19624 case NEON_STORESTRUCTLANE:
19625 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19626 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19627 NEON_ARG_STOP);
19630 gcc_unreachable ();
19633 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19634 void
19635 neon_reinterpret (rtx dest, rtx src)
19637 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
19640 /* Emit code to place a Neon pair result in memory locations (with equal
19641 registers). */
19642 void
19643 neon_emit_pair_result_insn (enum machine_mode mode,
19644 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19645 rtx op1, rtx op2)
19647 rtx mem = gen_rtx_MEM (mode, destaddr);
19648 rtx tmp1 = gen_reg_rtx (mode);
19649 rtx tmp2 = gen_reg_rtx (mode);
19651 emit_insn (intfn (tmp1, op1, op2, tmp2));
19653 emit_move_insn (mem, tmp1);
19654 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19655 emit_move_insn (mem, tmp2);
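/* Illustration (not part of the original source): for MODE == V2SImode the
   two temporaries end up at [DESTADDR] and [DESTADDR + 8], since
   GET_MODE_SIZE (V2SImode) is 8 bytes.  */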
19658 /* Set up operands for a register copy from src to dest, taking care not to
19659 clobber registers in the process.
19660 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19661 be called with a large N, so that should be OK. */
19663 void
19664 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
19666 unsigned int copied = 0, opctr = 0;
19667 unsigned int done = (1 << count) - 1;
19668 unsigned int i, j;
19670 while (copied != done)
19672 for (i = 0; i < count; i++)
19674 int good = 1;
19676 for (j = 0; good && j < count; j++)
19677 if (i != j && (copied & (1 << j)) == 0
19678 && reg_overlap_mentioned_p (src[j], dest[i]))
19679 good = 0;
19681 if (good)
19683 operands[opctr++] = dest[i];
19684 operands[opctr++] = src[i];
19685 copied |= 1 << i;
19690 gcc_assert (opctr == count * 2);
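/* Standalone sketch (illustration only): the same ordering idea on plain
   register numbers, where two registers "overlap" iff they are equal, and
   assuming the copies contain no cyclic overlap.  A copy is emitted only
   once its destination is no longer needed as the source of a pending copy.

       static unsigned int
       order_copies (const int *dest, const int *src, unsigned int count,
                     int *out)              /* OUT has room for 2*count entries.  */
       {
         unsigned int copied = 0, done = (1u << count) - 1, n = 0, i, j;

         while (copied != done)
           for (i = 0; i < count; i++)
             {
               int good = 1;

               if (copied & (1u << i))
                 continue;                  /* Already emitted.  */

               for (j = 0; good && j < count; j++)
                 if (i != j && (copied & (1u << j)) == 0 && src[j] == dest[i])
                   good = 0;                /* dest[i] still feeds copy J.  */

               if (good)
                 {
                   out[n++] = dest[i];
                   out[n++] = src[i];
                   copied |= 1u << i;
                 }
             }
         return n;                          /* Always 2 * count on exit.  */
       }
*/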
19693 /* Expand an expression EXP that calls a built-in function,
19694 with result going to TARGET if that's convenient
19695 (and in mode MODE if that's convenient).
19696 SUBTARGET may be used as the target for computing one of EXP's operands.
19697 IGNORE is nonzero if the value is to be ignored. */
19699 static rtx
19700 arm_expand_builtin (tree exp,
19701 rtx target,
19702 rtx subtarget ATTRIBUTE_UNUSED,
19703 enum machine_mode mode ATTRIBUTE_UNUSED,
19704 int ignore ATTRIBUTE_UNUSED)
19706 const struct builtin_description * d;
19707 enum insn_code icode;
19708 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19709 tree arg0;
19710 tree arg1;
19711 tree arg2;
19712 rtx op0;
19713 rtx op1;
19714 rtx op2;
19715 rtx pat;
19716 int fcode = DECL_FUNCTION_CODE (fndecl);
19717 size_t i;
19718 enum machine_mode tmode;
19719 enum machine_mode mode0;
19720 enum machine_mode mode1;
19721 enum machine_mode mode2;
19723 if (fcode >= ARM_BUILTIN_NEON_BASE)
19724 return arm_expand_neon_builtin (fcode, exp, target);
19726 switch (fcode)
19728 case ARM_BUILTIN_TEXTRMSB:
19729 case ARM_BUILTIN_TEXTRMUB:
19730 case ARM_BUILTIN_TEXTRMSH:
19731 case ARM_BUILTIN_TEXTRMUH:
19732 case ARM_BUILTIN_TEXTRMSW:
19733 case ARM_BUILTIN_TEXTRMUW:
19734 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19735 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19736 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19737 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19738 : CODE_FOR_iwmmxt_textrmw);
19740 arg0 = CALL_EXPR_ARG (exp, 0);
19741 arg1 = CALL_EXPR_ARG (exp, 1);
19742 op0 = expand_normal (arg0);
19743 op1 = expand_normal (arg1);
19744 tmode = insn_data[icode].operand[0].mode;
19745 mode0 = insn_data[icode].operand[1].mode;
19746 mode1 = insn_data[icode].operand[2].mode;
19748 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19749 op0 = copy_to_mode_reg (mode0, op0);
19750 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19752 /* @@@ better error message */
19753 error ("selector must be an immediate");
19754 return gen_reg_rtx (tmode);
19756 if (target == 0
19757 || GET_MODE (target) != tmode
19758 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19759 target = gen_reg_rtx (tmode);
19760 pat = GEN_FCN (icode) (target, op0, op1);
19761 if (! pat)
19762 return 0;
19763 emit_insn (pat);
19764 return target;
19766 case ARM_BUILTIN_TINSRB:
19767 case ARM_BUILTIN_TINSRH:
19768 case ARM_BUILTIN_TINSRW:
19769 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19770 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19771 : CODE_FOR_iwmmxt_tinsrw);
19772 arg0 = CALL_EXPR_ARG (exp, 0);
19773 arg1 = CALL_EXPR_ARG (exp, 1);
19774 arg2 = CALL_EXPR_ARG (exp, 2);
19775 op0 = expand_normal (arg0);
19776 op1 = expand_normal (arg1);
19777 op2 = expand_normal (arg2);
19778 tmode = insn_data[icode].operand[0].mode;
19779 mode0 = insn_data[icode].operand[1].mode;
19780 mode1 = insn_data[icode].operand[2].mode;
19781 mode2 = insn_data[icode].operand[3].mode;
19783 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19784 op0 = copy_to_mode_reg (mode0, op0);
19785 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19786 op1 = copy_to_mode_reg (mode1, op1);
19787 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19789 /* @@@ better error message */
19790 error ("selector must be an immediate");
19791 return const0_rtx;
19793 if (target == 0
19794 || GET_MODE (target) != tmode
19795 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19796 target = gen_reg_rtx (tmode);
19797 pat = GEN_FCN (icode) (target, op0, op1, op2);
19798 if (! pat)
19799 return 0;
19800 emit_insn (pat);
19801 return target;
19803 case ARM_BUILTIN_SETWCX:
19804 arg0 = CALL_EXPR_ARG (exp, 0);
19805 arg1 = CALL_EXPR_ARG (exp, 1);
19806 op0 = force_reg (SImode, expand_normal (arg0));
19807 op1 = expand_normal (arg1);
19808 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19809 return 0;
19811 case ARM_BUILTIN_GETWCX:
19812 arg0 = CALL_EXPR_ARG (exp, 0);
19813 op0 = expand_normal (arg0);
19814 target = gen_reg_rtx (SImode);
19815 emit_insn (gen_iwmmxt_tmrc (target, op0));
19816 return target;
19818 case ARM_BUILTIN_WSHUFH:
19819 icode = CODE_FOR_iwmmxt_wshufh;
19820 arg0 = CALL_EXPR_ARG (exp, 0);
19821 arg1 = CALL_EXPR_ARG (exp, 1);
19822 op0 = expand_normal (arg0);
19823 op1 = expand_normal (arg1);
19824 tmode = insn_data[icode].operand[0].mode;
19825 mode1 = insn_data[icode].operand[1].mode;
19826 mode2 = insn_data[icode].operand[2].mode;
19828 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19829 op0 = copy_to_mode_reg (mode1, op0);
19830 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19832 /* @@@ better error message */
19833 error ("mask must be an immediate");
19834 return const0_rtx;
19836 if (target == 0
19837 || GET_MODE (target) != tmode
19838 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19839 target = gen_reg_rtx (tmode);
19840 pat = GEN_FCN (icode) (target, op0, op1);
19841 if (! pat)
19842 return 0;
19843 emit_insn (pat);
19844 return target;
19846 case ARM_BUILTIN_WSADB:
19847 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19848 case ARM_BUILTIN_WSADH:
19849 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19850 case ARM_BUILTIN_WSADBZ:
19851 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19852 case ARM_BUILTIN_WSADHZ:
19853 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19855 /* Several three-argument builtins. */
19856 case ARM_BUILTIN_WMACS:
19857 case ARM_BUILTIN_WMACU:
19858 case ARM_BUILTIN_WALIGN:
19859 case ARM_BUILTIN_TMIA:
19860 case ARM_BUILTIN_TMIAPH:
19861 case ARM_BUILTIN_TMIATT:
19862 case ARM_BUILTIN_TMIATB:
19863 case ARM_BUILTIN_TMIABT:
19864 case ARM_BUILTIN_TMIABB:
19865 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19866 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19867 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19868 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19869 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19870 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19871 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19872 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19873 : CODE_FOR_iwmmxt_walign);
19874 arg0 = CALL_EXPR_ARG (exp, 0);
19875 arg1 = CALL_EXPR_ARG (exp, 1);
19876 arg2 = CALL_EXPR_ARG (exp, 2);
19877 op0 = expand_normal (arg0);
19878 op1 = expand_normal (arg1);
19879 op2 = expand_normal (arg2);
19880 tmode = insn_data[icode].operand[0].mode;
19881 mode0 = insn_data[icode].operand[1].mode;
19882 mode1 = insn_data[icode].operand[2].mode;
19883 mode2 = insn_data[icode].operand[3].mode;
19885 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19886 op0 = copy_to_mode_reg (mode0, op0);
19887 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19888 op1 = copy_to_mode_reg (mode1, op1);
19889 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19890 op2 = copy_to_mode_reg (mode2, op2);
19891 if (target == 0
19892 || GET_MODE (target) != tmode
19893 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19894 target = gen_reg_rtx (tmode);
19895 pat = GEN_FCN (icode) (target, op0, op1, op2);
19896 if (! pat)
19897 return 0;
19898 emit_insn (pat);
19899 return target;
19901 case ARM_BUILTIN_WZERO:
19902 target = gen_reg_rtx (DImode);
19903 emit_insn (gen_iwmmxt_clrdi (target));
19904 return target;
19906 case ARM_BUILTIN_THREAD_POINTER:
19907 return arm_load_tp (target);
19909 default:
19910 break;
19913 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19914 if (d->code == (const enum arm_builtins) fcode)
19915 return arm_expand_binop_builtin (d->icode, exp, target);
19917 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19918 if (d->code == (const enum arm_builtins) fcode)
19919 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19921 /* @@@ Should really do something sensible here. */
19922 return NULL_RTX;
19925 /* Return the number (counting from 0) of
19926 the least significant set bit in MASK. */
19928 inline static int
19929 number_of_first_bit_set (unsigned mask)
19931 int bit;
19933 for (bit = 0;
19934 (mask & (1 << bit)) == 0;
19935 ++bit)
19936 continue;
19938 return bit;
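/* Illustrative note (not part of the original source): for a nonzero MASK
   this is simply a count of trailing zero bits, e.g.
   number_of_first_bit_set (0x30) == 4.  An equivalent standalone version
   using a GCC builtin would be:

       static inline int first_bit_set (unsigned mask)
       {
         return __builtin_ctz (mask);   /* Like the loop above, not defined
                                           for mask == 0.  */
       }
*/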
19941 /* Emit code to push or pop registers to or from the stack. F is the
19942 assembly file. MASK is the registers to push or pop. PUSH is
19943 nonzero if we should push, and zero if we should pop. For debugging
19944 output, if pushing, adjust CFA_OFFSET by the amount of space added
19945 to the stack. REAL_REGS should have the same number of bits set as
19946 MASK, and will be used instead (in the same order) to describe which
19947 registers were saved - this is used to mark the save slots when we
19948 push high registers after moving them to low registers. */
19949 static void
19950 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19951 unsigned long real_regs)
19953 int regno;
19954 int lo_mask = mask & 0xFF;
19955 int pushed_words = 0;
19957 gcc_assert (mask);
19959 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19961 /* Special case. Do not generate a POP PC statement here; do it in
19962 thumb_exit(). */
19963 thumb_exit (f, -1);
19964 return;
19967 if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
19969 fprintf (f, "\t.save\t{");
19970 for (regno = 0; regno < 15; regno++)
19972 if (real_regs & (1 << regno))
19974 if (real_regs & ((1 << regno) -1))
19975 fprintf (f, ", ");
19976 asm_fprintf (f, "%r", regno);
19979 fprintf (f, "}\n");
19982 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19984 /* Look at the low registers first. */
19985 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19987 if (lo_mask & 1)
19989 asm_fprintf (f, "%r", regno);
19991 if ((lo_mask & ~1) != 0)
19992 fprintf (f, ", ");
19994 pushed_words++;
19998 if (push && (mask & (1 << LR_REGNUM)))
20000 /* Catch pushing the LR. */
20001 if (mask & 0xFF)
20002 fprintf (f, ", ");
20004 asm_fprintf (f, "%r", LR_REGNUM);
20006 pushed_words++;
20008 else if (!push && (mask & (1 << PC_REGNUM)))
20010 /* Catch popping the PC. */
20011 if (TARGET_INTERWORK || TARGET_BACKTRACE
20012 || crtl->calls_eh_return)
20014 /* The PC is never popped directly; instead
20015 it is popped into r3 and then BX is used. */
20016 fprintf (f, "}\n");
20018 thumb_exit (f, -1);
20020 return;
20022 else
20024 if (mask & 0xFF)
20025 fprintf (f, ", ");
20027 asm_fprintf (f, "%r", PC_REGNUM);
20031 fprintf (f, "}\n");
20033 if (push && pushed_words && dwarf2out_do_frame ())
20035 char *l = dwarf2out_cfi_label (false);
20036 int pushed_mask = real_regs;
20038 *cfa_offset += pushed_words * 4;
20039 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
20041 pushed_words = 0;
20042 pushed_mask = real_regs;
20043 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
20045 if (pushed_mask & 1)
20046 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
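/* Worked example (illustration only): with PUSH nonzero and
   MASK == (0xf0 | (1 << LR_REGNUM)) the code above emits roughly

       push {r4, r5, r6, r7, lr}

   preceded by a matching .save directive when EABI unwind tables are
   wanted.  The same low-register mask with PC_REGNUM instead of LR_REGNUM
   on the pop path emits "pop {r4, r5, r6, r7, pc}" unless interworking,
   backtracing or __builtin_eh_return forces the pop-into-r3 plus BX
   sequence handled by thumb_exit.  */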
20051 /* Generate code to return from a thumb function.
20052 If 'reg_containing_return_addr' is -1, then the return address is
20053 actually on the stack, at the stack pointer. */
20054 static void
20055 thumb_exit (FILE *f, int reg_containing_return_addr)
20057 unsigned regs_available_for_popping;
20058 unsigned regs_to_pop;
20059 int pops_needed;
20060 unsigned available;
20061 unsigned required;
20062 int mode;
20063 int size;
20064 int restore_a4 = FALSE;
20066 /* Compute the registers we need to pop. */
20067 regs_to_pop = 0;
20068 pops_needed = 0;
20070 if (reg_containing_return_addr == -1)
20072 regs_to_pop |= 1 << LR_REGNUM;
20073 ++pops_needed;
20076 if (TARGET_BACKTRACE)
20078 /* Restore the (ARM) frame pointer and stack pointer. */
20079 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
20080 pops_needed += 2;
20083 /* If there is nothing to pop then just emit the BX instruction and
20084 return. */
20085 if (pops_needed == 0)
20087 if (crtl->calls_eh_return)
20088 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20090 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20091 return;
20093 /* Otherwise if we are not supporting interworking and we have not created
20094 a backtrace structure and the function was not entered in ARM mode then
20095 just pop the return address straight into the PC. */
20096 else if (!TARGET_INTERWORK
20097 && !TARGET_BACKTRACE
20098 && !is_called_in_ARM_mode (current_function_decl)
20099 && !crtl->calls_eh_return)
20101 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
20102 return;
20105 /* Find out how many of the (return) argument registers we can corrupt. */
20106 regs_available_for_popping = 0;
20108 /* If returning via __builtin_eh_return, the bottom three registers
20109 all contain information needed for the return. */
20110 if (crtl->calls_eh_return)
20111 size = 12;
20112 else
20114 /* We can deduce the registers used from the function's
20115 return value. This is more reliable than examining
20116 df_regs_ever_live_p () because that will be set if the register is
20117 ever used in the function, not just if the register is used
20118 to hold a return value. */
20120 if (crtl->return_rtx != 0)
20121 mode = GET_MODE (crtl->return_rtx);
20122 else
20123 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20125 size = GET_MODE_SIZE (mode);
20127 if (size == 0)
20129 /* In a void function we can use any argument register.
20130 In a function that returns a structure on the stack
20131 we can use the second and third argument registers. */
20132 if (mode == VOIDmode)
20133 regs_available_for_popping =
20134 (1 << ARG_REGISTER (1))
20135 | (1 << ARG_REGISTER (2))
20136 | (1 << ARG_REGISTER (3));
20137 else
20138 regs_available_for_popping =
20139 (1 << ARG_REGISTER (2))
20140 | (1 << ARG_REGISTER (3));
20142 else if (size <= 4)
20143 regs_available_for_popping =
20144 (1 << ARG_REGISTER (2))
20145 | (1 << ARG_REGISTER (3));
20146 else if (size <= 8)
20147 regs_available_for_popping =
20148 (1 << ARG_REGISTER (3));
20151 /* Match registers to be popped with registers into which we pop them. */
20152 for (available = regs_available_for_popping,
20153 required = regs_to_pop;
20154 required != 0 && available != 0;
20155 available &= ~(available & - available),
20156 required &= ~(required & - required))
20157 -- pops_needed;
20159 /* If we have any popping registers left over, remove them. */
20160 if (available > 0)
20161 regs_available_for_popping &= ~available;
20163 /* Otherwise if we need another popping register we can use
20164 the fourth argument register. */
20165 else if (pops_needed)
20167 /* If we have not found any free argument registers and
20168 reg a4 contains the return address, we must move it. */
20169 if (regs_available_for_popping == 0
20170 && reg_containing_return_addr == LAST_ARG_REGNUM)
20172 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20173 reg_containing_return_addr = LR_REGNUM;
20175 else if (size > 12)
20177 /* Register a4 is being used to hold part of the return value,
20178 but we have dire need of a free, low register. */
20179 restore_a4 = TRUE;
20181 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
20184 if (reg_containing_return_addr != LAST_ARG_REGNUM)
20186 /* The fourth argument register is available. */
20187 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
20189 --pops_needed;
20193 /* Pop as many registers as we can. */
20194 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20195 regs_available_for_popping);
20197 /* Process the registers we popped. */
20198 if (reg_containing_return_addr == -1)
20200 /* The return address was popped into the lowest numbered register. */
20201 regs_to_pop &= ~(1 << LR_REGNUM);
20203 reg_containing_return_addr =
20204 number_of_first_bit_set (regs_available_for_popping);
20206 /* Remove this register from the mask of available registers, so that
20207 the return address will not be corrupted by further pops. */
20208 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
20211 /* If we popped other registers then handle them here. */
20212 if (regs_available_for_popping)
20214 int frame_pointer;
20216 /* Work out which register currently contains the frame pointer. */
20217 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20219 /* Move it into the correct place. */
20220 asm_fprintf (f, "\tmov\t%r, %r\n",
20221 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20223 /* (Temporarily) remove it from the mask of popped registers. */
20224 regs_available_for_popping &= ~(1 << frame_pointer);
20225 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20227 if (regs_available_for_popping)
20229 int stack_pointer;
20231 /* We popped the stack pointer as well;
20232 find the register that contains it. */
20233 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20235 /* Move it into the stack register. */
20236 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20238 /* At this point we have popped all necessary registers, so
20239 do not worry about restoring regs_available_for_popping
20240 to its correct value:
20242 assert (pops_needed == 0)
20243 assert (regs_available_for_popping == (1 << frame_pointer))
20244 assert (regs_to_pop == (1 << STACK_POINTER)) */
20246 else
20248 /* Since we have just moved the popped value into the frame
20249 pointer, the popping register is available for reuse, and
20250 we know that we still have the stack pointer left to pop. */
20251 regs_available_for_popping |= (1 << frame_pointer);
20255 /* If we still have registers left on the stack, but we no longer have
20256 any registers into which we can pop them, then we must move the return
20257 address into the link register and make available the register that
20258 contained it. */
20259 if (regs_available_for_popping == 0 && pops_needed > 0)
20261 regs_available_for_popping |= 1 << reg_containing_return_addr;
20263 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20264 reg_containing_return_addr);
20266 reg_containing_return_addr = LR_REGNUM;
20269 /* If we have registers left on the stack then pop some more.
20270 We know that at most we will want to pop FP and SP. */
20271 if (pops_needed > 0)
20273 int popped_into;
20274 int move_to;
20276 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20277 regs_available_for_popping);
20279 /* We have popped either FP or SP.
20280 Move whichever one it is into the correct register. */
20281 popped_into = number_of_first_bit_set (regs_available_for_popping);
20282 move_to = number_of_first_bit_set (regs_to_pop);
20284 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20286 regs_to_pop &= ~(1 << move_to);
20288 --pops_needed;
20291 /* If we still have not popped everything then we must have only
20292 had one register available to us and we are now popping the SP. */
20293 if (pops_needed > 0)
20295 int popped_into;
20297 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20298 regs_available_for_popping);
20300 popped_into = number_of_first_bit_set (regs_available_for_popping);
20302 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
20304 assert (regs_to_pop == (1 << STACK_POINTER))
20305 assert (pops_needed == 1)
20309 /* If necessary restore the a4 register. */
20310 if (restore_a4)
20312 if (reg_containing_return_addr != LR_REGNUM)
20314 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20315 reg_containing_return_addr = LR_REGNUM;
20318 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
20321 if (crtl->calls_eh_return)
20322 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20324 /* Return to caller. */
20325 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20328 /* Scan INSN just before assembler is output for it.
20329 For Thumb-1, we track the status of the condition codes; this
20330 information is used in the cbranchsi4_insn pattern. */
20331 void
20332 thumb1_final_prescan_insn (rtx insn)
20334 if (flag_print_asm_name)
20335 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
20336 INSN_ADDRESSES (INSN_UID (insn)));
20337 /* Don't overwrite the previous setter when we get to a cbranch. */
20338 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20340 enum attr_conds conds;
20342 if (cfun->machine->thumb1_cc_insn)
20344 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20345 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20346 CC_STATUS_INIT;
20348 conds = get_attr_conds (insn);
20349 if (conds == CONDS_SET)
20351 rtx set = single_set (insn);
20352 cfun->machine->thumb1_cc_insn = insn;
20353 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20354 cfun->machine->thumb1_cc_op1 = const0_rtx;
20355 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20356 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20358 rtx src1 = XEXP (SET_SRC (set), 1);
20359 if (src1 == const0_rtx)
20360 cfun->machine->thumb1_cc_mode = CCmode;
20363 else if (conds != CONDS_NOCOND)
20364 cfun->machine->thumb1_cc_insn = NULL_RTX;
20369 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20371 unsigned HOST_WIDE_INT mask = 0xff;
20372 int i;
20374 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20375 if (val == 0) /* XXX */
20376 return 0;
20378 for (i = 0; i < 25; i++)
20379 if ((val & (mask << i)) == val)
20380 return 1;
20382 return 0;
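/* Illustrative note (not part of the original source): the loop accepts
   exactly those 32-bit values whose set bits fit in one 8-bit window
   shifted left by 0..24 bits, i.e. constants that can be built with a
   single move of an 8-bit immediate followed by one left shift.  For
   example 0x00ff0000 (0xff << 16) is accepted, while 0x00000101 is
   rejected because its set bits span nine bit positions.  */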
20385 /* Returns nonzero if the current function contains,
20386 or might contain a far jump. */
20387 static int
20388 thumb_far_jump_used_p (void)
20390 rtx insn;
20392 /* This test is only important for leaf functions. */
20393 /* assert (!leaf_function_p ()); */
20395 /* If we have already decided that far jumps may be used,
20396 do not bother checking again, and always return true even if
20397 it turns out that they are not being used. Once we have made
20398 the decision that far jumps are present (and that hence the link
20399 register will be pushed onto the stack) we cannot go back on it. */
20400 if (cfun->machine->far_jump_used)
20401 return 1;
20403 /* If this function is not being called from the prologue/epilogue
20404 generation code then it must be being called from the
20405 INITIAL_ELIMINATION_OFFSET macro. */
20406 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20408 /* In this case we know that we are being asked about the elimination
20409 of the arg pointer register. If that register is not being used,
20410 then there are no arguments on the stack, and we do not have to
20411 worry that a far jump might force the prologue to push the link
20412 register, changing the stack offsets. In this case we can just
20413 return false, since the presence of far jumps in the function will
20414 not affect stack offsets.
20416 If the arg pointer is live (or if it was live, but has now been
20417 eliminated and so set to dead) then we do have to test to see if
20418 the function might contain a far jump. This test can lead to some
20419 false negatives, since before reload is completed, the length of
20420 branch instructions is not known, so gcc defaults to returning their
20421 longest length, which in turn sets the far jump attribute to true.
20423 A false negative will not result in bad code being generated, but it
20424 will result in a needless push and pop of the link register. We
20425 hope that this does not occur too often.
20427 If we need doubleword stack alignment this could affect the other
20428 elimination offsets so we can't risk getting it wrong. */
20429 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20430 cfun->machine->arg_pointer_live = 1;
20431 else if (!cfun->machine->arg_pointer_live)
20432 return 0;
20435 /* Check to see if the function contains a branch
20436 insn with the far jump attribute set. */
20437 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20439 if (GET_CODE (insn) == JUMP_INSN
20440 /* Ignore tablejump patterns. */
20441 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20442 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20443 && get_attr_far_jump (insn) == FAR_JUMP_YES
20446 /* Record the fact that we have decided that
20447 the function does use far jumps. */
20448 cfun->machine->far_jump_used = 1;
20449 return 1;
20453 return 0;
20456 /* Return nonzero if FUNC must be entered in ARM mode. */
20458 is_called_in_ARM_mode (tree func)
20460 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20462 /* Ignore the problem about functions whose address is taken. */
20463 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20464 return TRUE;
20466 #ifdef ARM_PE
20467 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20468 #else
20469 return FALSE;
20470 #endif
20473 /* Given the stack offsets and register mask in OFFSETS, decide how
20474 many additional registers to push instead of subtracting a constant
20475 from SP. For epilogues the principle is the same except we use pop.
20476 FOR_PROLOGUE indicates which we're generating. */
20477 static int
20478 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20480 HOST_WIDE_INT amount;
20481 unsigned long live_regs_mask = offsets->saved_regs_mask;
20482 /* Extract a mask of the ones we can give to the Thumb's push/pop
20483 instruction. */
20484 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20485 /* Then count how many other high registers will need to be pushed. */
20486 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20487 int n_free, reg_base;
20489 if (!for_prologue && frame_pointer_needed)
20490 amount = offsets->locals_base - offsets->saved_regs;
20491 else
20492 amount = offsets->outgoing_args - offsets->saved_regs;
20494 /* If the stack frame size is 512 exactly, we can save one load
20495 instruction, which should make this a win even when optimizing
20496 for speed. */
20497 if (!optimize_size && amount != 512)
20498 return 0;
20500 /* Can't do this if there are high registers to push. */
20501 if (high_regs_pushed != 0)
20502 return 0;
20504 /* Shouldn't do it in the prologue if no registers would normally
20505 be pushed at all. In the epilogue, also allow it if we'll have
20506 a pop insn for the PC. */
20507 if (l_mask == 0
20508 && (for_prologue
20509 || TARGET_BACKTRACE
20510 || (live_regs_mask & 1 << LR_REGNUM) == 0
20511 || TARGET_INTERWORK
20512 || crtl->args.pretend_args_size != 0))
20513 return 0;
20515 /* Don't do this if thumb_expand_prologue wants to emit instructions
20516 between the push and the stack frame allocation. */
20517 if (for_prologue
20518 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20519 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20520 return 0;
20522 reg_base = 0;
20523 n_free = 0;
20524 if (!for_prologue)
20526 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20527 live_regs_mask >>= reg_base;
20530 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20531 && (for_prologue || call_used_regs[reg_base + n_free]))
20533 live_regs_mask >>= 1;
20534 n_free++;
20537 if (n_free == 0)
20538 return 0;
20539 gcc_assert (amount / 4 * 4 == amount);
20541 if (amount >= 512 && (amount - n_free * 4) < 512)
20542 return (amount - 508) / 4;
20543 if (amount <= n_free * 4)
20544 return amount / 4;
20545 return 0;
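/* Worked example (illustration only): suppose the function is compiled for
   size (otherwise only AMOUNT == 512 qualifies, per the check above),
   AMOUNT is 516 bytes and two suitable registers are free (n_free == 2).
   Then 516 >= 512 and 516 - 2 * 4 = 508 < 512, so the function returns
   (516 - 508) / 4 = 2: two extra registers are pushed (or popped) and the
   remaining 508-byte adjustment fits in a single immediate add/sub of SP.
   If instead AMOUNT were 8 with n_free == 2, the whole adjustment would be
   folded into the push/pop (return 8 / 4 = 2) and no explicit SP
   adjustment would be needed.  */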
20548 /* The bits which aren't usefully expanded as rtl. */
20549 const char *
20550 thumb_unexpanded_epilogue (void)
20552 arm_stack_offsets *offsets;
20553 int regno;
20554 unsigned long live_regs_mask = 0;
20555 int high_regs_pushed = 0;
20556 int extra_pop;
20557 int had_to_push_lr;
20558 int size;
20560 if (cfun->machine->return_used_this_function != 0)
20561 return "";
20563 if (IS_NAKED (arm_current_func_type ()))
20564 return "";
20566 offsets = arm_get_frame_offsets ();
20567 live_regs_mask = offsets->saved_regs_mask;
20568 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20570 /* We can deduce the registers used from the function's return value.
20571 This is more reliable than examining df_regs_ever_live_p () because that
20572 will be set if the register is ever used in the function, not just if
20573 the register is used to hold a return value. */
20574 size = arm_size_return_regs ();
20576 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20577 if (extra_pop > 0)
20579 unsigned long extra_mask = (1 << extra_pop) - 1;
20580 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20583 /* The prologue may have pushed some high registers to use as
20584 work registers, e.g. the testsuite file:
20585 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20586 compiles to produce:
20587 push {r4, r5, r6, r7, lr}
20588 mov r7, r9
20589 mov r6, r8
20590 push {r6, r7}
20591 as part of the prologue. We have to undo that pushing here. */
20593 if (high_regs_pushed)
20595 unsigned long mask = live_regs_mask & 0xff;
20596 int next_hi_reg;
20598 /* The available low registers depend on the size of the value we are
20599 returning. */
20600 if (size <= 12)
20601 mask |= 1 << 3;
20602 if (size <= 8)
20603 mask |= 1 << 2;
20605 if (mask == 0)
20606 /* Oh dear! We have no low registers into which we can pop
20607 high registers! */
20608 internal_error
20609 ("no low registers available for popping high registers");
20611 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20612 if (live_regs_mask & (1 << next_hi_reg))
20613 break;
20615 while (high_regs_pushed)
20617 /* Find lo register(s) into which the high register(s) can
20618 be popped. */
20619 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20621 if (mask & (1 << regno))
20622 high_regs_pushed--;
20623 if (high_regs_pushed == 0)
20624 break;
20627 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20629 /* Pop the values into the low register(s). */
20630 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20632 /* Move the value(s) into the high registers. */
20633 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20635 if (mask & (1 << regno))
20637 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20638 regno);
20640 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20641 if (live_regs_mask & (1 << next_hi_reg))
20642 break;
20646 live_regs_mask &= ~0x0f00;
20649 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20650 live_regs_mask &= 0xff;
20652 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20654 /* Pop the return address into the PC. */
20655 if (had_to_push_lr)
20656 live_regs_mask |= 1 << PC_REGNUM;
20658 /* Either no argument registers were pushed or a backtrace
20659 structure was created which includes an adjusted stack
20660 pointer, so just pop everything. */
20661 if (live_regs_mask)
20662 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20663 live_regs_mask);
20665 /* We have either just popped the return address into the
20666 PC or it was kept in LR for the entire function.
20667 Note that thumb_pushpop has already called thumb_exit if the
20668 PC was in the list. */
20669 if (!had_to_push_lr)
20670 thumb_exit (asm_out_file, LR_REGNUM);
20672 else
20674 /* Pop everything but the return address. */
20675 if (live_regs_mask)
20676 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20677 live_regs_mask);
20679 if (had_to_push_lr)
20681 if (size > 12)
20683 /* We have no free low regs, so save one. */
20684 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20685 LAST_ARG_REGNUM);
20688 /* Get the return address into a temporary register. */
20689 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20690 1 << LAST_ARG_REGNUM);
20692 if (size > 12)
20694 /* Move the return address to lr. */
20695 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20696 LAST_ARG_REGNUM);
20697 /* Restore the low register. */
20698 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20699 IP_REGNUM);
20700 regno = LR_REGNUM;
20702 else
20703 regno = LAST_ARG_REGNUM;
20705 else
20706 regno = LR_REGNUM;
20708 /* Remove the argument registers that were pushed onto the stack. */
20709 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20710 SP_REGNUM, SP_REGNUM,
20711 crtl->args.pretend_args_size);
20713 thumb_exit (asm_out_file, regno);
20716 return "";
20719 /* Functions to save and restore machine-specific function data. */
20720 static struct machine_function *
20721 arm_init_machine_status (void)
20723 struct machine_function *machine;
20724 machine = ggc_alloc_cleared_machine_function ();
20726 #if ARM_FT_UNKNOWN != 0
20727 machine->func_type = ARM_FT_UNKNOWN;
20728 #endif
20729 return machine;
20732 /* Return an RTX indicating where the return address to the
20733 calling function can be found. */
20735 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20737 if (count != 0)
20738 return NULL_RTX;
20740 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20743 /* Do anything needed before RTL is emitted for each function. */
20744 void
20745 arm_init_expanders (void)
20747 /* Arrange to initialize and mark the machine per-function status. */
20748 init_machine_status = arm_init_machine_status;
20750 /* This is to stop the combine pass optimizing away the alignment
20751 adjustment of va_arg. */
20752 /* ??? It is claimed that this should not be necessary. */
20753 if (cfun)
20754 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20758 /* Like arm_compute_initial_elimination_offset. Simpler because there
20759 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20760 to point at the base of the local variables after static stack
20761 space for a function has been allocated. */
20763 HOST_WIDE_INT
20764 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20766 arm_stack_offsets *offsets;
20768 offsets = arm_get_frame_offsets ();
20770 switch (from)
20772 case ARG_POINTER_REGNUM:
20773 switch (to)
20775 case STACK_POINTER_REGNUM:
20776 return offsets->outgoing_args - offsets->saved_args;
20778 case FRAME_POINTER_REGNUM:
20779 return offsets->soft_frame - offsets->saved_args;
20781 case ARM_HARD_FRAME_POINTER_REGNUM:
20782 return offsets->saved_regs - offsets->saved_args;
20784 case THUMB_HARD_FRAME_POINTER_REGNUM:
20785 return offsets->locals_base - offsets->saved_args;
20787 default:
20788 gcc_unreachable ();
20790 break;
20792 case FRAME_POINTER_REGNUM:
20793 switch (to)
20795 case STACK_POINTER_REGNUM:
20796 return offsets->outgoing_args - offsets->soft_frame;
20798 case ARM_HARD_FRAME_POINTER_REGNUM:
20799 return offsets->saved_regs - offsets->soft_frame;
20801 case THUMB_HARD_FRAME_POINTER_REGNUM:
20802 return offsets->locals_base - offsets->soft_frame;
20804 default:
20805 gcc_unreachable ();
20807 break;
20809 default:
20810 gcc_unreachable ();
20814 /* Generate the rest of a function's prologue. */
20815 void
20816 thumb1_expand_prologue (void)
20818 rtx insn, dwarf;
20820 HOST_WIDE_INT amount;
20821 arm_stack_offsets *offsets;
20822 unsigned long func_type;
20823 int regno;
20824 unsigned long live_regs_mask;
20826 func_type = arm_current_func_type ();
20828 /* Naked functions don't have prologues. */
20829 if (IS_NAKED (func_type))
20830 return;
20832 if (IS_INTERRUPT (func_type))
20834 error ("interrupt Service Routines cannot be coded in Thumb mode");
20835 return;
20838 offsets = arm_get_frame_offsets ();
20839 live_regs_mask = offsets->saved_regs_mask;
20840 /* Load the pic register before setting the frame pointer,
20841 so we can use r7 as a temporary work register. */
20842 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20843 arm_load_pic_register (live_regs_mask);
20845 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20846 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20847 stack_pointer_rtx);
20849 if (flag_stack_usage_info)
20850 current_function_static_stack_size
20851 = offsets->outgoing_args - offsets->saved_args;
20853 amount = offsets->outgoing_args - offsets->saved_regs;
20854 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20855 if (amount)
20857 if (amount < 512)
20859 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20860 GEN_INT (- amount)));
20861 RTX_FRAME_RELATED_P (insn) = 1;
20863 else
20865 rtx reg;
20867 /* The stack decrement is too big for an immediate value in a single
20868 insn. In theory we could issue multiple subtracts, but after
20869 three of them it becomes more space efficient to place the full
20870 value in the constant pool and load into a register. (Also the
20871 ARM debugger really likes to see only one stack decrement per
20872 function). So instead we look for a scratch register into which
20873 we can load the decrement, and then we subtract this from the
20874 stack pointer. Unfortunately on the thumb the only available
20875 scratch registers are the argument registers, and we cannot use
20876 these as they may hold arguments to the function. Instead we
20877 attempt to locate a call preserved register which is used by this
20878 function. If we can find one, then we know that it will have
20879 been pushed at the start of the prologue and so we can corrupt
20880 it now. */
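/* Illustration (rough sketch, not the exact emitted code): for a 1024-byte
   frame where r4 happens to be a call-saved register already in
   LIVE_REGS_MASK (and therefore already pushed by the prologue, as the
   comment above requires), the code that follows amounts to something like

       ldr   r4, .Lconst      @ .Lconst: .word -1024
       add   sp, r4

   i.e. the decrement is materialised in r4 and then added to SP.  */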
20881 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20882 if (live_regs_mask & (1 << regno))
20883 break;
20885 gcc_assert (regno <= LAST_LO_REGNUM);
20887 reg = gen_rtx_REG (SImode, regno);
20889 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20891 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20892 stack_pointer_rtx, reg));
20893 RTX_FRAME_RELATED_P (insn) = 1;
20894 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20895 plus_constant (stack_pointer_rtx,
20896 -amount));
20897 RTX_FRAME_RELATED_P (dwarf) = 1;
20898 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20902 if (frame_pointer_needed)
20903 thumb_set_frame_pointer (offsets);
20905 /* If we are profiling, make sure no instructions are scheduled before
20906 the call to mcount. Similarly if the user has requested no
20907 scheduling in the prologue. Similarly if we want non-call exceptions
20908 using the EABI unwinder, to prevent faulting instructions from being
20909 swapped with a stack adjustment. */
20910 if (crtl->profile || !TARGET_SCHED_PROLOG
20911 || (arm_except_unwind_info (&global_options) == UI_TARGET
20912 && cfun->can_throw_non_call_exceptions))
20913 emit_insn (gen_blockage ());
20915 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20916 if (live_regs_mask & 0xff)
20917 cfun->machine->lr_save_eliminated = 0;
20921 void
20922 thumb1_expand_epilogue (void)
20924 HOST_WIDE_INT amount;
20925 arm_stack_offsets *offsets;
20926 int regno;
20928 /* Naked functions don't have epilogues. */
20929 if (IS_NAKED (arm_current_func_type ()))
20930 return;
20932 offsets = arm_get_frame_offsets ();
20933 amount = offsets->outgoing_args - offsets->saved_regs;
20935 if (frame_pointer_needed)
20937 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20938 amount = offsets->locals_base - offsets->saved_regs;
20940 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20942 gcc_assert (amount >= 0);
20943 if (amount)
20945 if (amount < 512)
20946 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20947 GEN_INT (amount)));
20948 else
20950 /* r3 is always free in the epilogue. */
20951 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20953 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20954 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20958 /* Emit a USE (stack_pointer_rtx), so that
20959 the stack adjustment will not be deleted. */
20960 emit_insn (gen_prologue_use (stack_pointer_rtx));
20962 if (crtl->profile || !TARGET_SCHED_PROLOG)
20963 emit_insn (gen_blockage ());
20965 /* Emit a clobber for each register that will be restored in the epilogue,
20966 so that flow2 will get register lifetimes correct. */
20967 for (regno = 0; regno < 13; regno++)
20968 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20969 emit_clobber (gen_rtx_REG (SImode, regno));
20971 if (! df_regs_ever_live_p (LR_REGNUM))
20972 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20975 static void
20976 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20978 arm_stack_offsets *offsets;
20979 unsigned long live_regs_mask = 0;
20980 unsigned long l_mask;
20981 unsigned high_regs_pushed = 0;
20982 int cfa_offset = 0;
20983 int regno;
20985 if (IS_NAKED (arm_current_func_type ()))
20986 return;
20988 if (is_called_in_ARM_mode (current_function_decl))
20990 const char * name;
20992 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20993 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20994 == SYMBOL_REF);
20995 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20997 /* Generate code sequence to switch us into Thumb mode. */
20998 /* The .code 32 directive has already been emitted by
20999 ASM_DECLARE_FUNCTION_NAME. */
21000 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
21001 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
21003 /* Generate a label, so that the debugger will notice the
21004 change in instruction sets. This label is also used by
21005 the assembler to bypass the ARM code when this function
21006 is called from a Thumb encoded function elsewhere in the
21007 same file. Hence the definition of STUB_NAME here must
21008 agree with the definition in gas/config/tc-arm.c. */
21010 #define STUB_NAME ".real_start_of"
21012 fprintf (f, "\t.code\t16\n");
21013 #ifdef ARM_PE
21014 if (arm_dllexport_name_p (name))
21015 name = arm_strip_name_encoding (name);
21016 #endif
21017 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
21018 fprintf (f, "\t.thumb_func\n");
21019 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
21022 if (crtl->args.pretend_args_size)
21024 /* Output unwind directive for the stack adjustment. */
21025 if (arm_except_unwind_info (&global_options) == UI_TARGET)
21026 fprintf (f, "\t.pad #%d\n",
21027 crtl->args.pretend_args_size);
21029 if (cfun->machine->uses_anonymous_args)
21031 int num_pushes;
21033 fprintf (f, "\tpush\t{");
21035 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
21037 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
21038 regno <= LAST_ARG_REGNUM;
21039 regno++)
21040 asm_fprintf (f, "%r%s", regno,
21041 regno == LAST_ARG_REGNUM ? "" : ", ");
21043 fprintf (f, "}\n");
21045 else
21046 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
21047 SP_REGNUM, SP_REGNUM,
21048 crtl->args.pretend_args_size);
21050 /* We don't need to record the stores for unwinding (would it
21051 help the debugger any if we did?), but record the change in
21052 the stack pointer. */
21053 if (dwarf2out_do_frame ())
21055 char *l = dwarf2out_cfi_label (false);
21057 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
21058 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
21062 /* Get the registers we are going to push. */
21063 offsets = arm_get_frame_offsets ();
21064 live_regs_mask = offsets->saved_regs_mask;
21065 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21066 l_mask = live_regs_mask & 0x40ff;
21067 /* Then count how many other high registers will need to be pushed. */
21068 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21070 if (TARGET_BACKTRACE)
21072 unsigned offset;
21073 unsigned work_register;
21075 /* We have been asked to create a stack backtrace structure.
21076 The code looks like this:
21078 0 .align 2
21079 0 func:
21080 0 sub SP, #16 Reserve space for 4 registers.
21081 2 push {R7} Push low registers.
21082 4 add R7, SP, #20 Get the stack pointer before the push.
21083 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
21084 8 mov R7, PC Get hold of the start of this code plus 12.
21085 10 str R7, [SP, #16] Store it.
21086 12 mov R7, FP Get hold of the current frame pointer.
21087 14 str R7, [SP, #4] Store it.
21088 16 mov R7, LR Get hold of the current return address.
21089 18 str R7, [SP, #12] Store it.
21090 20 add R7, SP, #16 Point at the start of the backtrace structure.
21091 22 mov FP, R7 Put this value into the frame pointer. */
21093 work_register = thumb_find_work_register (live_regs_mask);
21095 if (arm_except_unwind_info (&global_options) == UI_TARGET)
21096 asm_fprintf (f, "\t.pad #16\n");
21098 asm_fprintf
21099 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
21100 SP_REGNUM, SP_REGNUM);
21102 if (dwarf2out_do_frame ())
21104 char *l = dwarf2out_cfi_label (false);
21106 cfa_offset = cfa_offset + 16;
21107 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
21110 if (l_mask)
21112 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
21113 offset = bit_count (l_mask) * UNITS_PER_WORD;
21115 else
21116 offset = 0;
21118 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21119 offset + 16 + crtl->args.pretend_args_size);
21121 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21122 offset + 4);
21124 /* Make sure that the instruction fetching the PC is in the right place
21125 to calculate "start of backtrace creation code + 12". */
21126 if (l_mask)
21128 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21129 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21130 offset + 12);
21131 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21132 ARM_HARD_FRAME_POINTER_REGNUM);
21133 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21134 offset);
21136 else
21138 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21139 ARM_HARD_FRAME_POINTER_REGNUM);
21140 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21141 offset);
21142 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21143 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21144 offset + 12);
21147 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
21148 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21149 offset + 8);
21150 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21151 offset + 12);
21152 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
21153 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
21155 /* Optimization: If we are not pushing any low registers but we are going
21156 to push some high registers then delay our first push. This will just
21157 be a push of LR and we can combine it with the push of the first high
21158 register. */
21159 else if ((l_mask & 0xff) != 0
21160 || (high_regs_pushed == 0 && l_mask))
21162 unsigned long mask = l_mask;
21163 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
21164 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
21167 if (high_regs_pushed)
21169 unsigned pushable_regs;
21170 unsigned next_hi_reg;
21172 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
21173 if (live_regs_mask & (1 << next_hi_reg))
21174 break;
21176 pushable_regs = l_mask & 0xff;
21178 if (pushable_regs == 0)
21179 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
21181 while (high_regs_pushed > 0)
21183 unsigned long real_regs_mask = 0;
21185 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
21187 if (pushable_regs & (1 << regno))
21189 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
21191 high_regs_pushed --;
21192 real_regs_mask |= (1 << next_hi_reg);
21194 if (high_regs_pushed)
21196 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
21197 next_hi_reg --)
21198 if (live_regs_mask & (1 << next_hi_reg))
21199 break;
21201 else
21203 pushable_regs &= ~((1 << regno) - 1);
21204 break;
21209 /* If we had to find a work register and we have not yet
21210 saved the LR then add it to the list of regs to push. */
21211 if (l_mask == (1 << LR_REGNUM))
21213 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
21214 1, &cfa_offset,
21215 real_regs_mask | (1 << LR_REGNUM));
21216 l_mask = 0;
21218 else
21219 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
21224 /* Handle the case of a double word load into a low register from
21225 a computed memory address. The computed address may involve a
21226 register which is overwritten by the load. */
21227 const char *
21228 thumb_load_double_from_address (rtx *operands)
21230 rtx addr;
21231 rtx base;
21232 rtx offset;
21233 rtx arg1;
21234 rtx arg2;
21236 gcc_assert (GET_CODE (operands[0]) == REG);
21237 gcc_assert (GET_CODE (operands[1]) == MEM);
21239 /* Get the memory address. */
21240 addr = XEXP (operands[1], 0);
21242 /* Work out how the memory address is computed. */
21243 switch (GET_CODE (addr))
21245 case REG:
21246 operands[2] = adjust_address (operands[1], SImode, 4);
21248 if (REGNO (operands[0]) == REGNO (addr))
21250 output_asm_insn ("ldr\t%H0, %2", operands);
21251 output_asm_insn ("ldr\t%0, %1", operands);
21253 else
21255 output_asm_insn ("ldr\t%0, %1", operands);
21256 output_asm_insn ("ldr\t%H0, %2", operands);
21258 break;
21260 case CONST:
21261 /* Compute <address> + 4 for the high order load. */
21262 operands[2] = adjust_address (operands[1], SImode, 4);
21264 output_asm_insn ("ldr\t%0, %1", operands);
21265 output_asm_insn ("ldr\t%H0, %2", operands);
21266 break;
21268 case PLUS:
21269 arg1 = XEXP (addr, 0);
21270 arg2 = XEXP (addr, 1);
21272 if (CONSTANT_P (arg1))
21273 base = arg2, offset = arg1;
21274 else
21275 base = arg1, offset = arg2;
21277 gcc_assert (GET_CODE (base) == REG);
21279 /* Catch the case of <address> = <reg> + <reg> */
21280 if (GET_CODE (offset) == REG)
21282 int reg_offset = REGNO (offset);
21283 int reg_base = REGNO (base);
21284 int reg_dest = REGNO (operands[0]);
21286 /* Add the base and offset registers together into the
21287 higher destination register. */
21288 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21289 reg_dest + 1, reg_base, reg_offset);
21291 /* Load the lower destination register from the address in
21292 the higher destination register. */
21293 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21294 reg_dest, reg_dest + 1);
21296 /* Load the higher destination register from its own address
21297 plus 4. */
21298 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21299 reg_dest + 1, reg_dest + 1);
21301 else
21303 /* Compute <address> + 4 for the high order load. */
21304 operands[2] = adjust_address (operands[1], SImode, 4);
21306 /* If the computed address is held in the low order register
21307 then load the high order register first, otherwise always
21308 load the low order register first. */
21309 if (REGNO (operands[0]) == REGNO (base))
21311 output_asm_insn ("ldr\t%H0, %2", operands);
21312 output_asm_insn ("ldr\t%0, %1", operands);
21314 else
21316 output_asm_insn ("ldr\t%0, %1", operands);
21317 output_asm_insn ("ldr\t%H0, %2", operands);
21320 break;
21322 case LABEL_REF:
21323 /* With no registers to worry about we can just load the value
21324 directly. */
21325 operands[2] = adjust_address (operands[1], SImode, 4);
21327 output_asm_insn ("ldr\t%H0, %2", operands);
21328 output_asm_insn ("ldr\t%0, %1", operands);
21329 break;
21331 default:
21332 gcc_unreachable ();
21335 return "";
21338 const char *
21339 thumb_output_move_mem_multiple (int n, rtx *operands)
21341 rtx tmp;
21343 switch (n)
21345 case 2:
21346 if (REGNO (operands[4]) > REGNO (operands[5]))
21348 tmp = operands[4];
21349 operands[4] = operands[5];
21350 operands[5] = tmp;
21352 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21353 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21354 break;
21356 case 3:
21357 if (REGNO (operands[4]) > REGNO (operands[5]))
21359 tmp = operands[4];
21360 operands[4] = operands[5];
21361 operands[5] = tmp;
21363 if (REGNO (operands[5]) > REGNO (operands[6]))
21365 tmp = operands[5];
21366 operands[5] = operands[6];
21367 operands[6] = tmp;
21369 if (REGNO (operands[4]) > REGNO (operands[5]))
21371 tmp = operands[4];
21372 operands[4] = operands[5];
21373 operands[5] = tmp;
21376 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21377 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21378 break;
21380 default:
21381 gcc_unreachable ();
21384 return "";
21387 /* Output a call-via instruction for thumb state. */
21388 const char *
21389 thumb_call_via_reg (rtx reg)
21391 int regno = REGNO (reg);
21392 rtx *labelp;
21394 gcc_assert (regno < LR_REGNUM);
21396 /* If we are in the normal text section we can use a single instance
21397 per compilation unit. If we are doing function sections, then we need
21398 an entry per section, since we can't rely on reachability. */
21399 if (in_section == text_section)
21401 thumb_call_reg_needed = 1;
21403 if (thumb_call_via_label[regno] == NULL)
21404 thumb_call_via_label[regno] = gen_label_rtx ();
21405 labelp = thumb_call_via_label + regno;
21407 else
21409 if (cfun->machine->call_via[regno] == NULL)
21410 cfun->machine->call_via[regno] = gen_label_rtx ();
21411 labelp = cfun->machine->call_via + regno;
21414 output_asm_insn ("bl\t%a0", labelp);
21415 return "";
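/* A hedged illustration of the call-via scheme (the label number is
   hypothetical and the exact local-label syntax depends on the target):
   a Thumb call through r3 in the text section emits

	bl	.L42

   at the call site, and arm_file_end below later emits the shared veneer

   .L42:
	bx	r3

   once per compilation unit (or once per section when using function
   sections), so a register-indirect call can be made with a bl on cores
   that lack a direct call-via-register instruction.  */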
21418 /* Routines for generating rtl. */
21419 void
21420 thumb_expand_movmemqi (rtx *operands)
21422 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21423 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21424 HOST_WIDE_INT len = INTVAL (operands[2]);
21425 HOST_WIDE_INT offset = 0;
21427 while (len >= 12)
21429 emit_insn (gen_movmem12b (out, in, out, in));
21430 len -= 12;
21433 if (len >= 8)
21435 emit_insn (gen_movmem8b (out, in, out, in));
21436 len -= 8;
21439 if (len >= 4)
21441 rtx reg = gen_reg_rtx (SImode);
21442 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21443 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21444 len -= 4;
21445 offset += 4;
21448 if (len >= 2)
21450 rtx reg = gen_reg_rtx (HImode);
21451 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21452 plus_constant (in, offset))));
21453 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21454 reg));
21455 len -= 2;
21456 offset += 2;
21459 if (len)
21461 rtx reg = gen_reg_rtx (QImode);
21462 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21463 plus_constant (in, offset))));
21464 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
21465 reg));
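/* Worked example (length chosen for illustration only): a 23-byte copy
   expands above into one 12-byte block move, one 8-byte block move, a
   halfword copy and a final byte copy (12 + 8 + 2 + 1 = 23).  */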
21469 void
21470 thumb_reload_out_hi (rtx *operands)
21472 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21475 /* Handle reading a half-word from memory during reload. */
21476 void
21477 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21479 gcc_unreachable ();
21482 /* Return the length of a function name prefix
21483 that starts with the character 'c'. */
21484 static int
21485 arm_get_strip_length (int c)
21487 switch (c)
21489 ARM_NAME_ENCODING_LENGTHS
21490 default: return 0;
21494 /* Return a pointer to a function's name with any
21495 and all prefix encodings stripped from it. */
21496 const char *
21497 arm_strip_name_encoding (const char *name)
21499 int skip;
21501 while ((skip = arm_get_strip_length (* name)))
21502 name += skip;
21504 return name;
21507 /* If there is a '*' anywhere in the name's prefix, then
21508 emit the stripped name verbatim, otherwise prepend an
21509 underscore if leading underscores are being used. */
21510 void
21511 arm_asm_output_labelref (FILE *stream, const char *name)
21513 int skip;
21514 int verbatim = 0;
21516 while ((skip = arm_get_strip_length (* name)))
21518 verbatim |= (*name == '*');
21519 name += skip;
21522 if (verbatim)
21523 fputs (name, stream);
21524 else
21525 asm_fprintf (stream, "%U%s", name);
21528 static void
21529 arm_file_start (void)
21531 int val;
21533 if (TARGET_UNIFIED_ASM)
21534 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21536 if (TARGET_BPABI)
21538 const char *fpu_name;
21539 if (arm_selected_arch)
21540 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21541 else
21542 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21544 if (TARGET_SOFT_FLOAT)
21546 if (TARGET_VFP)
21547 fpu_name = "softvfp";
21548 else
21549 fpu_name = "softfpa";
21551 else
21553 fpu_name = arm_fpu_desc->name;
21554 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21556 if (TARGET_HARD_FLOAT)
21557 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21558 if (TARGET_HARD_FLOAT_ABI)
21559 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21562 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21564 /* Some of these attributes only apply when the corresponding features
21565 are used. However we don't have any easy way of figuring this out.
21566 Conservatively record the setting that would have been used. */
21568 /* Tag_ABI_FP_rounding. */
21569 if (flag_rounding_math)
21570 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21571 if (!flag_unsafe_math_optimizations)
21573 /* Tag_ABI_FP_denormal. */

21574 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21575 /* Tag_ABI_FP_exceptions. */
21576 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21578 /* Tag_ABI_FP_user_exceptions. */
21579 if (flag_signaling_nans)
21580 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21581 /* Tag_ABI_FP_number_model. */
21582 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21583 flag_finite_math_only ? 1 : 3);
21585 /* Tag_ABI_align8_needed. */
21586 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21587 /* Tag_ABI_align8_preserved. */
21588 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21589 /* Tag_ABI_enum_size. */
21590 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21591 flag_short_enums ? 1 : 2);
21593 /* Tag_ABI_optimization_goals. */
21594 if (optimize_size)
21595 val = 4;
21596 else if (optimize >= 2)
21597 val = 2;
21598 else if (optimize)
21599 val = 1;
21600 else
21601 val = 6;
21602 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21604 /* Tag_ABI_FP_16bit_format. */
21605 if (arm_fp16_format)
21606 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21607 (int)arm_fp16_format);
21609 if (arm_lang_output_object_attributes_hook)
21610 arm_lang_output_object_attributes_hook();
21612 default_file_start();
21615 static void
21616 arm_file_end (void)
21618 int regno;
21620 if (NEED_INDICATE_EXEC_STACK)
21621 /* Add .note.GNU-stack. */
21622 file_end_indicate_exec_stack ();
21624 if (! thumb_call_reg_needed)
21625 return;
21627 switch_to_section (text_section);
21628 asm_fprintf (asm_out_file, "\t.code 16\n");
21629 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21631 for (regno = 0; regno < LR_REGNUM; regno++)
21633 rtx label = thumb_call_via_label[regno];
21635 if (label != 0)
21637 targetm.asm_out.internal_label (asm_out_file, "L",
21638 CODE_LABEL_NUMBER (label));
21639 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21644 #ifndef ARM_PE
21645 /* Symbols in the text segment can be accessed without indirecting via the
21646 constant pool; it may take an extra binary operation, but this is still
21647 faster than indirecting via memory. Don't do this when not optimizing,
21648 since we won't be calculating all of the offsets necessary to do this
21649 simplification. */
21651 static void
21652 arm_encode_section_info (tree decl, rtx rtl, int first)
21654 if (optimize > 0 && TREE_CONSTANT (decl))
21655 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21657 default_encode_section_info (decl, rtl, first);
21659 #endif /* !ARM_PE */
21661 static void
21662 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21664 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21665 && !strcmp (prefix, "L"))
21667 arm_ccfsm_state = 0;
21668 arm_target_insn = NULL;
21670 default_internal_label (stream, prefix, labelno);
21673 /* Output code to add DELTA to the first argument, and then jump
21674 to FUNCTION. Used for C++ multiple inheritance. */
21675 static void
21676 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21677 HOST_WIDE_INT delta,
21678 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21679 tree function)
21681 static int thunk_label = 0;
21682 char label[256];
21683 char labelpc[256];
21684 int mi_delta = delta;
21685 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21686 int shift = 0;
21687 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21688 ? 1 : 0);
21689 if (mi_delta < 0)
21690 mi_delta = - mi_delta;
21692 if (TARGET_THUMB1)
21694 int labelno = thunk_label++;
21695 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21696 /* Thunks are entered in ARM mode when available. */
21697 if (TARGET_THUMB1_ONLY)
21699 /* push r3 so we can use it as a temporary. */
21700 /* TODO: Omit this save if r3 is not used. */
21701 fputs ("\tpush {r3}\n", file);
21702 fputs ("\tldr\tr3, ", file);
21704 else
21706 fputs ("\tldr\tr12, ", file);
21708 assemble_name (file, label);
21709 fputc ('\n', file);
21710 if (flag_pic)
21712 /* If we are generating PIC, the ldr instruction below loads
21713 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21714 the address of the add + 8, so we have:
21716 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21717 = target + 1.
21719 Note that we have "+ 1" because some versions of GNU ld
21720 don't set the low bit of the result for R_ARM_REL32
21721 relocations against thumb function symbols.
21722 On ARMv6M this is +4, not +8. */
21723 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21724 assemble_name (file, labelpc);
21725 fputs (":\n", file);
21726 if (TARGET_THUMB1_ONLY)
21728 /* This is 2 insns after the start of the thunk, so we know it
21729 is 4-byte aligned. */
21730 fputs ("\tadd\tr3, pc, r3\n", file);
21731 fputs ("\tmov r12, r3\n", file);
21733 else
21734 fputs ("\tadd\tr12, pc, r12\n", file);
21736 else if (TARGET_THUMB1_ONLY)
21737 fputs ("\tmov r12, r3\n", file);
21739 if (TARGET_THUMB1_ONLY)
21741 if (mi_delta > 255)
21743 fputs ("\tldr\tr3, ", file);
21744 assemble_name (file, label);
21745 fputs ("+4\n", file);
21746 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21747 mi_op, this_regno, this_regno);
21749 else if (mi_delta != 0)
21751 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21752 mi_op, this_regno, this_regno,
21753 mi_delta);
21756 else
21758 /* TODO: Use movw/movt for large constants when available. */
21759 while (mi_delta != 0)
21761 if ((mi_delta & (3 << shift)) == 0)
21762 shift += 2;
21763 else
21765 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21766 mi_op, this_regno, this_regno,
21767 mi_delta & (0xff << shift));
21768 mi_delta &= ~(0xff << shift);
21769 shift += 8;
21773 if (TARGET_THUMB1)
21775 if (TARGET_THUMB1_ONLY)
21776 fputs ("\tpop\t{r3}\n", file);
21778 fprintf (file, "\tbx\tr12\n");
21779 ASM_OUTPUT_ALIGN (file, 2);
21780 assemble_name (file, label);
21781 fputs (":\n", file);
21782 if (flag_pic)
21784 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21785 rtx tem = XEXP (DECL_RTL (function), 0);
21786 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21787 tem = gen_rtx_MINUS (GET_MODE (tem),
21788 tem,
21789 gen_rtx_SYMBOL_REF (Pmode,
21790 ggc_strdup (labelpc)));
21791 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21793 else
21794 /* Output ".word .LTHUNKn". */
21795 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21797 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21798 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21800 else
21802 fputs ("\tb\t", file);
21803 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21804 if (NEED_PLT_RELOC)
21805 fputs ("(PLT)", file);
21806 fputc ('\n', file);
21811 arm_emit_vector_const (FILE *file, rtx x)
21813 int i;
21814 const char * pattern;
21816 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21818 switch (GET_MODE (x))
21820 case V2SImode: pattern = "%08x"; break;
21821 case V4HImode: pattern = "%04x"; break;
21822 case V8QImode: pattern = "%02x"; break;
21823 default: gcc_unreachable ();
21826 fprintf (file, "0x");
21827 for (i = CONST_VECTOR_NUNITS (x); i--;)
21829 rtx element;
21831 element = CONST_VECTOR_ELT (x, i);
21832 fprintf (file, pattern, INTVAL (element));
21835 return 1;
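/* Illustrative output (a made-up constant): a V4HImode CONST_VECTOR with
   elements 1, 2, 3, 4 is printed most-significant element first using the
   "%04x" pattern above, giving "0x0004000300020001".  */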
21838 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
21839 HFmode constant pool entries are actually loaded with ldr. */
21840 void
21841 arm_emit_fp16_const (rtx c)
21843 REAL_VALUE_TYPE r;
21844 long bits;
21846 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21847 bits = real_to_target (NULL, &r, HFmode);
21848 if (WORDS_BIG_ENDIAN)
21849 assemble_zeros (2);
21850 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21851 if (!WORDS_BIG_ENDIAN)
21852 assemble_zeros (2);
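/* For example (the bit pattern comes from the IEEE half-precision format,
   not from this file): the HFmode constant 1.0 is 0x3c00, so on a
   little-endian target the pool entry is the halfword 0x3c00 followed by
   two bytes of padding, and an ldr of the whole word reads back
   0x00003c00; big-endian targets emit the padding first.  */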
21855 const char *
21856 arm_output_load_gr (rtx *operands)
21858 rtx reg;
21859 rtx offset;
21860 rtx wcgr;
21861 rtx sum;
21863 if (GET_CODE (operands [1]) != MEM
21864 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21865 || GET_CODE (reg = XEXP (sum, 0)) != REG
21866 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21867 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21868 return "wldrw%?\t%0, %1";
21870 /* Fix up an out-of-range load of a GR register. */
21871 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21872 wcgr = operands[0];
21873 operands[0] = reg;
21874 output_asm_insn ("ldr%?\t%0, %1", operands);
21876 operands[0] = wcgr;
21877 operands[1] = reg;
21878 output_asm_insn ("tmcr%?\t%0, %1", operands);
21879 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21881 return "";
21884 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21886 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21887 named arg and all anonymous args onto the stack.
21888 XXX I know the prologue shouldn't be pushing registers, but it is faster
21889 that way. */
21891 static void
21892 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21893 enum machine_mode mode,
21894 tree type,
21895 int *pretend_size,
21896 int second_time ATTRIBUTE_UNUSED)
21898 int nregs;
21900 cfun->machine->uses_anonymous_args = 1;
21901 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21903 nregs = pcum->aapcs_ncrn;
21904 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21905 nregs++;
21907 else
21908 nregs = pcum->nregs;
21910 if (nregs < NUM_ARG_REGS)
21911 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
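/* A minimal illustration (hypothetical prototype): for "int f (int n, ...)"
   one core register holds the named argument, so *pretend_size becomes
   (4 - 1) * UNITS_PER_WORD = 12 and the prologue pushes r1-r3, so that the
   anonymous arguments form one contiguous block with any arguments already
   on the stack.  */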
21914 /* Return nonzero if the CONSUMER instruction (a store) does not need
21915 PRODUCER's value to calculate the address. */
21918 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21920 rtx value = PATTERN (producer);
21921 rtx addr = PATTERN (consumer);
21923 if (GET_CODE (value) == COND_EXEC)
21924 value = COND_EXEC_CODE (value);
21925 if (GET_CODE (value) == PARALLEL)
21926 value = XVECEXP (value, 0, 0);
21927 value = XEXP (value, 0);
21928 if (GET_CODE (addr) == COND_EXEC)
21929 addr = COND_EXEC_CODE (addr);
21930 if (GET_CODE (addr) == PARALLEL)
21931 addr = XVECEXP (addr, 0, 0);
21932 addr = XEXP (addr, 0);
21934 return !reg_overlap_mentioned_p (value, addr);
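/* Example pairing (register choices are illustrative): with producer
   "ldr r0, [r1]" and consumer "str r0, [r2, #4]" the produced value r0 is
   only the data being stored, not part of the address, so this returns
   nonzero; with consumer "str r3, [r0]" the address needs r0 and the
   function returns zero.  */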
21937 /* Return nonzero if the CONSUMER instruction (a store) does need
21938 PRODUCER's value to calculate the address. */
21941 arm_early_store_addr_dep (rtx producer, rtx consumer)
21943 return !arm_no_early_store_addr_dep (producer, consumer);
21946 /* Return nonzero if the CONSUMER instruction (a load) does need
21947 PRODUCER's value to calculate the address. */
21950 arm_early_load_addr_dep (rtx producer, rtx consumer)
21952 rtx value = PATTERN (producer);
21953 rtx addr = PATTERN (consumer);
21955 if (GET_CODE (value) == COND_EXEC)
21956 value = COND_EXEC_CODE (value);
21957 if (GET_CODE (value) == PARALLEL)
21958 value = XVECEXP (value, 0, 0);
21959 value = XEXP (value, 0);
21960 if (GET_CODE (addr) == COND_EXEC)
21961 addr = COND_EXEC_CODE (addr);
21962 if (GET_CODE (addr) == PARALLEL)
21963 addr = XVECEXP (addr, 0, 0);
21964 addr = XEXP (addr, 1);
21966 return reg_overlap_mentioned_p (value, addr);
21969 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21970 have an early register shift value or amount dependency on the
21971 result of PRODUCER. */
21974 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21976 rtx value = PATTERN (producer);
21977 rtx op = PATTERN (consumer);
21978 rtx early_op;
21980 if (GET_CODE (value) == COND_EXEC)
21981 value = COND_EXEC_CODE (value);
21982 if (GET_CODE (value) == PARALLEL)
21983 value = XVECEXP (value, 0, 0);
21984 value = XEXP (value, 0);
21985 if (GET_CODE (op) == COND_EXEC)
21986 op = COND_EXEC_CODE (op);
21987 if (GET_CODE (op) == PARALLEL)
21988 op = XVECEXP (op, 0, 0);
21989 op = XEXP (op, 1);
21991 early_op = XEXP (op, 0);
21992 /* This is either an actual independent shift, or a shift applied to
21993 the first operand of another operation. We want the whole shift
21994 operation. */
21995 if (GET_CODE (early_op) == REG)
21996 early_op = op;
21998 return !reg_overlap_mentioned_p (value, early_op);
22001 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22002 have an early register shift value dependency on the result of
22003 PRODUCER. */
22006 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
22008 rtx value = PATTERN (producer);
22009 rtx op = PATTERN (consumer);
22010 rtx early_op;
22012 if (GET_CODE (value) == COND_EXEC)
22013 value = COND_EXEC_CODE (value);
22014 if (GET_CODE (value) == PARALLEL)
22015 value = XVECEXP (value, 0, 0);
22016 value = XEXP (value, 0);
22017 if (GET_CODE (op) == COND_EXEC)
22018 op = COND_EXEC_CODE (op);
22019 if (GET_CODE (op) == PARALLEL)
22020 op = XVECEXP (op, 0, 0);
22021 op = XEXP (op, 1);
22023 early_op = XEXP (op, 0);
22025 /* This is either an actual independent shift, or a shift applied to
22026 the first operand of another operation. We want the value being
22027 shifted, in either case. */
22028 if (GET_CODE (early_op) != REG)
22029 early_op = XEXP (early_op, 0);
22031 return !reg_overlap_mentioned_p (value, early_op);
22034 /* Return nonzero if the CONSUMER (a mul or mac op) does not
22035 have an early register mult dependency on the result of
22036 PRODUCER. */
22039 arm_no_early_mul_dep (rtx producer, rtx consumer)
22041 rtx value = PATTERN (producer);
22042 rtx op = PATTERN (consumer);
22044 if (GET_CODE (value) == COND_EXEC)
22045 value = COND_EXEC_CODE (value);
22046 if (GET_CODE (value) == PARALLEL)
22047 value = XVECEXP (value, 0, 0);
22048 value = XEXP (value, 0);
22049 if (GET_CODE (op) == COND_EXEC)
22050 op = COND_EXEC_CODE (op);
22051 if (GET_CODE (op) == PARALLEL)
22052 op = XVECEXP (op, 0, 0);
22053 op = XEXP (op, 1);
22055 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
22057 if (GET_CODE (XEXP (op, 0)) == MULT)
22058 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
22059 else
22060 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
22063 return 0;
22066 /* We can't rely on the caller doing the proper promotion when
22067 using APCS or ATPCS. */
22069 static bool
22070 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
22072 return !TARGET_AAPCS_BASED;
22075 static enum machine_mode
22076 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
22077 enum machine_mode mode,
22078 int *punsignedp ATTRIBUTE_UNUSED,
22079 const_tree fntype ATTRIBUTE_UNUSED,
22080 int for_return ATTRIBUTE_UNUSED)
22082 if (GET_MODE_CLASS (mode) == MODE_INT
22083 && GET_MODE_SIZE (mode) < 4)
22084 return SImode;
22086 return mode;
22089 /* AAPCS based ABIs use short enums by default. */
22091 static bool
22092 arm_default_short_enums (void)
22094 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
22098 /* AAPCS requires that anonymous bitfields affect structure alignment. */
22100 static bool
22101 arm_align_anon_bitfield (void)
22103 return TARGET_AAPCS_BASED;
22107 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
22109 static tree
22110 arm_cxx_guard_type (void)
22112 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
22115 /* Return non-zero if the consumer (a multiply-accumulate instruction)
22116 has an accumulator dependency on the result of the producer (a
22117 multiplication instruction) and no other dependency on that result. */
22119 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
22121 rtx mul = PATTERN (producer);
22122 rtx mac = PATTERN (consumer);
22123 rtx mul_result;
22124 rtx mac_op0, mac_op1, mac_acc;
22126 if (GET_CODE (mul) == COND_EXEC)
22127 mul = COND_EXEC_CODE (mul);
22128 if (GET_CODE (mac) == COND_EXEC)
22129 mac = COND_EXEC_CODE (mac);
22131 /* Check that mul is of the form (set (...) (mult ...))
22132 and mla is of the form (set (...) (plus (mult ...) (...))). */
22133 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
22134 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
22135 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
22136 return 0;
22138 mul_result = XEXP (mul, 0);
22139 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
22140 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
22141 mac_acc = XEXP (XEXP (mac, 1), 1);
22143 return (reg_overlap_mentioned_p (mul_result, mac_acc)
22144 && !reg_overlap_mentioned_p (mul_result, mac_op0)
22145 && !reg_overlap_mentioned_p (mul_result, mac_op1));
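/* Illustrative pairing (register numbers are arbitrary):

	mul	r0, r1, r2
	mla	r3, r4, r5, r0

   Here the accumulator operand of the MLA is exactly the MUL result and
   neither multiply operand overlaps it, so the function returns nonzero;
   using r0 as one of the MLA's multiply operands instead would make it
   return zero.  */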
22149 /* The EABI says test the least significant bit of a guard variable. */
22151 static bool
22152 arm_cxx_guard_mask_bit (void)
22154 return TARGET_AAPCS_BASED;
22158 /* The EABI specifies that all array cookies are 8 bytes long. */
22160 static tree
22161 arm_get_cookie_size (tree type)
22163 tree size;
22165 if (!TARGET_AAPCS_BASED)
22166 return default_cxx_get_cookie_size (type);
22168 size = build_int_cst (sizetype, 8);
22169 return size;
22173 /* The EABI says that array cookies should also contain the element size. */
22175 static bool
22176 arm_cookie_has_size (void)
22178 return TARGET_AAPCS_BASED;
22182 /* The EABI says constructors and destructors should return a pointer to
22183 the object constructed/destroyed. */
22185 static bool
22186 arm_cxx_cdtor_returns_this (void)
22188 return TARGET_AAPCS_BASED;
22191 /* The EABI says that an inline function may never be the key
22192 method. */
22194 static bool
22195 arm_cxx_key_method_may_be_inline (void)
22197 return !TARGET_AAPCS_BASED;
22200 static void
22201 arm_cxx_determine_class_data_visibility (tree decl)
22203 if (!TARGET_AAPCS_BASED
22204 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
22205 return;
22207 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22208 is exported. However, on systems without dynamic vague linkage,
22209 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22210 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
22211 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
22212 else
22213 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
22214 DECL_VISIBILITY_SPECIFIED (decl) = 1;
22217 static bool
22218 arm_cxx_class_data_always_comdat (void)
22220 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22221 vague linkage if the class has no key function. */
22222 return !TARGET_AAPCS_BASED;
22226 /* The EABI says __aeabi_atexit should be used to register static
22227 destructors. */
22229 static bool
22230 arm_cxx_use_aeabi_atexit (void)
22232 return TARGET_AAPCS_BASED;
22236 void
22237 arm_set_return_address (rtx source, rtx scratch)
22239 arm_stack_offsets *offsets;
22240 HOST_WIDE_INT delta;
22241 rtx addr;
22242 unsigned long saved_regs;
22244 offsets = arm_get_frame_offsets ();
22245 saved_regs = offsets->saved_regs_mask;
22247 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22248 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22249 else
22251 if (frame_pointer_needed)
22252 addr = plus_constant(hard_frame_pointer_rtx, -4);
22253 else
22255 /* LR will be the first saved register. */
22256 delta = offsets->outgoing_args - (offsets->frame + 4);
22259 if (delta >= 4096)
22261 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22262 GEN_INT (delta & ~4095)));
22263 addr = scratch;
22264 delta &= 4095;
22266 else
22267 addr = stack_pointer_rtx;
22269 addr = plus_constant (addr, delta);
22271 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22276 void
22277 thumb_set_return_address (rtx source, rtx scratch)
22279 arm_stack_offsets *offsets;
22280 HOST_WIDE_INT delta;
22281 HOST_WIDE_INT limit;
22282 int reg;
22283 rtx addr;
22284 unsigned long mask;
22286 emit_use (source);
22288 offsets = arm_get_frame_offsets ();
22289 mask = offsets->saved_regs_mask;
22290 if (mask & (1 << LR_REGNUM))
22292 limit = 1024;
22293 /* Find the saved regs. */
22294 if (frame_pointer_needed)
22296 delta = offsets->soft_frame - offsets->saved_args;
22297 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
22298 if (TARGET_THUMB1)
22299 limit = 128;
22301 else
22303 delta = offsets->outgoing_args - offsets->saved_args;
22304 reg = SP_REGNUM;
22306 /* Allow for the stack frame. */
22307 if (TARGET_THUMB1 && TARGET_BACKTRACE)
22308 delta -= 16;
22309 /* The link register is always the first saved register. */
22310 delta -= 4;
22312 /* Construct the address. */
22313 addr = gen_rtx_REG (SImode, reg);
22314 if (delta > limit)
22316 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
22317 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
22318 addr = scratch;
22320 else
22321 addr = plus_constant (addr, delta);
22323 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22325 else
22326 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22329 /* Implements target hook vector_mode_supported_p. */
22330 bool
22331 arm_vector_mode_supported_p (enum machine_mode mode)
22333 /* Neon also supports V2SImode, etc. listed in the clause below. */
22334 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
22335 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
22336 return true;
22338 if ((TARGET_NEON || TARGET_IWMMXT)
22339 && ((mode == V2SImode)
22340 || (mode == V4HImode)
22341 || (mode == V8QImode)))
22342 return true;
22344 return false;
22347 /* Implements target hook array_mode_supported_p. */
22349 static bool
22350 arm_array_mode_supported_p (enum machine_mode mode,
22351 unsigned HOST_WIDE_INT nelems)
22353 if (TARGET_NEON
22354 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
22355 && (nelems >= 2 && nelems <= 4))
22356 return true;
22358 return false;
22361 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22362 registers when autovectorizing for Neon, at least until multiple vector
22363 widths are supported properly by the middle-end. */
22365 static enum machine_mode
22366 arm_preferred_simd_mode (enum machine_mode mode)
22368 if (TARGET_NEON)
22369 switch (mode)
22371 case SFmode:
22372 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22373 case SImode:
22374 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22375 case HImode:
22376 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22377 case QImode:
22378 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22379 case DImode:
22380 if (TARGET_NEON_VECTORIZE_QUAD)
22381 return V2DImode;
22382 break;
22384 default:;
22387 if (TARGET_REALLY_IWMMXT)
22388 switch (mode)
22390 case SImode:
22391 return V2SImode;
22392 case HImode:
22393 return V4HImode;
22394 case QImode:
22395 return V8QImode;
22397 default:;
22400 return word_mode;
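/* For instance, when auto-vectorizing SFmode data with NEON enabled this
   returns V2SFmode by default and V4SFmode under
   -mvectorize-with-neon-quad; without NEON or iWMMXt it falls back to
   word_mode.  */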
22403 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22405 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
22406 using r0-r4 for function arguments and r7 for the stack frame, leaving
22407 too few registers to do doubleword arithmetic. For Thumb-2 all the
22408 potentially problematic instructions accept high registers so this is not
22409 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
22410 that require many low registers. */
22411 static bool
22412 arm_class_likely_spilled_p (reg_class_t rclass)
22414 if ((TARGET_THUMB1 && rclass == LO_REGS)
22415 || rclass == CC_REG)
22416 return true;
22418 return false;
22421 /* Implements target hook small_register_classes_for_mode_p. */
22422 bool
22423 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22425 return TARGET_THUMB1;
22428 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22429 ARM insns and therefore guarantee that the shift count is modulo 256.
22430 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22431 guarantee no particular behavior for out-of-range counts. */
22433 static unsigned HOST_WIDE_INT
22434 arm_shift_truncation_mask (enum machine_mode mode)
22436 return mode == SImode ? 255 : 0;
22440 /* Map internal gcc register numbers to DWARF2 register numbers. */
22442 unsigned int
22443 arm_dbx_register_number (unsigned int regno)
22445 if (regno < 16)
22446 return regno;
22448 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22449 compatibility. The EABI defines them as registers 96-103. */
22450 if (IS_FPA_REGNUM (regno))
22451 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22453 if (IS_VFP_REGNUM (regno))
22455 /* See comment in arm_dwarf_register_span. */
22456 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22457 return 64 + regno - FIRST_VFP_REGNUM;
22458 else
22459 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22462 if (IS_IWMMXT_GR_REGNUM (regno))
22463 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22465 if (IS_IWMMXT_REGNUM (regno))
22466 return 112 + regno - FIRST_IWMMXT_REGNUM;
22468 gcc_unreachable ();
22471 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22472 GCC models them as 64 32-bit registers, so we need to describe this to
22473 the DWARF generation code. Other registers can use the default. */
22474 static rtx
22475 arm_dwarf_register_span (rtx rtl)
22477 unsigned regno;
22478 int nregs;
22479 int i;
22480 rtx p;
22482 regno = REGNO (rtl);
22483 if (!IS_VFP_REGNUM (regno))
22484 return NULL_RTX;
22486 /* XXX FIXME: The EABI defines two VFP register ranges:
22487 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22488 256-287: D0-D31
22489 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22490 corresponding D register. Until GDB supports this, we shall use the
22491 legacy encodings. We also use these encodings for D0-D15 for
22492 compatibility with older debuggers. */
22493 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22494 return NULL_RTX;
22496 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22497 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22498 regno = (regno - FIRST_VFP_REGNUM) / 2;
22499 for (i = 0; i < nregs; i++)
22500 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
22502 return p;
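/* Worked example (register choice is illustrative): a DFmode value held in
   d20, which has no single-precision alias, is described to DWARF as one
   DImode piece in register 256 + 20 = 276, matching the EABI D0-D31 range
   quoted above; d0-d15 return NULL_RTX here and keep the legacy encoding
   produced by arm_dbx_register_number.  */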
22505 #if ARM_UNWIND_INFO
22506 /* Emit unwind directives for a store-multiple instruction or stack pointer
22507 push during alignment.
22508 These should only ever be generated by the function prologue code, so
22509 expect them to have a particular form. */
22511 static void
22512 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22514 int i;
22515 HOST_WIDE_INT offset;
22516 HOST_WIDE_INT nregs;
22517 int reg_size;
22518 unsigned reg;
22519 unsigned lastreg;
22520 rtx e;
22522 e = XVECEXP (p, 0, 0);
22523 if (GET_CODE (e) != SET)
22524 abort ();
22526 /* First insn will adjust the stack pointer. */
22527 if (GET_CODE (e) != SET
22528 || GET_CODE (XEXP (e, 0)) != REG
22529 || REGNO (XEXP (e, 0)) != SP_REGNUM
22530 || GET_CODE (XEXP (e, 1)) != PLUS)
22531 abort ();
22533 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22534 nregs = XVECLEN (p, 0) - 1;
22536 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
22537 if (reg < 16)
22539 /* The function prologue may also push pc, but not annotate it as it is
22540 never restored. We turn this into a stack pointer adjustment. */
22541 if (nregs * 4 == offset - 4)
22543 fprintf (asm_out_file, "\t.pad #4\n");
22544 offset -= 4;
22546 reg_size = 4;
22547 fprintf (asm_out_file, "\t.save {");
22549 else if (IS_VFP_REGNUM (reg))
22551 reg_size = 8;
22552 fprintf (asm_out_file, "\t.vsave {");
22554 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22556 /* FPA registers are done differently. */
22557 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22558 return;
22560 else
22561 /* Unknown register type. */
22562 abort ();
22564 /* If the stack increment doesn't match the size of the saved registers,
22565 something has gone horribly wrong. */
22566 if (offset != nregs * reg_size)
22567 abort ();
22569 offset = 0;
22570 lastreg = 0;
22571 /* The remaining insns will describe the stores. */
22572 for (i = 1; i <= nregs; i++)
22574 /* Expect (set (mem <addr>) (reg)).
22575 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22576 e = XVECEXP (p, 0, i);
22577 if (GET_CODE (e) != SET
22578 || GET_CODE (XEXP (e, 0)) != MEM
22579 || GET_CODE (XEXP (e, 1)) != REG)
22580 abort ();
22582 reg = REGNO (XEXP (e, 1));
22583 if (reg < lastreg)
22584 abort ();
22586 if (i != 1)
22587 fprintf (asm_out_file, ", ");
22588 /* We can't use %r for vfp because we need to use the
22589 double precision register names. */
22590 if (IS_VFP_REGNUM (reg))
22591 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22592 else
22593 asm_fprintf (asm_out_file, "%r", reg);
22595 #ifdef ENABLE_CHECKING
22596 /* Check that the addresses are consecutive. */
22597 e = XEXP (XEXP (e, 0), 0);
22598 if (GET_CODE (e) == PLUS)
22600 offset += reg_size;
22601 if (GET_CODE (XEXP (e, 0)) != REG
22602 || REGNO (XEXP (e, 0)) != SP_REGNUM
22603 || GET_CODE (XEXP (e, 1)) != CONST_INT
22604 || offset != INTVAL (XEXP (e, 1)))
22605 abort ();
22607 else if (i != 1
22608 || GET_CODE (e) != REG
22609 || REGNO (e) != SP_REGNUM)
22610 abort ();
22611 #endif
22613 fprintf (asm_out_file, "}\n");
22616 /* Emit unwind directives for a SET. */
22618 static void
22619 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22621 rtx e0;
22622 rtx e1;
22623 unsigned reg;
22625 e0 = XEXP (p, 0);
22626 e1 = XEXP (p, 1);
22627 switch (GET_CODE (e0))
22629 case MEM:
22630 /* Pushing a single register. */
22631 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22632 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22633 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22634 abort ();
22636 asm_fprintf (asm_out_file, "\t.save ");
22637 if (IS_VFP_REGNUM (REGNO (e1)))
22638 asm_fprintf(asm_out_file, "{d%d}\n",
22639 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
22640 else
22641 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
22642 break;
22644 case REG:
22645 if (REGNO (e0) == SP_REGNUM)
22647 /* A stack increment. */
22648 if (GET_CODE (e1) != PLUS
22649 || GET_CODE (XEXP (e1, 0)) != REG
22650 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22651 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22652 abort ();
22654 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22655 -INTVAL (XEXP (e1, 1)));
22657 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22659 HOST_WIDE_INT offset;
22661 if (GET_CODE (e1) == PLUS)
22663 if (GET_CODE (XEXP (e1, 0)) != REG
22664 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22665 abort ();
22666 reg = REGNO (XEXP (e1, 0));
22667 offset = INTVAL (XEXP (e1, 1));
22668 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22669 HARD_FRAME_POINTER_REGNUM, reg,
22670 offset);
22672 else if (GET_CODE (e1) == REG)
22674 reg = REGNO (e1);
22675 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22676 HARD_FRAME_POINTER_REGNUM, reg);
22678 else
22679 abort ();
22681 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22683 /* Move from sp to reg. */
22684 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22686 else if (GET_CODE (e1) == PLUS
22687 && GET_CODE (XEXP (e1, 0)) == REG
22688 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22689 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22691 /* Set reg to offset from sp. */
22692 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
22693 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
22695 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22697 /* Stack pointer save before alignment. */
22698 reg = REGNO (e0);
22699 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22700 reg + 0x90, reg);
22702 else
22703 abort ();
22704 break;
22706 default:
22707 abort ();
22712 /* Emit unwind directives for the given insn. */
22714 static void
22715 arm_unwind_emit (FILE * asm_out_file, rtx insn)
22717 rtx pat;
22719 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22720 return;
22722 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22723 && (TREE_NOTHROW (current_function_decl)
22724 || crtl->all_throwers_are_sibcalls))
22725 return;
22727 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
22728 return;
22730 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
22731 if (pat)
22732 pat = XEXP (pat, 0);
22733 else
22734 pat = PATTERN (insn);
22736 switch (GET_CODE (pat))
22738 case SET:
22739 arm_unwind_emit_set (asm_out_file, pat);
22740 break;
22742 case SEQUENCE:
22743 /* Store multiple. */
22744 arm_unwind_emit_sequence (asm_out_file, pat);
22745 break;
22747 default:
22748 abort();
22753 /* Output a reference from a function exception table to the type_info
22754 object X. The EABI specifies that the symbol should be relocated by
22755 an R_ARM_TARGET2 relocation. */
22757 static bool
22758 arm_output_ttype (rtx x)
22760 fputs ("\t.word\t", asm_out_file);
22761 output_addr_const (asm_out_file, x);
22762 /* Use special relocations for symbol references. */
22763 if (GET_CODE (x) != CONST_INT)
22764 fputs ("(TARGET2)", asm_out_file);
22765 fputc ('\n', asm_out_file);
22767 return TRUE;
22770 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22772 static void
22773 arm_asm_emit_except_personality (rtx personality)
22775 fputs ("\t.personality\t", asm_out_file);
22776 output_addr_const (asm_out_file, personality);
22777 fputc ('\n', asm_out_file);
22780 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22782 static void
22783 arm_asm_init_sections (void)
22785 exception_section = get_unnamed_section (0, output_section_asm_op,
22786 "\t.handlerdata");
22788 #endif /* ARM_UNWIND_INFO */
22790 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22792 static enum unwind_info_type
22793 arm_except_unwind_info (struct gcc_options *opts)
22795 /* Honor the --enable-sjlj-exceptions configure switch. */
22796 #ifdef CONFIG_SJLJ_EXCEPTIONS
22797 if (CONFIG_SJLJ_EXCEPTIONS)
22798 return UI_SJLJ;
22799 #endif
22801 /* If not using ARM EABI unwind tables... */
22802 if (ARM_UNWIND_INFO)
22804 /* For simplicity elsewhere in this file, indicate that all unwind
22805 info is disabled if we're not emitting unwind tables. */
22806 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
22807 return UI_NONE;
22808 else
22809 return UI_TARGET;
22812 /* ... we use sjlj exceptions for backwards compatibility. */
22813 return UI_SJLJ;
22817 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22818 stack alignment. */
22820 static void
22821 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22823 rtx unspec = SET_SRC (pattern);
22824 gcc_assert (GET_CODE (unspec) == UNSPEC);
22826 switch (index)
22828 case UNSPEC_STACK_ALIGN:
22829 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22830 put anything on the stack, so hopefully it won't matter.
22831 CFA = SP will be correct after alignment. */
22832 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22833 SET_DEST (pattern));
22834 break;
22835 default:
22836 gcc_unreachable ();
22841 /* Output unwind directives for the start/end of a function. */
22843 void
22844 arm_output_fn_unwind (FILE * f, bool prologue)
22846 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22847 return;
22849 if (prologue)
22850 fputs ("\t.fnstart\n", f);
22851 else
22853 /* If this function will never be unwound, then mark it as such.
22854 The same condition is used in arm_unwind_emit to suppress
22855 the frame annotations. */
22856 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22857 && (TREE_NOTHROW (current_function_decl)
22858 || crtl->all_throwers_are_sibcalls))
22859 fputs("\t.cantunwind\n", f);
22861 fputs ("\t.fnend\n", f);
22865 static bool
22866 arm_emit_tls_decoration (FILE *fp, rtx x)
22868 enum tls_reloc reloc;
22869 rtx val;
22871 val = XVECEXP (x, 0, 0);
22872 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22874 output_addr_const (fp, val);
22876 switch (reloc)
22878 case TLS_GD32:
22879 fputs ("(tlsgd)", fp);
22880 break;
22881 case TLS_LDM32:
22882 fputs ("(tlsldm)", fp);
22883 break;
22884 case TLS_LDO32:
22885 fputs ("(tlsldo)", fp);
22886 break;
22887 case TLS_IE32:
22888 fputs ("(gottpoff)", fp);
22889 break;
22890 case TLS_LE32:
22891 fputs ("(tpoff)", fp);
22892 break;
22893 default:
22894 gcc_unreachable ();
22897 switch (reloc)
22899 case TLS_GD32:
22900 case TLS_LDM32:
22901 case TLS_IE32:
22902 fputs (" + (. - ", fp);
22903 output_addr_const (fp, XVECEXP (x, 0, 2));
22904 fputs (" - ", fp);
22905 output_addr_const (fp, XVECEXP (x, 0, 3));
22906 fputc (')', fp);
22907 break;
22908 default:
22909 break;
22912 return TRUE;
22915 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22917 static void
22918 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22920 gcc_assert (size == 4);
22921 fputs ("\t.word\t", file);
22922 output_addr_const (file, x);
22923 fputs ("(tlsldo)", file);
22926 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22928 static bool
22929 arm_output_addr_const_extra (FILE *fp, rtx x)
22931 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22932 return arm_emit_tls_decoration (fp, x);
22933 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22935 char label[256];
22936 int labelno = INTVAL (XVECEXP (x, 0, 0));
22938 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22939 assemble_name_raw (fp, label);
22941 return TRUE;
22943 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22945 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22946 if (GOT_PCREL)
22947 fputs ("+.", fp);
22948 fputs ("-(", fp);
22949 output_addr_const (fp, XVECEXP (x, 0, 0));
22950 fputc (')', fp);
22951 return TRUE;
22953 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22955 output_addr_const (fp, XVECEXP (x, 0, 0));
22956 if (GOT_PCREL)
22957 fputs ("+.", fp);
22958 fputs ("-(", fp);
22959 output_addr_const (fp, XVECEXP (x, 0, 1));
22960 fputc (')', fp);
22961 return TRUE;
22963 else if (GET_CODE (x) == CONST_VECTOR)
22964 return arm_emit_vector_const (fp, x);
22966 return FALSE;
22969 /* Output assembly for a shift instruction.
22970 SET_FLAGS determines how the instruction modifies the condition codes.
22971 0 - Do not set condition codes.
22972 1 - Set condition codes.
22973 2 - Use smallest instruction. */
22974 const char *
22975 arm_output_shift(rtx * operands, int set_flags)
22977 char pattern[100];
22978 static const char flag_chars[3] = {'?', '.', '!'};
22979 const char *shift;
22980 HOST_WIDE_INT val;
22981 char c;
22983 c = flag_chars[set_flags];
22984 if (TARGET_UNIFIED_ASM)
22986 shift = shift_op(operands[3], &val);
22987 if (shift)
22989 if (val != -1)
22990 operands[2] = GEN_INT(val);
22991 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
22993 else
22994 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
22996 else
22997 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
22998 output_asm_insn (pattern, operands);
22999 return "";
23002 /* Output a Thumb-1 casesi dispatch sequence. */
23003 const char *
23004 thumb1_output_casesi (rtx *operands)
23006 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
23008 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
23010 switch (GET_MODE(diff_vec))
23012 case QImode:
23013 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
23014 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
23015 case HImode:
23016 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
23017 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
23018 case SImode:
23019 return "bl\t%___gnu_thumb1_case_si";
23020 default:
23021 gcc_unreachable ();
23025 /* Output a Thumb-2 casesi instruction. */
23026 const char *
23027 thumb2_output_casesi (rtx *operands)
23029 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
23031 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
23033 output_asm_insn ("cmp\t%0, %1", operands);
23034 output_asm_insn ("bhi\t%l3", operands);
23035 switch (GET_MODE(diff_vec))
23037 case QImode:
23038 return "tbb\t[%|pc, %0]";
23039 case HImode:
23040 return "tbh\t[%|pc, %0, lsl #1]";
23041 case SImode:
23042 if (flag_pic)
23044 output_asm_insn ("adr\t%4, %l2", operands);
23045 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
23046 output_asm_insn ("add\t%4, %4, %5", operands);
23047 return "bx\t%4";
23049 else
23051 output_asm_insn ("adr\t%4, %l2", operands);
23052 return "ldr\t%|pc, [%4, %0, lsl #2]";
23054 default:
23055 gcc_unreachable ();
23059 /* Most ARM cores are single issue, but some newer ones can dual issue.
23060 The scheduler descriptions rely on this being correct. */
23061 static int
23062 arm_issue_rate (void)
23064 switch (arm_tune)
23066 case cortexr4:
23067 case cortexr4f:
23068 case cortexr5:
23069 case cortexa5:
23070 case cortexa8:
23071 case cortexa9:
23072 case fa726te:
23073 return 2;
23075 default:
23076 return 1;
23080 /* A table and a function to perform ARM-specific name mangling for
23081 NEON vector types in order to conform to the AAPCS (see "Procedure
23082 Call Standard for the ARM Architecture", Appendix A). To qualify
23083 for emission with the mangled names defined in that document, a
23084 vector type must not only be of the correct mode but also be
23085 composed of NEON vector element types (e.g. __builtin_neon_qi). */
23086 typedef struct
23088 enum machine_mode mode;
23089 const char *element_type_name;
23090 const char *aapcs_name;
23091 } arm_mangle_map_entry;
23093 static arm_mangle_map_entry arm_mangle_map[] = {
23094 /* 64-bit containerized types. */
23095 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
23096 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
23097 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
23098 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
23099 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
23100 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
23101 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
23102 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
23103 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
23104 /* 128-bit containerized types. */
23105 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
23106 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
23107 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
23108 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
23109 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
23110 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
23111 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
23112 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
23113 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
23114 { VOIDmode, NULL, NULL }
23117 const char *
23118 arm_mangle_type (const_tree type)
23120 arm_mangle_map_entry *pos = arm_mangle_map;
23122 /* The ARM ABI documents (10th October 2008) say that "__va_list"
23123 has to be mangled as if it is in the "std" namespace. */
23124 if (TARGET_AAPCS_BASED
23125 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
23127 static bool warned;
23128 if (!warned && warn_psabi && !in_system_header)
23130 warned = true;
23131 inform (input_location,
23132 "the mangling of %<va_list%> has changed in GCC 4.4");
23134 return "St9__va_list";
23137 /* Half-precision float. */
23138 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
23139 return "Dh";
23141 if (TREE_CODE (type) != VECTOR_TYPE)
23142 return NULL;
23144 /* Check the mode of the vector type, and the name of the vector
23145 element type, against the table. */
23146 while (pos->mode != VOIDmode)
23148 tree elt_type = TREE_TYPE (type);
23150 if (pos->mode == TYPE_MODE (type)
23151 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
23152 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
23153 pos->element_type_name))
23154 return pos->aapcs_name;
23156 pos++;
23159 /* Use the default mangling for unrecognized (possibly user-defined)
23160 vector types. */
23161 return NULL;
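/* For example, the AAPCS NEON type int8x8_t (V8QImode with element type
   __builtin_neon_qi) maps to "15__simd64_int8_t" in the table above, so a
   C++ declaration such as "void f (int8x8_t);" would be mangled roughly as
   "_Z1f15__simd64_int8_t".  Illustrative only; the authoritative rules are
   in the AAPCS document cited above.  */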
23164 /* Order of allocation of core registers for Thumb: this allocation is
23165 written over the corresponding initial entries of the array
23166 initialized with REG_ALLOC_ORDER. We allocate all low registers
23167 first. Saving and restoring a low register is usually cheaper than
23168 using a call-clobbered high register. */
23170 static const int thumb_core_reg_alloc_order[] =
23172 3, 2, 1, 0, 4, 5, 6, 7,
23173 14, 12, 8, 9, 10, 11, 13, 15
23176 /* Adjust register allocation order when compiling for Thumb. */
23178 void
23179 arm_order_regs_for_local_alloc (void)
23181 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
23182 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
23183 if (TARGET_THUMB)
23184 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
23185 sizeof (thumb_core_reg_alloc_order));
23188 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23190 bool
23191 arm_frame_pointer_required (void)
23193 return (cfun->has_nonlocal_label
23194 || SUBTARGET_FRAME_POINTER_REQUIRED
23195 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
23198 /* Only Thumb-1 lacks support for conditional execution, so return true
23199 unless the target is Thumb-1. */
23200 static bool
23201 arm_have_conditional_execution (void)
23203 return !TARGET_THUMB1;
23206 /* Legitimize a memory reference for a sync primitive implemented using
23207 ldrex / strex. We currently force the form of the reference to be
23208 indirect without offset. We do not yet support the indirect offset
23209 addressing supported by some ARM targets for these
23210 instructions. */
23211 static rtx
23212 arm_legitimize_sync_memory (rtx memory)
23214 rtx addr = force_reg (Pmode, XEXP (memory, 0));
23215 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
23217 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
23218 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
23219 return legitimate_memory;
23222 /* An instruction emitter. */
23223 typedef void (* emit_f) (int label, const char *, rtx *);
23225 /* An instruction emitter that emits via the conventional
23226 output_asm_insn. */
23227 static void
23228 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23230 output_asm_insn (pattern, operands);
23233 /* Count the number of emitted synchronization instructions. */
23234 static unsigned arm_insn_count;
23236 /* An emitter that counts emitted instructions but does not actually
23237 emit instructions into the instruction stream. */
23238 static void
23239 arm_count (int label,
23240 const char *pattern ATTRIBUTE_UNUSED,
23241 rtx *operands ATTRIBUTE_UNUSED)
23243 if (! label)
23244 ++ arm_insn_count;
23247 /* Construct a pattern using conventional output formatting and feed
23248 it to output_asm_insn. Provides a mechanism to construct the
23249 output pattern on the fly. Note the hard 256-byte limit on the pattern
23250 buffer size. */
23251 static void ATTRIBUTE_PRINTF_4
23252 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23253 const char *pattern, ...)
23255 va_list ap;
23256 char buffer[256];
23258 va_start (ap, pattern);
23259 vsprintf (buffer, pattern, ap);
23260 va_end (ap);
23261 emit (label, buffer, operands);
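/* For instance, a call such as

     arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", "b");

   first formats the pattern into "ldrexb\t%0, %C1" and then hands it to
   EMIT, which for arm_emit simply forwards it to output_asm_insn with
   the supplied operands.  */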
23264 /* Emit the memory barrier instruction, if any, provided by this
23265 target to a specified emitter. */
23266 static void
23267 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23269 if (TARGET_HAVE_DMB)
23271 /* Note we issue a system level barrier. We should consider
23272 issuing an inner shareability zone barrier here instead, i.e.
23273 "DMB ISH". */
23274 emit (0, "dmb\tsy", operands);
23275 return;
23278 if (TARGET_HAVE_DMB_MCR)
23280 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23281 return;
23284 gcc_unreachable ();
23287 /* Emit the memory barrier instruction, if any, provided by this
23288 target. */
23289 const char *
23290 arm_output_memory_barrier (rtx *operands)
23292 arm_process_output_memory_barrier (arm_emit, operands);
23293 return "";
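/* On cores with the DMB instruction (TARGET_HAVE_DMB, e.g. ARMv7) the
   barrier above is the single instruction "dmb sy"; on older cores that
   only provide the CP15 barrier (TARGET_HAVE_DMB_MCR) it is the
   equivalent "mcr p15, 0, r0, c7, c10, 5".  */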
23296 /* Helper to figure out the instruction suffix required on ldrex/strex
23297 for operations on an object of the specified mode. */
23298 static const char *
23299 arm_ldrex_suffix (enum machine_mode mode)
23301 switch (mode)
23303 case QImode: return "b";
23304 case HImode: return "h";
23305 case SImode: return "";
23306 case DImode: return "d";
23307 default:
23308 gcc_unreachable ();
23310 return "";
23313 /* Emit an ldrex{b,h,d} (or plain ldrex) instruction appropriate for the specified
23314 mode. */
23315 static void
23316 arm_output_ldrex (emit_f emit,
23317 enum machine_mode mode,
23318 rtx target,
23319 rtx memory)
23321 const char *suffix = arm_ldrex_suffix (mode);
23322 rtx operands[2];
23324 operands[0] = target;
23325 operands[1] = memory;
23326 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
23329 /* Emit a strex{b,h,d} (or plain strex) instruction appropriate for the specified
23330 mode. */
23331 static void
23332 arm_output_strex (emit_f emit,
23333 enum machine_mode mode,
23334 const char *cc,
23335 rtx result,
23336 rtx value,
23337 rtx memory)
23339 const char *suffix = arm_ldrex_suffix (mode);
23340 rtx operands[3];
23342 operands[0] = result;
23343 operands[1] = value;
23344 operands[2] = memory;
23345 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23346 cc);
23349 /* Helper to emit a two operand instruction. */
23350 static void
23351 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23353 rtx operands[2];
23355 operands[0] = d;
23356 operands[1] = s;
23357 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23360 /* Helper to emit a three operand instruction. */
23361 static void
23362 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23364 rtx operands[3];
23366 operands[0] = d;
23367 operands[1] = a;
23368 operands[2] = b;
23369 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23372 /* Emit a load/store exclusive synchronization loop:
   do
23375     old_value = [mem]
23376     if old_value != required_value
23377       break;
23378     t1 = sync_op (old_value, new_value)
23379     [mem] = t1, t2 = [0|1]
23380   while ! t2
23382   Note:
23383     t1 == t2 is not permitted
23384     t1 == old_value is permitted
23386   required_value:
23388   RTX register or const_int representing the required old_value for
23389   the modify to continue; if NULL, no comparison is performed. */
23390 static void
23391 arm_output_sync_loop (emit_f emit,
23392 enum machine_mode mode,
23393 rtx old_value,
23394 rtx memory,
23395 rtx required_value,
23396 rtx new_value,
23397 rtx t1,
23398 rtx t2,
23399 enum attr_sync_op sync_op,
23400 int early_barrier_required)
23402 rtx operands[1];
23404 gcc_assert (t1 != t2);
23406 if (early_barrier_required)
23407 arm_process_output_memory_barrier (emit, NULL);
23409 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23411 arm_output_ldrex (emit, mode, old_value, memory);
23413 if (required_value)
23415 rtx operands[2];
23417 operands[0] = old_value;
23418 operands[1] = required_value;
23419 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23420 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23423 switch (sync_op)
23425 case SYNC_OP_ADD:
23426 arm_output_op3 (emit, "add", t1, old_value, new_value);
23427 break;
23429 case SYNC_OP_SUB:
23430 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23431 break;
23433 case SYNC_OP_IOR:
23434 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23435 break;
23437 case SYNC_OP_XOR:
23438 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23439 break;
23441 case SYNC_OP_AND:
23442 arm_output_op3 (emit, "and", t1, old_value, new_value);
23443 break;
23445 case SYNC_OP_NAND:
23446 arm_output_op3 (emit, "and", t1, old_value, new_value);
23447 arm_output_op2 (emit, "mvn", t1, t1);
23448 break;
23450 case SYNC_OP_NONE:
23451 t1 = new_value;
23452 break;
23455 if (t2)
23457 arm_output_strex (emit, mode, "", t2, t1, memory);
23458 operands[0] = t2;
23459 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23460 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23461 LOCAL_LABEL_PREFIX);
23463 else
23465 /* Use old_value for the return value because for some operations
23466 the old_value can easily be restored. This saves one register. */
23467 arm_output_strex (emit, mode, "", old_value, t1, memory);
23468 operands[0] = old_value;
23469 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23470 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23471 LOCAL_LABEL_PREFIX);
23473 switch (sync_op)
23475 case SYNC_OP_ADD:
23476 arm_output_op3 (emit, "sub", old_value, t1, new_value);
23477 break;
23479 case SYNC_OP_SUB:
23480 arm_output_op3 (emit, "add", old_value, t1, new_value);
23481 break;
23483 case SYNC_OP_XOR:
23484 arm_output_op3 (emit, "eor", old_value, t1, new_value);
23485 break;
23487 case SYNC_OP_NONE:
23488 arm_output_op2 (emit, "mov", old_value, required_value);
23489 break;
23491 default:
23492 gcc_unreachable ();
23496 arm_process_output_memory_barrier (emit, NULL);
23497 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
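/* A rough sketch (with arbitrary register numbers and label names, not
   verbatim output) of the loop generated above for a SImode SYNC_OP_ADD:

	dmb	sy		@ early barrier, if required
   .LSYT0:
	ldrex	r0, [r1]
	add	r2, r0, r3
	strex	r4, r2, [r1]
	teq	r4, #0
	bne	.LSYT0
	dmb	sy
   .LSYB0:

   When REQUIRED_VALUE is given, a "cmp old_value, required_value" and a
   "bne .LSYB0" are inserted right after the LDREX, giving
   compare-and-swap behaviour.  */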
23500 static rtx
23501 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23503 if (index > 0)
23504 default_value = operands[index - 1];
23506 return default_value;
23509 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23510 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
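/* For example, FETCH_SYNC_OPERAND (new_value, 0) reads the insn's
   "sync_new_value" attribute; an attribute value of N selects
   operands[N - 1], while 0 keeps the supplied default (0 here, i.e. no
   operand).  */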
23512 /* Extract the operands for a synchronization instruction from the
23513 instruction's attributes and emit the instruction. */
23514 static void
23515 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23517 rtx result, memory, required_value, new_value, t1, t2;
23518 int early_barrier;
23519 enum machine_mode mode;
23520 enum attr_sync_op sync_op;
23522 result = FETCH_SYNC_OPERAND(result, 0);
23523 memory = FETCH_SYNC_OPERAND(memory, 0);
23524 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23525 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23526 t1 = FETCH_SYNC_OPERAND(t1, 0);
23527 t2 = FETCH_SYNC_OPERAND(t2, 0);
23528 early_barrier =
23529 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23530 sync_op = get_attr_sync_op (insn);
23531 mode = GET_MODE (memory);
23533 arm_output_sync_loop (emit, mode, result, memory, required_value,
23534 new_value, t1, t2, sync_op, early_barrier);
23537 /* Emit a synchronization instruction loop. */
23538 const char *
23539 arm_output_sync_insn (rtx insn, rtx *operands)
23541 arm_process_output_sync_insn (arm_emit, insn, operands);
23542 return "";
23545 /* Count the number of machine instructions that will be emitted for a
23546 synchronization instruction. Note that the emitter used does not
23547 emit instructions; it just counts them, being careful not
23548 to count labels. */
23549 unsigned int
23550 arm_sync_loop_insns (rtx insn, rtx *operands)
23552 arm_insn_count = 0;
23553 arm_process_output_sync_insn (arm_count, insn, operands);
23554 return arm_insn_count;
23557 /* Helper to call a target sync instruction generator, dealing with
23558 the variation in operands required by the different generators. */
23559 static rtx
23560 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23561 rtx memory, rtx required_value, rtx new_value)
23563 switch (generator->op)
23565 case arm_sync_generator_omn:
23566 gcc_assert (! required_value);
23567 return generator->u.omn (old_value, memory, new_value);
23569 case arm_sync_generator_omrn:
23570 gcc_assert (required_value);
23571 return generator->u.omrn (old_value, memory, required_value, new_value);
23574 return NULL;
23577 /* Expand a synchronization loop. The synchronization loop is expanded
23578 as an opaque block of instructions in order to ensure that we do
23579 not subsequently get extraneous memory accesses inserted within the
23580 critical region. The exclusive access property of ldrex/strex is
23581 only guaranteed if there are no intervening memory accesses. */
23582 void
23583 arm_expand_sync (enum machine_mode mode,
23584 struct arm_sync_generator *generator,
23585 rtx target, rtx memory, rtx required_value, rtx new_value)
23587 if (target == NULL)
23588 target = gen_reg_rtx (mode);
23590 memory = arm_legitimize_sync_memory (memory);
23591 if (mode != SImode)
23593 rtx load_temp = gen_reg_rtx (SImode);
23595 if (required_value)
23596 required_value = convert_modes (SImode, mode, required_value, true);
23598 new_value = convert_modes (SImode, mode, new_value, true);
23599 emit_insn (arm_call_generator (generator, load_temp, memory,
23600 required_value, new_value));
23601 emit_move_insn (target, gen_lowpart (mode, load_temp));
23603 else
23605 emit_insn (arm_call_generator (generator, target, memory, required_value,
23606 new_value));
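/* Return the vector sizes, in bytes, that the auto-vectorizer may try:
   16 | 8 allows both quad-word and double-word NEON vectors when
   quad-word NEON vectorization is enabled, while 0 means only the
   default vector size is used.  */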
23610 static unsigned int
23611 arm_autovectorize_vector_sizes (void)
23613 return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
23616 static bool
23617 arm_vector_alignment_reachable (const_tree type, bool is_packed)
23619 /* Vectors which aren't in packed structures will not be less aligned than
23620 the natural alignment of their element type, so this is safe. */
23621 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23622 return !is_packed;
23624 return default_builtin_vector_alignment_reachable (type, is_packed);
23627 static bool
23628 arm_builtin_support_vector_misalignment (enum machine_mode mode,
23629 const_tree type, int misalignment,
23630 bool is_packed)
23632 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23634 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
23636 if (is_packed)
23637 return align == 1;
23639 /* If the misalignment is unknown, we should be able to handle the access
23640 so long as it is not to a member of a packed data structure. */
23641 if (misalignment == -1)
23642 return true;
23644 /* Return true if the misalignment is a multiple of the natural alignment
23645 of the vector's element type. This is probably always going to be
23646 true in practice, since we've already established that this isn't a
23647 packed access. */
23648 return ((misalignment % align) == 0);
23651 return default_builtin_support_vector_misalignment (mode, type, misalignment,
23652 is_packed);
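/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  Mark registers as fixed
   or call-used according to the selected architecture, FPU and ABI
   options.  */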
23655 static void
23656 arm_conditional_register_usage (void)
23658 int regno;
23660 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
23662 for (regno = FIRST_FPA_REGNUM;
23663 regno <= LAST_FPA_REGNUM; ++regno)
23664 fixed_regs[regno] = call_used_regs[regno] = 1;
23667 if (TARGET_THUMB1 && optimize_size)
23669 /* When optimizing for size on Thumb-1, it's better not
23670 to use the HI regs, because of the overhead of
23671 stacking them. */
23672 for (regno = FIRST_HI_REGNUM;
23673 regno <= LAST_HI_REGNUM; ++regno)
23674 fixed_regs[regno] = call_used_regs[regno] = 1;
23677 /* The link register can be clobbered by any branch insn,
23678 but we have no way to track that at present, so mark
23679 it as unavailable. */
23680 if (TARGET_THUMB1)
23681 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
23683 if (TARGET_32BIT && TARGET_HARD_FLOAT)
23685 if (TARGET_MAVERICK)
23687 for (regno = FIRST_FPA_REGNUM;
23688 regno <= LAST_FPA_REGNUM; ++ regno)
23689 fixed_regs[regno] = call_used_regs[regno] = 1;
23690 for (regno = FIRST_CIRRUS_FP_REGNUM;
23691 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
23693 fixed_regs[regno] = 0;
23694 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
23697 if (TARGET_VFP)
23699 /* VFPv3 registers are disabled when earlier VFP
23700 versions are selected due to the definition of
23701 LAST_VFP_REGNUM. */
23702 for (regno = FIRST_VFP_REGNUM;
23703 regno <= LAST_VFP_REGNUM; ++ regno)
23705 fixed_regs[regno] = 0;
23706 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
23707 || regno >= FIRST_VFP_REGNUM + 32;
23712 if (TARGET_REALLY_IWMMXT)
23714 regno = FIRST_IWMMXT_GR_REGNUM;
23715 /* The 2002/10/09 revision of the XScale ABI has wCG0
23716 and wCG1 as call-preserved registers. The 2002/11/21
23717 revision changed this so that all wCG registers are
23718 scratch registers. */
23719 for (regno = FIRST_IWMMXT_GR_REGNUM;
23720 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
23721 fixed_regs[regno] = 0;
23722 /* The XScale ABI has wR0 - wR9 as scratch registers,
23723 the rest as call-preserved registers. */
23724 for (regno = FIRST_IWMMXT_REGNUM;
23725 regno <= LAST_IWMMXT_REGNUM; ++ regno)
23727 fixed_regs[regno] = 0;
23728 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
23732 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
23734 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23735 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23737 else if (TARGET_APCS_STACK)
23739 fixed_regs[10] = 1;
23740 call_used_regs[10] = 1;
23742 /* -mcaller-super-interworking reserves r11 for calls to
23743 _interwork_r11_call_via_rN(). Making the register global
23744 is an easy way of ensuring that it remains valid for all
23745 calls. */
23746 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
23747 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
23749 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23750 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23751 if (TARGET_CALLER_INTERWORKING)
23752 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23754 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23757 static reg_class_t
23758 arm_preferred_rename_class (reg_class_t rclass)
23760 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
23761 using GENERAL_REGS. During the register rename pass we therefore
23762 prefer LO_REGS, which can reduce code size. */
23763 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
23764 return LO_REGS;
23765 else
23766 return NO_REGS;
23769 /* Compute the attribute "length" of insn "*push_multi".
23770 So this function MUST be kept in sync with that insn pattern. */
int
23772 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
23774 int i, regno, hi_reg;
23775 int num_saves = XVECLEN (parallel_op, 0);
23777 /* ARM mode. */
23778 if (TARGET_ARM)
23779 return 4;
23781 /* Thumb2 mode. */
23782 regno = REGNO (first_op);
23783 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
23784 for (i = 1; i < num_saves && !hi_reg; i++)
23786 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
23787 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
23790 if (!hi_reg)
23791 return 2;
23792 return 4;
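/* For example (illustrative): in Thumb-2, "push {r4-r7, lr}" involves only
   low registers plus LR and gets the 16-bit encoding, hence length 2,
   whereas "push {r4, r8}" needs the 32-bit encoding, hence length 4; in
   ARM mode the length is always 4.  */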
23795 #include "gt-arm.h"