re PR target/47246 (Invalid immediate offset for Thumb VFP store regression)
[official-gcc.git] / gcc / config / arm / arm.c
blob: b93756a8ef4a5cbc106d072864bf0225a36fe16f
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
55 #include "df.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
64 void (*arm_lang_output_object_attributes_hook)(void);
66 /* Forward function declarations. */
67 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
68 static int arm_compute_static_chain_stack_bytes (void);
69 static arm_stack_offsets *arm_get_frame_offsets (void);
70 static void arm_add_gc_roots (void);
71 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
72 HOST_WIDE_INT, rtx, rtx, int, int);
73 static unsigned bit_count (unsigned long);
74 static int arm_address_register_rtx_p (rtx, int);
75 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
76 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
77 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
78 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
79 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
80 inline static int thumb1_index_register_rtx_p (rtx, int);
81 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
82 static int thumb_far_jump_used_p (void);
83 static bool thumb_force_lr_save (void);
84 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
85 static rtx emit_sfm (int, int);
86 static unsigned arm_size_return_regs (void);
87 static bool arm_assemble_integer (rtx, unsigned int, int);
88 static void arm_print_operand (FILE *, rtx, int);
89 static void arm_print_operand_address (FILE *, rtx);
90 static bool arm_print_operand_punct_valid_p (unsigned char code);
91 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
92 static arm_cc get_arm_condition_code (rtx);
93 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
94 static rtx is_jump_table (rtx);
95 static const char *output_multi_immediate (rtx *, const char *, const char *,
96 int, HOST_WIDE_INT);
97 static const char *shift_op (rtx, HOST_WIDE_INT *);
98 static struct machine_function *arm_init_machine_status (void);
99 static void thumb_exit (FILE *, int);
100 static rtx is_jump_table (rtx);
101 static HOST_WIDE_INT get_jump_table_size (rtx);
102 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
103 static Mnode *add_minipool_forward_ref (Mfix *);
104 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
105 static Mnode *add_minipool_backward_ref (Mfix *);
106 static void assign_minipool_offsets (Mfix *);
107 static void arm_print_value (FILE *, rtx);
108 static void dump_minipool (rtx);
109 static int arm_barrier_cost (rtx);
110 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
111 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
112 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
113 rtx);
114 static void arm_reorg (void);
115 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
116 static unsigned long arm_compute_save_reg0_reg12_mask (void);
117 static unsigned long arm_compute_save_reg_mask (void);
118 static unsigned long arm_isr_value (tree);
119 static unsigned long arm_compute_func_type (void);
120 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
121 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
122 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
123 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
124 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
125 #endif
126 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
127 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
128 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
129 static int arm_comp_type_attributes (const_tree, const_tree);
130 static void arm_set_default_type_attributes (tree);
131 static int arm_adjust_cost (rtx, rtx, rtx, int);
132 static int count_insns_for_constant (HOST_WIDE_INT, int);
133 static int arm_get_strip_length (int);
134 static bool arm_function_ok_for_sibcall (tree, tree);
135 static enum machine_mode arm_promote_function_mode (const_tree,
136 enum machine_mode, int *,
137 const_tree, int);
138 static bool arm_return_in_memory (const_tree, const_tree);
139 static rtx arm_function_value (const_tree, const_tree, bool);
140 static rtx arm_libcall_value (enum machine_mode, const_rtx);
142 static void arm_internal_label (FILE *, const char *, unsigned long);
143 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
144 tree);
145 static bool arm_have_conditional_execution (void);
146 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
147 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
148 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
149 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
150 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
151 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
152 static bool arm_rtx_costs (rtx, int, int, int *, bool);
153 static int arm_address_cost (rtx, bool);
154 static bool arm_memory_load_p (rtx);
155 static bool arm_cirrus_insn_p (rtx);
156 static void cirrus_reorg (rtx);
157 static void arm_init_builtins (void);
158 static void arm_init_iwmmxt_builtins (void);
159 static rtx safe_vector_operand (rtx, enum machine_mode);
160 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
161 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
162 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
163 static void emit_constant_insn (rtx cond, rtx pattern);
164 static rtx emit_set_insn (rtx, rtx);
165 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
166 tree, bool);
167 static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
168 const_tree, bool);
169 static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
170 const_tree, bool);
171 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
172 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
173 const_tree);
174 static int aapcs_select_return_coproc (const_tree, const_tree);
176 #ifdef OBJECT_FORMAT_ELF
177 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
178 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
179 #endif
180 #ifndef ARM_PE
181 static void arm_encode_section_info (tree, rtx, int);
182 #endif
184 static void arm_file_end (void);
185 static void arm_file_start (void);
187 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
188 tree, int *, int);
189 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
190 enum machine_mode, const_tree, bool);
191 static bool arm_promote_prototypes (const_tree);
192 static bool arm_default_short_enums (void);
193 static bool arm_align_anon_bitfield (void);
194 static bool arm_return_in_msb (const_tree);
195 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
196 static bool arm_return_in_memory (const_tree, const_tree);
197 #if ARM_UNWIND_INFO
198 static void arm_unwind_emit (FILE *, rtx);
199 static bool arm_output_ttype (rtx);
200 static void arm_asm_emit_except_personality (rtx);
201 static void arm_asm_init_sections (void);
202 #endif
203 static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
204 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
205 static rtx arm_dwarf_register_span (rtx);
207 static tree arm_cxx_guard_type (void);
208 static bool arm_cxx_guard_mask_bit (void);
209 static tree arm_get_cookie_size (tree);
210 static bool arm_cookie_has_size (void);
211 static bool arm_cxx_cdtor_returns_this (void);
212 static bool arm_cxx_key_method_may_be_inline (void);
213 static void arm_cxx_determine_class_data_visibility (tree);
214 static bool arm_cxx_class_data_always_comdat (void);
215 static bool arm_cxx_use_aeabi_atexit (void);
216 static void arm_init_libfuncs (void);
217 static tree arm_build_builtin_va_list (void);
218 static void arm_expand_builtin_va_start (tree, rtx);
219 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
220 static void arm_option_override (void);
221 static bool arm_handle_option (size_t, const char *, int);
222 static void arm_target_help (void);
223 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
224 static bool arm_cannot_copy_insn_p (rtx);
225 static bool arm_tls_symbol_p (rtx x);
226 static int arm_issue_rate (void);
227 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
228 static bool arm_output_addr_const_extra (FILE *, rtx);
229 static bool arm_allocate_stack_slots_for_args (void);
230 static const char *arm_invalid_parameter_type (const_tree t);
231 static const char *arm_invalid_return_type (const_tree t);
232 static tree arm_promoted_type (const_tree t);
233 static tree arm_convert_to_type (tree type, tree expr);
234 static bool arm_scalar_mode_supported_p (enum machine_mode);
235 static bool arm_frame_pointer_required (void);
236 static bool arm_can_eliminate (const int, const int);
237 static void arm_asm_trampoline_template (FILE *);
238 static void arm_trampoline_init (rtx, tree, rtx);
239 static rtx arm_trampoline_adjust_address (rtx);
240 static rtx arm_pic_static_addr (rtx orig, rtx reg);
241 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
242 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
243 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
244 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
245 static bool arm_class_likely_spilled_p (reg_class_t);
246 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
247 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
248 const_tree type,
249 int misalignment,
250 bool is_packed);
251 static void arm_conditional_register_usage (void);
252 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
255 /* Table of machine attributes. */
256 static const struct attribute_spec arm_attribute_table[] =
258 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
259 /* Function calls made to this symbol must be done indirectly, because
260 it may lie outside of the 26 bit addressing range of a normal function
261 call. */
262 { "long_call", 0, 0, false, true, true, NULL },
263 /* Whereas these functions are always known to reside within the 26 bit
264 addressing range. */
265 { "short_call", 0, 0, false, true, true, NULL },
266 /* Specify the procedure call conventions for a function. */
267 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
268 /* Interrupt Service Routines have special prologue and epilogue requirements. */
269 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
270 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
271 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
272 #ifdef ARM_PE
273 /* ARM/PE has three new attributes:
274 interfacearm - ?
275 dllexport - for exporting a function/variable that will live in a dll
276 dllimport - for importing a function/variable from a dll
278 Microsoft allows multiple declspecs in one __declspec, separating
279 them with spaces. We do NOT support this. Instead, use __declspec
280 multiple times.
282 { "dllimport", 0, 0, true, false, false, NULL },
283 { "dllexport", 0, 0, true, false, false, NULL },
284 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
285 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
286 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
287 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
288 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
289 #endif
290 { NULL, 0, 0, false, false, false, NULL }
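/* Illustrative sketch, not part of the original source: how the ARM
   attributes registered in the table above are typically spelled in user
   code.  The function names are hypothetical and the block is compiled
   out so it cannot affect the build.  */
#if 0
/* Force an indirect call sequence because the callee may lie outside the
   normal branch range (the "long_call" entry above).  */
void far_away_callee (void) __attribute__ ((long_call));

/* Request the ISR prologue/epilogue handled by arm_handle_isr_attribute;
   the optional argument selects the exception kind.  */
void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
#endif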
293 /* Set default optimization options. */
294 static const struct default_options arm_option_optimization_table[] =
296 /* Enable section anchors by default at -O1 or higher. */
297 { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
298 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
299 { OPT_LEVELS_NONE, 0, NULL, 0 }
302 /* Initialize the GCC target structure. */
303 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
304 #undef TARGET_MERGE_DECL_ATTRIBUTES
305 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
306 #endif
308 #undef TARGET_LEGITIMIZE_ADDRESS
309 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
311 #undef TARGET_ATTRIBUTE_TABLE
312 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
314 #undef TARGET_ASM_FILE_START
315 #define TARGET_ASM_FILE_START arm_file_start
316 #undef TARGET_ASM_FILE_END
317 #define TARGET_ASM_FILE_END arm_file_end
319 #undef TARGET_ASM_ALIGNED_SI_OP
320 #define TARGET_ASM_ALIGNED_SI_OP NULL
321 #undef TARGET_ASM_INTEGER
322 #define TARGET_ASM_INTEGER arm_assemble_integer
324 #undef TARGET_PRINT_OPERAND
325 #define TARGET_PRINT_OPERAND arm_print_operand
326 #undef TARGET_PRINT_OPERAND_ADDRESS
327 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
328 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
329 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
331 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
332 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
334 #undef TARGET_ASM_FUNCTION_PROLOGUE
335 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
337 #undef TARGET_ASM_FUNCTION_EPILOGUE
338 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
340 #undef TARGET_DEFAULT_TARGET_FLAGS
341 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
342 #undef TARGET_HANDLE_OPTION
343 #define TARGET_HANDLE_OPTION arm_handle_option
344 #undef TARGET_HELP
345 #define TARGET_HELP arm_target_help
346 #undef TARGET_OPTION_OVERRIDE
347 #define TARGET_OPTION_OVERRIDE arm_option_override
348 #undef TARGET_OPTION_OPTIMIZATION_TABLE
349 #define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table
351 #undef TARGET_COMP_TYPE_ATTRIBUTES
352 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
354 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
355 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
357 #undef TARGET_SCHED_ADJUST_COST
358 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
360 #undef TARGET_ENCODE_SECTION_INFO
361 #ifdef ARM_PE
362 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
363 #else
364 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
365 #endif
367 #undef TARGET_STRIP_NAME_ENCODING
368 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
370 #undef TARGET_ASM_INTERNAL_LABEL
371 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
373 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
374 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
376 #undef TARGET_FUNCTION_VALUE
377 #define TARGET_FUNCTION_VALUE arm_function_value
379 #undef TARGET_LIBCALL_VALUE
380 #define TARGET_LIBCALL_VALUE arm_libcall_value
382 #undef TARGET_ASM_OUTPUT_MI_THUNK
383 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
384 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
385 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
387 #undef TARGET_RTX_COSTS
388 #define TARGET_RTX_COSTS arm_rtx_costs
389 #undef TARGET_ADDRESS_COST
390 #define TARGET_ADDRESS_COST arm_address_cost
392 #undef TARGET_SHIFT_TRUNCATION_MASK
393 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
394 #undef TARGET_VECTOR_MODE_SUPPORTED_P
395 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
396 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
397 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
399 #undef TARGET_MACHINE_DEPENDENT_REORG
400 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
402 #undef TARGET_INIT_BUILTINS
403 #define TARGET_INIT_BUILTINS arm_init_builtins
404 #undef TARGET_EXPAND_BUILTIN
405 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
407 #undef TARGET_INIT_LIBFUNCS
408 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
410 #undef TARGET_PROMOTE_FUNCTION_MODE
411 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
412 #undef TARGET_PROMOTE_PROTOTYPES
413 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
414 #undef TARGET_PASS_BY_REFERENCE
415 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
416 #undef TARGET_ARG_PARTIAL_BYTES
417 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
418 #undef TARGET_FUNCTION_ARG
419 #define TARGET_FUNCTION_ARG arm_function_arg
420 #undef TARGET_FUNCTION_ARG_ADVANCE
421 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
422 #undef TARGET_FUNCTION_ARG_BOUNDARY
423 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
425 #undef TARGET_SETUP_INCOMING_VARARGS
426 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
428 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
429 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
431 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
432 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
433 #undef TARGET_TRAMPOLINE_INIT
434 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
435 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
436 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
438 #undef TARGET_DEFAULT_SHORT_ENUMS
439 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
441 #undef TARGET_ALIGN_ANON_BITFIELD
442 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
444 #undef TARGET_NARROW_VOLATILE_BITFIELD
445 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
447 #undef TARGET_CXX_GUARD_TYPE
448 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
450 #undef TARGET_CXX_GUARD_MASK_BIT
451 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
453 #undef TARGET_CXX_GET_COOKIE_SIZE
454 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
456 #undef TARGET_CXX_COOKIE_HAS_SIZE
457 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
459 #undef TARGET_CXX_CDTOR_RETURNS_THIS
460 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
462 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
463 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
465 #undef TARGET_CXX_USE_AEABI_ATEXIT
466 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
468 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
469 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
470 arm_cxx_determine_class_data_visibility
472 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
473 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
475 #undef TARGET_RETURN_IN_MSB
476 #define TARGET_RETURN_IN_MSB arm_return_in_msb
478 #undef TARGET_RETURN_IN_MEMORY
479 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
481 #undef TARGET_MUST_PASS_IN_STACK
482 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
484 #if ARM_UNWIND_INFO
485 #undef TARGET_ASM_UNWIND_EMIT
486 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
488 /* EABI unwinding tables use a different format for the typeinfo tables. */
489 #undef TARGET_ASM_TTYPE
490 #define TARGET_ASM_TTYPE arm_output_ttype
492 #undef TARGET_ARM_EABI_UNWINDER
493 #define TARGET_ARM_EABI_UNWINDER true
495 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
496 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
498 #undef TARGET_ASM_INIT_SECTIONS
499 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
500 #endif /* ARM_UNWIND_INFO */
502 #undef TARGET_EXCEPT_UNWIND_INFO
503 #define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
505 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
506 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
508 #undef TARGET_DWARF_REGISTER_SPAN
509 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
511 #undef TARGET_CANNOT_COPY_INSN_P
512 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
514 #ifdef HAVE_AS_TLS
515 #undef TARGET_HAVE_TLS
516 #define TARGET_HAVE_TLS true
517 #endif
519 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
520 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
522 #undef TARGET_CANNOT_FORCE_CONST_MEM
523 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
525 #undef TARGET_MAX_ANCHOR_OFFSET
526 #define TARGET_MAX_ANCHOR_OFFSET 4095
528 /* The minimum is set such that the total size of the block
529 for a particular anchor is -4088 + 1 + 4095 bytes, which is
530 divisible by eight, ensuring natural spacing of anchors. */
531 #undef TARGET_MIN_ANCHOR_OFFSET
532 #define TARGET_MIN_ANCHOR_OFFSET -4088
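/* That is, the anchor range spans 4088 + 1 + 4095 = 8184 bytes, and
   8184 = 8 * 1023, hence the natural eight-byte spacing of anchors
   noted above.  */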
534 #undef TARGET_SCHED_ISSUE_RATE
535 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
537 #undef TARGET_MANGLE_TYPE
538 #define TARGET_MANGLE_TYPE arm_mangle_type
540 #undef TARGET_BUILD_BUILTIN_VA_LIST
541 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
542 #undef TARGET_EXPAND_BUILTIN_VA_START
543 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
544 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
545 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
547 #ifdef HAVE_AS_TLS
548 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
549 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
550 #endif
552 #undef TARGET_LEGITIMATE_ADDRESS_P
553 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
555 #undef TARGET_INVALID_PARAMETER_TYPE
556 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
558 #undef TARGET_INVALID_RETURN_TYPE
559 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
561 #undef TARGET_PROMOTED_TYPE
562 #define TARGET_PROMOTED_TYPE arm_promoted_type
564 #undef TARGET_CONVERT_TO_TYPE
565 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
567 #undef TARGET_SCALAR_MODE_SUPPORTED_P
568 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
570 #undef TARGET_FRAME_POINTER_REQUIRED
571 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
573 #undef TARGET_CAN_ELIMINATE
574 #define TARGET_CAN_ELIMINATE arm_can_eliminate
576 #undef TARGET_CONDITIONAL_REGISTER_USAGE
577 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
579 #undef TARGET_CLASS_LIKELY_SPILLED_P
580 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
582 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
583 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
584 arm_vector_alignment_reachable
586 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
587 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
588 arm_builtin_support_vector_misalignment
590 #undef TARGET_PREFERRED_RENAME_CLASS
591 #define TARGET_PREFERRED_RENAME_CLASS \
592 arm_preferred_rename_class
594 struct gcc_target targetm = TARGET_INITIALIZER;
596 /* Obstack for minipool constant handling. */
597 static struct obstack minipool_obstack;
598 static char * minipool_startobj;
600 /* The maximum number of insns skipped which
601 will be conditionalised if possible. */
602 static int max_insns_skipped = 5;
604 extern FILE * asm_out_file;
606 /* True if we are currently building a constant table. */
607 int making_const_table;
609 /* The processor for which instructions should be scheduled. */
610 enum processor_type arm_tune = arm_none;
612 /* The current tuning set. */
613 const struct tune_params *current_tune;
615 /* Which floating point hardware to schedule for. */
616 int arm_fpu_attr;
619 /* Which floating point hardware to use. */
619 const struct arm_fpu_desc *arm_fpu_desc;
621 /* Whether to use floating point hardware. */
622 enum float_abi_type arm_float_abi;
624 /* Which __fp16 format to use. */
625 enum arm_fp16_format_type arm_fp16_format;
627 /* Which ABI to use. */
628 enum arm_abi_type arm_abi;
630 /* Which thread pointer model to use. */
631 enum arm_tp_type target_thread_pointer = TP_AUTO;
633 /* Used to parse -mstructure_size_boundary command line option. */
634 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
636 /* Used for Thumb call_via trampolines. */
637 rtx thumb_call_via_label[14];
638 static int thumb_call_reg_needed;
640 /* Bit values used to identify processor capabilities. */
641 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
642 #define FL_ARCH3M (1 << 1) /* Extended multiply */
643 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
644 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
645 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
646 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
647 #define FL_THUMB (1 << 6) /* Thumb aware */
648 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
649 #define FL_STRONG (1 << 8) /* StrongARM */
650 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
651 #define FL_XSCALE (1 << 10) /* XScale */
652 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
653 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
654 media instructions. */
655 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
656 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
657 Note: ARM6 & 7 derivatives only. */
658 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
659 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
660 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
661 profile. */
662 #define FL_DIV (1 << 18) /* Hardware divide. */
663 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
664 #define FL_NEON (1 << 20) /* Neon instructions. */
665 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
666 architecture. */
667 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
669 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
671 /* Flags that only affect tuning, not available instructions. */
672 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
673 | FL_CO_PROC)
675 #define FL_FOR_ARCH2 FL_NOTM
676 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
677 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
678 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
679 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
680 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
681 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
682 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
683 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
684 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
685 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
686 #define FL_FOR_ARCH6J FL_FOR_ARCH6
687 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
688 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
689 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
690 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
691 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
692 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
693 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
694 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
695 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
696 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
698 /* The bits in this mask specify which
699 instructions we are allowed to generate. */
700 static unsigned long insn_flags = 0;
702 /* The bits in this mask specify which instruction scheduling options should
703 be used. */
704 static unsigned long tune_flags = 0;
706 /* The following are used in the arm.md file as equivalents to bits
707 in the above two flag variables. */
709 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
710 int arm_arch3m = 0;
712 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
713 int arm_arch4 = 0;
715 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
716 int arm_arch4t = 0;
718 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
719 int arm_arch5 = 0;
721 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
722 int arm_arch5e = 0;
724 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
725 int arm_arch6 = 0;
727 /* Nonzero if this chip supports the ARM 6K extensions. */
728 int arm_arch6k = 0;
730 /* Nonzero if this chip supports the ARM 7 extensions. */
731 int arm_arch7 = 0;
733 /* Nonzero if instructions not present in the 'M' profile can be used. */
734 int arm_arch_notm = 0;
736 /* Nonzero if instructions present in ARMv7E-M can be used. */
737 int arm_arch7em = 0;
739 /* Nonzero if this chip can benefit from load scheduling. */
740 int arm_ld_sched = 0;
742 /* Nonzero if this chip is a StrongARM. */
743 int arm_tune_strongarm = 0;
745 /* Nonzero if this chip is a Cirrus variant. */
746 int arm_arch_cirrus = 0;
748 /* Nonzero if this chip supports Intel Wireless MMX technology. */
749 int arm_arch_iwmmxt = 0;
751 /* Nonzero if this chip is an XScale. */
752 int arm_arch_xscale = 0;
754 /* Nonzero if tuning for XScale. */
755 int arm_tune_xscale = 0;
757 /* Nonzero if we want to tune for stores that access the write-buffer.
758 This typically means an ARM6 or ARM7 with MMU or MPU. */
759 int arm_tune_wbuf = 0;
761 /* Nonzero if tuning for Cortex-A9. */
762 int arm_tune_cortex_a9 = 0;
764 /* Nonzero if generating Thumb instructions. */
765 int thumb_code = 0;
767 /* Nonzero if generating Thumb-1 instructions. */
768 int thumb1_code = 0;
770 /* Nonzero if we should define __THUMB_INTERWORK__ in the
771 preprocessor.
772 XXX This is a bit of a hack, it's intended to help work around
773 problems in GLD which doesn't understand that armv5t code is
774 interworking clean. */
775 int arm_cpp_interwork = 0;
777 /* Nonzero if chip supports Thumb 2. */
778 int arm_arch_thumb2;
780 /* Nonzero if chip supports integer division instruction. */
781 int arm_arch_hwdiv;
783 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
784 we must report the mode of the memory reference from
785 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
786 enum machine_mode output_memory_reference_mode;
788 /* The register number to be used for the PIC offset register. */
789 unsigned arm_pic_register = INVALID_REGNUM;
791 /* Set to 1 after arm_reorg has started. Reset at the start of
792 the next function. */
793 static int after_arm_reorg = 0;
795 enum arm_pcs arm_pcs_default;
797 /* For an explanation of these variables, see final_prescan_insn below. */
798 int arm_ccfsm_state;
799 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
800 enum arm_cond_code arm_current_cc;
802 rtx arm_target_insn;
803 int arm_target_label;
804 /* The number of conditionally executed insns, including the current insn. */
805 int arm_condexec_count = 0;
806 /* A bitmask specifying the patterns for the IT block.
807 Zero means do not output an IT block before this insn. */
808 int arm_condexec_mask = 0;
809 /* The number of bits used in arm_condexec_mask. */
810 int arm_condexec_masklen = 0;
812 /* The condition codes of the ARM, and the inverse function. */
813 static const char * const arm_condition_codes[] =
815 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
816 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
819 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
820 int arm_regs_in_sequence[] =
822 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
825 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
826 #define streq(string1, string2) (strcmp (string1, string2) == 0)
828 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
829 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
830 | (1 << PIC_OFFSET_TABLE_REGNUM)))
832 /* Initialization code. */
834 struct processors
836 const char *const name;
837 enum processor_type core;
838 const char *arch;
839 const unsigned long flags;
840 const struct tune_params *const tune;
844 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
845 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
846 prefetch_slots, \
847 l1_size, \
848 l1_line_size
850 const struct tune_params arm_slowmul_tune =
852 arm_slowmul_rtx_costs,
853 NULL,
855 ARM_PREFETCH_NOT_BENEFICIAL
858 const struct tune_params arm_fastmul_tune =
860 arm_fastmul_rtx_costs,
861 NULL,
863 ARM_PREFETCH_NOT_BENEFICIAL
866 const struct tune_params arm_xscale_tune =
868 arm_xscale_rtx_costs,
869 xscale_sched_adjust_cost,
871 ARM_PREFETCH_NOT_BENEFICIAL
874 const struct tune_params arm_9e_tune =
876 arm_9e_rtx_costs,
877 NULL,
879 ARM_PREFETCH_NOT_BENEFICIAL
882 const struct tune_params arm_cortex_a9_tune =
884 arm_9e_rtx_costs,
885 cortex_a9_sched_adjust_cost,
887 ARM_PREFETCH_BENEFICIAL(4,32,32)
890 const struct tune_params arm_fa726te_tune =
892 arm_9e_rtx_costs,
893 fa726te_sched_adjust_cost,
895 ARM_PREFETCH_NOT_BENEFICIAL
899 /* Not all of these give usefully different compilation alternatives,
900 but there is no simple way of generalizing them. */
901 static const struct processors all_cores[] =
903 /* ARM Cores */
904 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
905 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
906 #include "arm-cores.def"
907 #undef ARM_CORE
908 {NULL, arm_none, NULL, 0, NULL}
911 static const struct processors all_architectures[] =
913 /* ARM Architectures */
914 /* We don't specify tuning costs here as it will be figured out
915 from the core. */
917 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
918 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
919 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
920 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
921 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
922 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
923 implementations that support it, so we will leave it out for now. */
924 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
925 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
926 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
927 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
928 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
929 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
930 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
931 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
932 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
933 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
934 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
935 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
936 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
937 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
938 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
939 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
940 {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
941 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
942 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
943 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
944 {NULL, arm_none, NULL, 0 , NULL}
948 /* These are populated as commandline arguments are processed, or NULL
949 if not specified. */
950 static const struct processors *arm_selected_arch;
951 static const struct processors *arm_selected_cpu;
952 static const struct processors *arm_selected_tune;
954 /* The name of the preprocessor macro to define for this architecture. */
956 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
958 /* Available values for -mfpu=. */
960 static const struct arm_fpu_desc all_fpus[] =
962 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
963 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
964 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
965 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
966 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
967 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
968 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
969 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
970 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
971 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
972 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
973 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
974 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
975 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
976 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
977 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
978 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
979 /* Compatibility aliases. */
980 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
984 struct float_abi
986 const char * name;
987 enum float_abi_type abi_type;
991 /* Available values for -mfloat-abi=. */
993 static const struct float_abi all_float_abis[] =
995 {"soft", ARM_FLOAT_ABI_SOFT},
996 {"softfp", ARM_FLOAT_ABI_SOFTFP},
997 {"hard", ARM_FLOAT_ABI_HARD}
1001 struct fp16_format
1003 const char *name;
1004 enum arm_fp16_format_type fp16_format_type;
1008 /* Available values for -mfp16-format=. */
1010 static const struct fp16_format all_fp16_formats[] =
1012 {"none", ARM_FP16_FORMAT_NONE},
1013 {"ieee", ARM_FP16_FORMAT_IEEE},
1014 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
1018 struct abi_name
1020 const char *name;
1021 enum arm_abi_type abi_type;
1025 /* Available values for -mabi=. */
1027 static const struct abi_name arm_all_abis[] =
1029 {"apcs-gnu", ARM_ABI_APCS},
1030 {"atpcs", ARM_ABI_ATPCS},
1031 {"aapcs", ARM_ABI_AAPCS},
1032 {"iwmmxt", ARM_ABI_IWMMXT},
1033 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
1036 /* Supported TLS relocations. */
1038 enum tls_reloc {
1039 TLS_GD32,
1040 TLS_LDM32,
1041 TLS_LDO32,
1042 TLS_IE32,
1043 TLS_LE32
1046 /* The maximum number of insns to be used when loading a constant. */
1047 inline static int
1048 arm_constant_limit (bool size_p)
1050 return size_p ? 1 : current_tune->constant_limit;
1053 /* Emit an insn that's a simple single-set. Both the operands must be known
1054 to be valid. */
1055 inline static rtx
1056 emit_set_insn (rtx x, rtx y)
1058 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1061 /* Return the number of bits set in VALUE. */
1062 static unsigned
1063 bit_count (unsigned long value)
1065 unsigned long count = 0;
1067 while (value)
1069 count++;
1070 value &= value - 1; /* Clear the least-significant set bit. */
1073 return count;
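/* Worked example of the loop above: for value = 0xB (binary 1011) the
   successive values are 1011 -> 1010 -> 1000 -> 0000, so three iterations
   run and bit_count returns 3.  Each "value &= value - 1" clears exactly
   one set bit, so the cost is proportional to the number of set bits
   rather than to the word size.  */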
1076 /* Set up library functions unique to ARM. */
1078 static void
1079 arm_init_libfuncs (void)
1081 /* There are no special library functions unless we are using the
1082 ARM BPABI. */
1083 if (!TARGET_BPABI)
1084 return;
1086 /* The functions below are described in Section 4 of the "Run-Time
1087 ABI for the ARM architecture", Version 1.0. */
1089 /* Double-precision floating-point arithmetic. Table 2. */
1090 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1091 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1092 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1093 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1094 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1096 /* Double-precision comparisons. Table 3. */
1097 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1098 set_optab_libfunc (ne_optab, DFmode, NULL);
1099 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1100 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1101 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1102 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1103 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1105 /* Single-precision floating-point arithmetic. Table 4. */
1106 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1107 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1108 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1109 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1110 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1112 /* Single-precision comparisons. Table 5. */
1113 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1114 set_optab_libfunc (ne_optab, SFmode, NULL);
1115 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1116 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1117 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1118 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1119 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1121 /* Floating-point to integer conversions. Table 6. */
1122 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1123 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1124 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1125 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1126 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1127 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1128 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1129 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1131 /* Conversions between floating types. Table 7. */
1132 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1133 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1135 /* Integer to floating-point conversions. Table 8. */
1136 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1137 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1138 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1139 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1140 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1141 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1142 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1143 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1145 /* Long long. Table 9. */
1146 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1147 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1148 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1149 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1150 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1151 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1152 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1153 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1155 /* Integer (32/32->32) division. \S 4.3.1. */
1156 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1157 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1159 /* The divmod functions are designed so that they can be used for
1160 plain division, even though they return both the quotient and the
1161 remainder. The quotient is returned in the usual location (i.e.,
1162 r0 for SImode, {r0, r1} for DImode), just as would be expected
1163 for an ordinary division routine. Because the AAPCS calling
1164 conventions specify that all of { r0, r1, r2, r3 } are
1165 call-clobbered registers, there is no need to tell the compiler
1166 explicitly that those registers are clobbered by these
1167 routines. */
1168 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1169 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1171 /* For SImode division the ABI provides div-without-mod routines,
1172 which are faster. */
1173 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1174 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1176 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1177 divmod libcalls instead. */
1178 set_optab_libfunc (smod_optab, DImode, NULL);
1179 set_optab_libfunc (umod_optab, DImode, NULL);
1180 set_optab_libfunc (smod_optab, SImode, NULL);
1181 set_optab_libfunc (umod_optab, SImode, NULL);
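/* For reference, a sketch of the RTABI divmod entry points named above,
   as described in \S 4.3.1 of the Run-time ABI.  The struct typedef names
   are illustrative only; the ABI itself fixes the register layout
   (quotient in r0 or {r0, r1}, remainder in r1 or {r2, r3}).  Compiled
   out so it cannot affect the build.  */
#if 0
typedef struct { int quot; int rem; } idiv_return;
typedef struct { unsigned quot; unsigned rem; } uidiv_return;

idiv_return  __aeabi_idivmod (int numerator, int denominator);
uidiv_return __aeabi_uidivmod (unsigned numerator, unsigned denominator);
#endif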
1183 /* Half-precision float operations. The compiler handles all operations
1184 with NULL libfuncs by converting to SFmode. */
1185 switch (arm_fp16_format)
1187 case ARM_FP16_FORMAT_IEEE:
1188 case ARM_FP16_FORMAT_ALTERNATIVE:
1190 /* Conversions. */
1191 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1192 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1193 ? "__gnu_f2h_ieee"
1194 : "__gnu_f2h_alternative"));
1195 set_conv_libfunc (sext_optab, SFmode, HFmode,
1196 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1197 ? "__gnu_h2f_ieee"
1198 : "__gnu_h2f_alternative"));
1200 /* Arithmetic. */
1201 set_optab_libfunc (add_optab, HFmode, NULL);
1202 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1203 set_optab_libfunc (smul_optab, HFmode, NULL);
1204 set_optab_libfunc (neg_optab, HFmode, NULL);
1205 set_optab_libfunc (sub_optab, HFmode, NULL);
1207 /* Comparisons. */
1208 set_optab_libfunc (eq_optab, HFmode, NULL);
1209 set_optab_libfunc (ne_optab, HFmode, NULL);
1210 set_optab_libfunc (lt_optab, HFmode, NULL);
1211 set_optab_libfunc (le_optab, HFmode, NULL);
1212 set_optab_libfunc (ge_optab, HFmode, NULL);
1213 set_optab_libfunc (gt_optab, HFmode, NULL);
1214 set_optab_libfunc (unord_optab, HFmode, NULL);
1215 break;
1217 default:
1218 break;
1221 if (TARGET_AAPCS_BASED)
1222 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1225 /* On AAPCS systems, this is the "struct __va_list". */
1226 static GTY(()) tree va_list_type;
1228 /* Return the type to use as __builtin_va_list. */
1229 static tree
1230 arm_build_builtin_va_list (void)
1232 tree va_list_name;
1233 tree ap_field;
1235 if (!TARGET_AAPCS_BASED)
1236 return std_build_builtin_va_list ();
1238 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1239 defined as:
1241 struct __va_list
1243 void *__ap;
1246 The C Library ABI further reinforces this definition in \S
1247 4.1.
1249 We must follow this definition exactly. The structure tag
1250 name is visible in C++ mangled names, and thus forms a part
1251 of the ABI. The field name may be used by people who
1252 #include <stdarg.h>. */
1253 /* Create the type. */
1254 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1255 /* Give it the required name. */
1256 va_list_name = build_decl (BUILTINS_LOCATION,
1257 TYPE_DECL,
1258 get_identifier ("__va_list"),
1259 va_list_type);
1260 DECL_ARTIFICIAL (va_list_name) = 1;
1261 TYPE_NAME (va_list_type) = va_list_name;
1262 TYPE_STUB_DECL (va_list_type) = va_list_name;
1263 /* Create the __ap field. */
1264 ap_field = build_decl (BUILTINS_LOCATION,
1265 FIELD_DECL,
1266 get_identifier ("__ap"),
1267 ptr_type_node);
1268 DECL_ARTIFICIAL (ap_field) = 1;
1269 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1270 TYPE_FIELDS (va_list_type) = ap_field;
1271 /* Compute its layout. */
1272 layout_type (va_list_type);
1274 return va_list_type;
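/* A sketch of what the record built above amounts to at the C level, per
   the AAPCS definition quoted in the comment; shown only for exposition
   and compiled out.  */
#if 0
typedef struct __va_list
{
  void *__ap;
} va_list;
#endif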
1277 /* Return an expression of type "void *" pointing to the next
1278 available argument in a variable-argument list. VALIST is the
1279 user-level va_list object, of type __builtin_va_list. */
1280 static tree
1281 arm_extract_valist_ptr (tree valist)
1283 if (TREE_TYPE (valist) == error_mark_node)
1284 return error_mark_node;
1286 /* On an AAPCS target, the pointer is stored within "struct
1287 va_list". */
1288 if (TARGET_AAPCS_BASED)
1290 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1291 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1292 valist, ap_field, NULL_TREE);
1295 return valist;
1298 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1299 static void
1300 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1302 valist = arm_extract_valist_ptr (valist);
1303 std_expand_builtin_va_start (valist, nextarg);
1306 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1307 static tree
1308 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1309 gimple_seq *post_p)
1311 valist = arm_extract_valist_ptr (valist);
1312 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1315 /* Lookup NAME in SEL. */
1317 static const struct processors *
1318 arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
1320 if (!(name && *name))
1321 return NULL;
1323 for (; sel->name != NULL; sel++)
1325 if (streq (name, sel->name))
1326 return sel;
1329 error ("bad value (%s) for %s switch", name, desc);
1330 return NULL;
1333 /* Implement TARGET_HANDLE_OPTION. */
1335 static bool
1336 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1338 switch (code)
1340 case OPT_march_:
1341 arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
1342 return true;
1344 case OPT_mcpu_:
1345 arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
1346 return true;
1348 case OPT_mhard_float:
1349 target_float_abi_name = "hard";
1350 return true;
1352 case OPT_msoft_float:
1353 target_float_abi_name = "soft";
1354 return true;
1356 case OPT_mtune_:
1357 arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
1358 return true;
1360 default:
1361 return true;
1365 static void
1366 arm_target_help (void)
1368 int i;
1369 static int columns = 0;
1370 int remaining;
1372 /* If we have not done so already, obtain the desired maximum width of
1373 the output. Note - this is a duplication of the code at the start of
1374 gcc/opts.c:print_specific_help() - the two copies should probably be
1375 replaced by a single function. */
1376 if (columns == 0)
1378 const char *p;
1380 p = getenv ("COLUMNS");
1381 if (p != NULL)
1383 int value = atoi (p);
1385 if (value > 0)
1386 columns = value;
1389 if (columns == 0)
1390 /* Use a reasonable default. */
1391 columns = 80;
1394 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1396 /* The - 2 is because we know that the last entry in the array is NULL. */
1397 i = ARRAY_SIZE (all_cores) - 2;
1398 gcc_assert (i > 0);
1399 printf (" %s", all_cores[i].name);
1400 remaining = columns - (strlen (all_cores[i].name) + 4);
1401 gcc_assert (remaining >= 0);
1403 while (i--)
1405 int len = strlen (all_cores[i].name);
1407 if (remaining > len + 2)
1409 printf (", %s", all_cores[i].name);
1410 remaining -= len + 2;
1412 else
1414 if (remaining > 0)
1415 printf (",");
1416 printf ("\n %s", all_cores[i].name);
1417 remaining = columns - (len + 4);
1421 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1423 i = ARRAY_SIZE (all_architectures) - 2;
1424 gcc_assert (i > 0);
1426 printf (" %s", all_architectures[i].name);
1427 remaining = columns - (strlen (all_architectures[i].name) + 4);
1428 gcc_assert (remaining >= 0);
1430 while (i--)
1432 int len = strlen (all_architectures[i].name);
1434 if (remaining > len + 2)
1436 printf (", %s", all_architectures[i].name);
1437 remaining -= len + 2;
1439 else
1441 if (remaining > 0)
1442 printf (",");
1443 printf ("\n %s", all_architectures[i].name);
1444 remaining = columns - (len + 4);
1447 printf ("\n");
1451 /* Fix up any incompatible options that the user has specified. */
1452 static void
1453 arm_option_override (void)
1455 unsigned i;
1457 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1458 SUBTARGET_OVERRIDE_OPTIONS;
1459 #endif
1461 if (arm_selected_arch)
1463 if (arm_selected_cpu)
1465 /* Check for conflict between mcpu and march. */
1466 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1468 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1469 arm_selected_cpu->name, arm_selected_arch->name);
1470 /* -march wins for code generation.
1471 -mcpu wins for default tuning. */
1472 if (!arm_selected_tune)
1473 arm_selected_tune = arm_selected_cpu;
1475 arm_selected_cpu = arm_selected_arch;
1477 else
1478 /* -mcpu wins. */
1479 arm_selected_arch = NULL;
1481 else
1482 /* Pick a CPU based on the architecture. */
1483 arm_selected_cpu = arm_selected_arch;
1486 /* If the user did not specify a processor, choose one for them. */
1487 if (!arm_selected_cpu)
1489 const struct processors * sel;
1490 unsigned int sought;
1492 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1493 if (!arm_selected_cpu->name)
1495 #ifdef SUBTARGET_CPU_DEFAULT
1496 /* Use the subtarget default CPU if none was specified by
1497 configure. */
1498 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1499 #endif
1500 /* Default to ARM6. */
1501 if (!arm_selected_cpu->name)
1502 arm_selected_cpu = &all_cores[arm6];
1505 sel = arm_selected_cpu;
1506 insn_flags = sel->flags;
1508 /* Now check to see if the user has specified some command line
1509 switches that require certain abilities from the cpu. */
1510 sought = 0;
1512 if (TARGET_INTERWORK || TARGET_THUMB)
1514 sought |= (FL_THUMB | FL_MODE32);
1516 /* There are no ARM processors that support both APCS-26 and
1517 interworking. Therefore we force FL_MODE26 to be removed
1518 from insn_flags here (if it was set), so that the search
1519 below will always be able to find a compatible processor. */
1520 insn_flags &= ~FL_MODE26;
1523 if (sought != 0 && ((sought & insn_flags) != sought))
1525 /* Try to locate a CPU type that supports all of the abilities
1526 of the default CPU, plus the extra abilities requested by
1527 the user. */
1528 for (sel = all_cores; sel->name != NULL; sel++)
1529 if ((sel->flags & sought) == (sought | insn_flags))
1530 break;
1532 if (sel->name == NULL)
1534 unsigned current_bit_count = 0;
1535 const struct processors * best_fit = NULL;
1537 /* Ideally we would like to issue an error message here
1538 saying that it was not possible to find a CPU compatible
1539 with the default CPU, but which also supports the command
1540 line options specified by the programmer, and so they
1541 ought to use the -mcpu=<name> command line option to
1542 override the default CPU type.
1544 If we cannot find a cpu that has both the
1545 characteristics of the default cpu and the given
1546 command line options we scan the array again looking
1547 for a best match. */
1548 for (sel = all_cores; sel->name != NULL; sel++)
1549 if ((sel->flags & sought) == sought)
1551 unsigned count;
1553 count = bit_count (sel->flags & insn_flags);
1555 if (count >= current_bit_count)
1557 best_fit = sel;
1558 current_bit_count = count;
1562 gcc_assert (best_fit);
1563 sel = best_fit;
1566 arm_selected_cpu = sel;
1570 gcc_assert (arm_selected_cpu);
1571 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1572 if (!arm_selected_tune)
1573 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1575 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1576 insn_flags = arm_selected_cpu->flags;
1578 arm_tune = arm_selected_tune->core;
1579 tune_flags = arm_selected_tune->flags;
1580 current_tune = arm_selected_tune->tune;
1582 if (target_fp16_format_name)
1584 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1586 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1588 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1589 break;
1592 if (i == ARRAY_SIZE (all_fp16_formats))
1593 error ("invalid __fp16 format option: -mfp16-format=%s",
1594 target_fp16_format_name);
1596 else
1597 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1599 if (target_abi_name)
1601 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1603 if (streq (arm_all_abis[i].name, target_abi_name))
1605 arm_abi = arm_all_abis[i].abi_type;
1606 break;
1609 if (i == ARRAY_SIZE (arm_all_abis))
1610 error ("invalid ABI option: -mabi=%s", target_abi_name);
1612 else
1613 arm_abi = ARM_DEFAULT_ABI;
1615 /* Make sure that the processor choice does not conflict with any of the
1616 other command line choices. */
1617 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1618 error ("target CPU does not support ARM mode");
1620 /* BPABI targets use linker tricks to allow interworking on cores
1621 without thumb support. */
1622 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1624 warning (0, "target CPU does not support interworking");
1625 target_flags &= ~MASK_INTERWORK;
1628 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1630 warning (0, "target CPU does not support THUMB instructions");
1631 target_flags &= ~MASK_THUMB;
1634 if (TARGET_APCS_FRAME && TARGET_THUMB)
1636 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1637 target_flags &= ~MASK_APCS_FRAME;
1640 /* Callee super interworking implies thumb interworking. Adding
1641 this to the flags here simplifies the logic elsewhere. */
1642 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1643 target_flags |= MASK_INTERWORK;
1645 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1646 from here where no function is being compiled currently. */
1647 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1648 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1650 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1651 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1653 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1655 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1656 target_flags |= MASK_APCS_FRAME;
1659 if (TARGET_POKE_FUNCTION_NAME)
1660 target_flags |= MASK_APCS_FRAME;
1662 if (TARGET_APCS_REENT && flag_pic)
1663 error ("-fpic and -mapcs-reent are incompatible");
1665 if (TARGET_APCS_REENT)
1666 warning (0, "APCS reentrant code not supported. Ignored");
1668 /* If this target is normally configured to use APCS frames, warn if they
1669 are turned off and debugging is turned on. */
1670 if (TARGET_ARM
1671 && write_symbols != NO_DEBUG
1672 && !TARGET_APCS_FRAME
1673 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1674 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1676 if (TARGET_APCS_FLOAT)
1677 warning (0, "passing floating point arguments in fp regs not yet supported");
1679 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1680 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1681 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1682 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1683 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1684 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1685 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1686 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1687 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1688 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1689 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1690 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1691 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1692 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1694 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1695 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1696 thumb_code = TARGET_ARM == 0;
1697 thumb1_code = TARGET_THUMB1 != 0;
1698 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1699 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1700 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1701 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1702 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1704 /* If we are not using the default (ARM mode) section anchor offset
1705 ranges, then set the correct ranges now. */
1706 if (TARGET_THUMB1)
1708 /* Thumb-1 LDR instructions cannot have negative offsets.
1709 Permissible positive offset ranges are 5-bit (for byte loads),
1710 6-bit (for halfword loads), or 7-bit (for word loads).
1711 Empirical results suggest a 7-bit anchor range gives the best
1712 overall code size. */
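/* For illustration: the Thumb-1 LDRB/LDRH/LDR immediate forms take a
   5-bit offset scaled by the access size, so the reachable byte offsets
   are at most 31, 62 and 124 respectively -- effectively 5-, 6- and
   7-bit byte ranges -- which is why a 7-bit (0..127) anchor range is
   chosen below.  */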
1713 targetm.min_anchor_offset = 0;
1714 targetm.max_anchor_offset = 127;
1716 else if (TARGET_THUMB2)
1718 /* The minimum is set such that the total size of the block
1719 for a particular anchor is 248 + 1 + 4095 bytes, which is
1720 divisible by eight, ensuring natural spacing of anchors. */
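/* Checking the arithmetic behind the figure above: 248 + 1 + 4095
   = 4344 = 8 * 543, so the block size is indeed a multiple of eight.  */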
1721 targetm.min_anchor_offset = -248;
1722 targetm.max_anchor_offset = 4095;
1725 /* V5 code we generate is completely interworking capable, so we turn off
1726 TARGET_INTERWORK here to avoid many tests later on. */
1728 /* XXX However, we must pass the right pre-processor defines to CPP
1729 or GLD can get confused. This is a hack. */
1730 if (TARGET_INTERWORK)
1731 arm_cpp_interwork = 1;
1733 if (arm_arch5)
1734 target_flags &= ~MASK_INTERWORK;
1736 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1737 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1739 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1740 error ("iwmmxt abi requires an iwmmxt capable cpu");
1742 if (target_fpu_name == NULL && target_fpe_name != NULL)
1744 if (streq (target_fpe_name, "2"))
1745 target_fpu_name = "fpe2";
1746 else if (streq (target_fpe_name, "3"))
1747 target_fpu_name = "fpe3";
1748 else
1749 error ("invalid floating point emulation option: -mfpe=%s",
1750 target_fpe_name);
1753 if (target_fpu_name == NULL)
1755 #ifdef FPUTYPE_DEFAULT
1756 target_fpu_name = FPUTYPE_DEFAULT;
1757 #else
1758 if (arm_arch_cirrus)
1759 target_fpu_name = "maverick";
1760 else
1761 target_fpu_name = "fpe2";
1762 #endif
1765 arm_fpu_desc = NULL;
1766 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1768 if (streq (all_fpus[i].name, target_fpu_name))
1770 arm_fpu_desc = &all_fpus[i];
1771 break;
1775 if (!arm_fpu_desc)
1777 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1778 return;
1781 switch (arm_fpu_desc->model)
1783 case ARM_FP_MODEL_FPA:
1784 if (arm_fpu_desc->rev == 2)
1785 arm_fpu_attr = FPU_FPE2;
1786 else if (arm_fpu_desc->rev == 3)
1787 arm_fpu_attr = FPU_FPE3;
1788 else
1789 arm_fpu_attr = FPU_FPA;
1790 break;
1792 case ARM_FP_MODEL_MAVERICK:
1793 arm_fpu_attr = FPU_MAVERICK;
1794 break;
1796 case ARM_FP_MODEL_VFP:
1797 arm_fpu_attr = FPU_VFP;
1798 break;
1800 default:
1801 gcc_unreachable();
1804 if (target_float_abi_name != NULL)
1806 /* The user specified a FP ABI. */
1807 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1809 if (streq (all_float_abis[i].name, target_float_abi_name))
1811 arm_float_abi = all_float_abis[i].abi_type;
1812 break;
1815 if (i == ARRAY_SIZE (all_float_abis))
1816 error ("invalid floating point abi: -mfloat-abi=%s",
1817 target_float_abi_name);
1819 else
1820 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1822 if (TARGET_AAPCS_BASED
1823 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1824 error ("FPA is unsupported in the AAPCS");
1826 if (TARGET_AAPCS_BASED)
1828 if (TARGET_CALLER_INTERWORKING)
1829 error ("AAPCS does not support -mcaller-super-interworking");
1830 else
1831 if (TARGET_CALLEE_INTERWORKING)
1832 error ("AAPCS does not support -mcallee-super-interworking");
1835 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1836 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1837 will ever exist. GCC makes no attempt to support this combination. */
1838 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1839 sorry ("iWMMXt and hardware floating point");
1841 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1842 if (TARGET_THUMB2 && TARGET_IWMMXT)
1843 sorry ("Thumb-2 iWMMXt");
1845 /* __fp16 support currently assumes the core has ldrh. */
1846 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1847 sorry ("__fp16 and no ldrh");
1849 /* If soft-float is specified then don't use FPU. */
1850 if (TARGET_SOFT_FLOAT)
1851 arm_fpu_attr = FPU_NONE;
1853 if (TARGET_AAPCS_BASED)
1855 if (arm_abi == ARM_ABI_IWMMXT)
1856 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1857 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1858 && TARGET_HARD_FLOAT
1859 && TARGET_VFP)
1860 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1861 else
1862 arm_pcs_default = ARM_PCS_AAPCS;
1864 else
1866 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1867 sorry ("-mfloat-abi=hard and VFP");
1869 if (arm_abi == ARM_ABI_APCS)
1870 arm_pcs_default = ARM_PCS_APCS;
1871 else
1872 arm_pcs_default = ARM_PCS_ATPCS;
1875 /* For arm2/3 there is no need to do any scheduling if there is only
1876 a floating point emulator, or we are doing software floating-point. */
1877 if ((TARGET_SOFT_FLOAT
1878 || (TARGET_FPA && arm_fpu_desc->rev))
1879 && (tune_flags & FL_MODE32) == 0)
1880 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1882 if (target_thread_switch)
1884 if (strcmp (target_thread_switch, "soft") == 0)
1885 target_thread_pointer = TP_SOFT;
1886 else if (strcmp (target_thread_switch, "auto") == 0)
1887 target_thread_pointer = TP_AUTO;
1888 else if (strcmp (target_thread_switch, "cp15") == 0)
1889 target_thread_pointer = TP_CP15;
1890 else
1891 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1894 /* Use the cp15 method if it is available. */
1895 if (target_thread_pointer == TP_AUTO)
1897 if (arm_arch6k && !TARGET_THUMB1)
1898 target_thread_pointer = TP_CP15;
1899 else
1900 target_thread_pointer = TP_SOFT;
1903 if (TARGET_HARD_TP && TARGET_THUMB1)
1904 error ("cannot use -mtp=cp15 with 16-bit Thumb");
1906 /* Override the default structure alignment for AAPCS ABI. */
1907 if (TARGET_AAPCS_BASED)
1908 arm_structure_size_boundary = 8;
1910 if (structure_size_string != NULL)
1912 int size = strtol (structure_size_string, NULL, 0);
1914 if (size == 8 || size == 32
1915 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1916 arm_structure_size_boundary = size;
1917 else
1918 warning (0, "structure size boundary can only be set to %s",
1919 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1922 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1924 error ("RTP PIC is incompatible with Thumb");
1925 flag_pic = 0;
1928 /* If stack checking is disabled, we can use r10 as the PIC register,
1929 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1930 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1932 if (TARGET_VXWORKS_RTP)
1933 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1934 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1937 if (flag_pic && TARGET_VXWORKS_RTP)
1938 arm_pic_register = 9;
1940 if (arm_pic_register_string != NULL)
1942 int pic_register = decode_reg_name (arm_pic_register_string);
1944 if (!flag_pic)
1945 warning (0, "-mpic-register= is useless without -fpic");
1947 /* Prevent the user from choosing an obviously stupid PIC register. */
1948 else if (pic_register < 0 || call_used_regs[pic_register]
1949 || pic_register == HARD_FRAME_POINTER_REGNUM
1950 || pic_register == STACK_POINTER_REGNUM
1951 || pic_register >= PC_REGNUM
1952 || (TARGET_VXWORKS_RTP
1953 && (unsigned int) pic_register != arm_pic_register))
1954 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1955 else
1956 arm_pic_register = pic_register;
1959 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1960 if (fix_cm3_ldrd == 2)
1962 if (arm_selected_cpu->core == cortexm3)
1963 fix_cm3_ldrd = 1;
1964 else
1965 fix_cm3_ldrd = 0;
1968 if (TARGET_THUMB1 && flag_schedule_insns)
1970 /* Don't warn since it's on by default in -O2. */
1971 flag_schedule_insns = 0;
1974 if (optimize_size)
1976 /* If optimizing for size, bump the number of instructions that we
1977 are prepared to conditionally execute (even on a StrongARM). */
1978 max_insns_skipped = 6;
1980 else
1982 /* StrongARM has early execution of branches, so a sequence
1983 that is worth skipping is shorter. */
1984 if (arm_tune_strongarm)
1985 max_insns_skipped = 3;
1988 /* Hot/Cold partitioning is not currently supported, since we can't
1989 handle literal pool placement in that case. */
1990 if (flag_reorder_blocks_and_partition)
1992 inform (input_location,
1993 "-freorder-blocks-and-partition not supported on this architecture");
1994 flag_reorder_blocks_and_partition = 0;
1995 flag_reorder_blocks = 1;
1998 if (flag_pic)
1999 /* Hoisting PIC address calculations more aggressively provides a small,
2000 but measurable, size reduction for PIC code. Therefore, we decrease
2001 the bar for unrestricted expression hoisting to the cost of PIC address
2002 calculation, which is 2 instructions. */
2003 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2004 global_options.x_param_values,
2005 global_options_set.x_param_values);
2007 /* ARM EABI defaults to strict volatile bitfields. */
2008 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
2009 flag_strict_volatile_bitfields = 1;
2011 /* Enable software prefetching at -O3 for CPUs that have prefetch, where we have
2012 deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2013 if (flag_prefetch_loop_arrays < 0
2014 && HAVE_prefetch
2015 && optimize >= 3
2016 && current_tune->num_prefetch_slots > 0)
2017 flag_prefetch_loop_arrays = 1;
2019 /* Set up parameters to be used in the prefetching algorithm. Do not override
2020 the defaults unless we are tuning for a core we have researched values for. */
2021 if (current_tune->num_prefetch_slots > 0)
2022 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2023 current_tune->num_prefetch_slots,
2024 global_options.x_param_values,
2025 global_options_set.x_param_values);
2026 if (current_tune->l1_cache_line_size >= 0)
2027 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2028 current_tune->l1_cache_line_size,
2029 global_options.x_param_values,
2030 global_options_set.x_param_values);
2031 if (current_tune->l1_cache_size >= 0)
2032 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2033 current_tune->l1_cache_size,
2034 global_options.x_param_values,
2035 global_options_set.x_param_values);
2037 /* Register global variables with the garbage collector. */
2038 arm_add_gc_roots ();
2041 static void
2042 arm_add_gc_roots (void)
2044 gcc_obstack_init(&minipool_obstack);
2045 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2048 /* A table of known ARM exception types.
2049 For use with the interrupt function attribute. */
2051 typedef struct
2053 const char *const arg;
2054 const unsigned long return_value;
2056 isr_attribute_arg;
2058 static const isr_attribute_arg isr_attribute_args [] =
2060 { "IRQ", ARM_FT_ISR },
2061 { "irq", ARM_FT_ISR },
2062 { "FIQ", ARM_FT_FIQ },
2063 { "fiq", ARM_FT_FIQ },
2064 { "ABORT", ARM_FT_ISR },
2065 { "abort", ARM_FT_ISR },
2066 { "ABORT", ARM_FT_ISR },
2067 { "abort", ARM_FT_ISR },
2068 { "UNDEF", ARM_FT_EXCEPTION },
2069 { "undef", ARM_FT_EXCEPTION },
2070 { "SWI", ARM_FT_EXCEPTION },
2071 { "swi", ARM_FT_EXCEPTION },
2072 { NULL, ARM_FT_NORMAL }
2075 /* Returns the (interrupt) function type of the current
2076 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2078 static unsigned long
2079 arm_isr_value (tree argument)
2081 const isr_attribute_arg * ptr;
2082 const char * arg;
2084 if (!arm_arch_notm)
2085 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2087 /* No argument - default to IRQ. */
2088 if (argument == NULL_TREE)
2089 return ARM_FT_ISR;
2091 /* Get the value of the argument. */
2092 if (TREE_VALUE (argument) == NULL_TREE
2093 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2094 return ARM_FT_UNKNOWN;
2096 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2098 /* Check it against the list of known arguments. */
2099 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2100 if (streq (arg, ptr->arg))
2101 return ptr->return_value;
2103 /* An unrecognized interrupt type. */
2104 return ARM_FT_UNKNOWN;
2107 /* Computes the type of the current function. */
2109 static unsigned long
2110 arm_compute_func_type (void)
2112 unsigned long type = ARM_FT_UNKNOWN;
2113 tree a;
2114 tree attr;
2116 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2118 /* Decide if the current function is volatile. Such functions
2119 never return, and many memory cycles can be saved by not storing
2120 register values that will never be needed again. This optimization
2121 was added to speed up context switching in a kernel application. */
2122 if (optimize > 0
2123 && (TREE_NOTHROW (current_function_decl)
2124 || !(flag_unwind_tables
2125 || (flag_exceptions
2126 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2127 && TREE_THIS_VOLATILE (current_function_decl))
2128 type |= ARM_FT_VOLATILE;
2130 if (cfun->static_chain_decl != NULL)
2131 type |= ARM_FT_NESTED;
2133 attr = DECL_ATTRIBUTES (current_function_decl);
2135 a = lookup_attribute ("naked", attr);
2136 if (a != NULL_TREE)
2137 type |= ARM_FT_NAKED;
2139 a = lookup_attribute ("isr", attr);
2140 if (a == NULL_TREE)
2141 a = lookup_attribute ("interrupt", attr);
2143 if (a == NULL_TREE)
2144 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2145 else
2146 type |= arm_isr_value (TREE_VALUE (a));
2148 return type;
2151 /* Returns the type of the current function. */
2153 unsigned long
2154 arm_current_func_type (void)
2156 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2157 cfun->machine->func_type = arm_compute_func_type ();
2159 return cfun->machine->func_type;
2162 bool
2163 arm_allocate_stack_slots_for_args (void)
2165 /* Naked functions should not allocate stack slots for arguments. */
2166 return !IS_NAKED (arm_current_func_type ());
2170 /* Output assembler code for a block containing the constant parts
2171 of a trampoline, leaving space for the variable parts.
2173 On the ARM (if r8 is the static chain regnum, and remembering that
2174 referencing pc adds an offset of 8), the trampoline looks like:
2175 ldr r8, [pc, #0]
2176 ldr pc, [pc]
2177 .word static chain value
2178 .word function's address
2179 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2181 static void
2182 arm_asm_trampoline_template (FILE *f)
2184 if (TARGET_ARM)
2186 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2187 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2189 else if (TARGET_THUMB2)
2191 /* The Thumb-2 trampoline is similar to the arm implementation.
2192 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2193 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2194 STATIC_CHAIN_REGNUM, PC_REGNUM);
2195 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2197 else
2199 ASM_OUTPUT_ALIGN (f, 2);
2200 fprintf (f, "\t.code\t16\n");
2201 fprintf (f, ".Ltrampoline_start:\n");
2202 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2203 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2204 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2205 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2206 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2207 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2209 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2210 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2213 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2215 static void
2216 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2218 rtx fnaddr, mem, a_tramp;
2220 emit_block_move (m_tramp, assemble_trampoline_template (),
2221 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2223 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2224 emit_move_insn (mem, chain_value);
2226 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2227 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2228 emit_move_insn (mem, fnaddr);
2230 a_tramp = XEXP (m_tramp, 0);
2231 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2232 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2233 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2236 /* Thumb trampolines should be entered in thumb mode, so set
2237 the bottom bit of the address. */
2239 static rtx
2240 arm_trampoline_adjust_address (rtx addr)
2242 if (TARGET_THUMB)
2243 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2244 NULL, 0, OPTAB_LIB_WIDEN);
2245 return addr;
2248 /* Return 1 if it is possible to return using a single instruction.
2249 If SIBLING is non-null, this is a test for a return before a sibling
2250 call. SIBLING is the call insn, so we can examine its register usage. */
2253 use_return_insn (int iscond, rtx sibling)
2255 int regno;
2256 unsigned int func_type;
2257 unsigned long saved_int_regs;
2258 unsigned HOST_WIDE_INT stack_adjust;
2259 arm_stack_offsets *offsets;
2261 /* Never use a return instruction before reload has run. */
2262 if (!reload_completed)
2263 return 0;
2265 func_type = arm_current_func_type ();
2267 /* Naked, volatile and stack alignment functions need special
2268 consideration. */
2269 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2270 return 0;
2272 /* So do interrupt functions that use the frame pointer and Thumb
2273 interrupt functions. */
2274 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2275 return 0;
2277 offsets = arm_get_frame_offsets ();
2278 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2280 /* As do variadic functions. */
2281 if (crtl->args.pretend_args_size
2282 || cfun->machine->uses_anonymous_args
2283 /* Or if the function calls __builtin_eh_return () */
2284 || crtl->calls_eh_return
2285 /* Or if the function calls alloca */
2286 || cfun->calls_alloca
2287 /* Or if there is a stack adjustment. However, if the stack pointer
2288 is saved on the stack, we can use a pre-incrementing stack load. */
2289 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2290 && stack_adjust == 4)))
2291 return 0;
2293 saved_int_regs = offsets->saved_regs_mask;
2295 /* Unfortunately, the insn
2297 ldmib sp, {..., sp, ...}
2299 triggers a bug on most SA-110 based devices, such that the stack
2300 pointer won't be correctly restored if the instruction takes a
2301 page fault. We work around this problem by popping r3 along with
2302 the other registers, since that is never slower than executing
2303 another instruction.
2305 We test for !arm_arch5 here, because code for any architecture
2306 less than this could potentially be run on one of the buggy
2307 chips. */
2308 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2310 /* Validate that r3 is a call-clobbered register (always true in
2311 the default abi) ... */
2312 if (!call_used_regs[3])
2313 return 0;
2315 /* ... that it isn't being used for a return value ... */
2316 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2317 return 0;
2319 /* ... or for a tail-call argument ... */
2320 if (sibling)
2322 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2324 if (find_regno_fusage (sibling, USE, 3))
2325 return 0;
2328 /* ... and that there are no call-saved registers in r0-r2
2329 (always true in the default ABI). */
2330 if (saved_int_regs & 0x7)
2331 return 0;
2334 /* Can't be done if interworking with Thumb, and any registers have been
2335 stacked. */
2336 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2337 return 0;
2339 /* On StrongARM, conditional returns are expensive if they aren't
2340 taken and multiple registers have been stacked. */
2341 if (iscond && arm_tune_strongarm)
2343 /* Conditional return when just the LR is stored is a simple
2344 conditional-load instruction, that's not expensive. */
2345 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2346 return 0;
2348 if (flag_pic
2349 && arm_pic_register != INVALID_REGNUM
2350 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2351 return 0;
2354 /* If there are saved registers but the LR isn't saved, then we need
2355 two instructions for the return. */
2356 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2357 return 0;
2359 /* Can't be done if any of the FPA regs are pushed,
2360 since this also requires an insn. */
2361 if (TARGET_HARD_FLOAT && TARGET_FPA)
2362 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2363 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2364 return 0;
2366 /* Likewise VFP regs. */
2367 if (TARGET_HARD_FLOAT && TARGET_VFP)
2368 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2369 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2370 return 0;
2372 if (TARGET_REALLY_IWMMXT)
2373 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2374 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2375 return 0;
2377 return 1;
2380 /* Return TRUE if int I is a valid immediate ARM constant. */
2383 const_ok_for_arm (HOST_WIDE_INT i)
2385 int lowbit;
2387 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2388 be all zero, or all one. */
2389 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2390 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2391 != ((~(unsigned HOST_WIDE_INT) 0)
2392 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2393 return FALSE;
2395 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2397 /* Fast return for 0 and small values. We must do this for zero, since
2398 the code below can't handle that one case. */
2399 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2400 return TRUE;
2402 /* Get the number of trailing zeros. */
2403 lowbit = ffs((int) i) - 1;
2405 /* Only even shifts are allowed in ARM mode so round down to the
2406 nearest even number. */
2407 if (TARGET_ARM)
2408 lowbit &= ~1;
2410 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2411 return TRUE;
2413 if (TARGET_ARM)
2415 /* Allow rotated constants in ARM mode. */
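/* For illustration: an ARM data-processing immediate is an 8-bit value
   rotated right by an even amount, so the masks below cover the three
   rotations whose 8-bit field wraps around the top of the word:
   ror #2 occupies bits 31:30 and 5:0 (mask 0xc000003f),
   ror #4 occupies bits 31:28 and 3:0 (mask 0xf000000f),
   ror #6 occupies bits 31:26 and 1:0 (mask 0xfc000003).
   Non-wrapping placements were already accepted by the test above.  */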
2416 if (lowbit <= 4
2417 && ((i & ~0xc000003f) == 0
2418 || (i & ~0xf000000f) == 0
2419 || (i & ~0xfc000003) == 0))
2420 return TRUE;
2422 else
2424 HOST_WIDE_INT v;
2426 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2427 v = i & 0xff;
2428 v |= v << 16;
2429 if (i == v || i == (v | (v << 8)))
2430 return TRUE;
2432 /* Allow repeated pattern 0xXY00XY00. */
2433 v = i & 0xff00;
2434 v |= v << 16;
2435 if (i == v)
2436 return TRUE;
2439 return FALSE;
2442 /* Return true if I is a valid constant for the operation CODE. */
2443 static int
2444 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2446 if (const_ok_for_arm (i))
2447 return 1;
2449 switch (code)
2451 case PLUS:
2452 case COMPARE:
2453 case EQ:
2454 case NE:
2455 case GT:
2456 case LE:
2457 case LT:
2458 case GE:
2459 case GEU:
2460 case LTU:
2461 case GTU:
2462 case LEU:
2463 case UNORDERED:
2464 case ORDERED:
2465 case UNEQ:
2466 case UNGE:
2467 case UNLT:
2468 case UNGT:
2469 case UNLE:
2470 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2472 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2473 case XOR:
2474 return 0;
2476 case IOR:
2477 if (TARGET_THUMB2)
2478 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2479 return 0;
2481 case AND:
2482 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
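/* Illustrative examples: AND with 0xffffff00 is accepted here even
   though 0xffffff00 itself is not encodable, because ~0xffffff00 == 0xff
   and the operation can be emitted as a BIC; similarly the PLUS and
   comparison cases above accept e.g. -255, since the negated value 255
   is encodable and the ADD can become a SUB.  */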
2484 default:
2485 gcc_unreachable ();
2489 /* Emit a sequence of insns to handle a large constant.
2490 CODE is the code of the operation required; it can be any of SET, PLUS,
2491 IOR, AND, XOR, MINUS;
2492 MODE is the mode in which the operation is being performed;
2493 VAL is the integer to operate on;
2494 SOURCE is the other operand (a register, or a null-pointer for SET);
2495 SUBTARGETS means it is safe to create scratch registers if that will
2496 either produce a simpler sequence, or we will want to cse the values.
2497 Return value is the number of insns emitted. */
2499 /* ??? Tweak this for thumb2. */
2501 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2502 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2504 rtx cond;
2506 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2507 cond = COND_EXEC_TEST (PATTERN (insn));
2508 else
2509 cond = NULL_RTX;
2511 if (subtargets || code == SET
2512 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2513 && REGNO (target) != REGNO (source)))
2515 /* After arm_reorg has been called, we can't fix up expensive
2516 constants by pushing them into memory so we must synthesize
2517 them in-line, regardless of the cost. This is only likely to
2518 be more costly on chips that have load delay slots and we are
2519 compiling without running the scheduler (so no splitting
2520 occurred before the final instruction emission).
2522 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2524 if (!after_arm_reorg
2525 && !cond
2526 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2527 1, 0)
2528 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2529 + (code != SET))))
2531 if (code == SET)
2533 /* Currently SET is the only monadic value for CODE; all
2534 the rest are dyadic. */
2535 if (TARGET_USE_MOVT)
2536 arm_emit_movpair (target, GEN_INT (val));
2537 else
2538 emit_set_insn (target, GEN_INT (val));
2540 return 1;
2542 else
2544 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2546 if (TARGET_USE_MOVT)
2547 arm_emit_movpair (temp, GEN_INT (val));
2548 else
2549 emit_set_insn (temp, GEN_INT (val));
2551 /* For MINUS, the value is subtracted from, since we never
2552 have subtraction of a constant. */
2553 if (code == MINUS)
2554 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2555 else
2556 emit_set_insn (target,
2557 gen_rtx_fmt_ee (code, mode, source, temp));
2558 return 2;
2563 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2567 /* Return the number of instructions required to synthesize the given
2568 constant, if we start emitting them from bit-position I. */
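/* Rough example: for 0x00ff00ff in ARM mode this counts two insns, one
   per 8-bit chunk (the constant would typically be emitted as something
   like  mov rD, #0x00ff0000  followed by  orr rD, rD, #0xff).  */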
2569 static int
2570 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2572 HOST_WIDE_INT temp1;
2573 int step_size = TARGET_ARM ? 2 : 1;
2574 int num_insns = 0;
2576 gcc_assert (TARGET_ARM || i == 0);
2580 int end;
2582 if (i <= 0)
2583 i += 32;
2584 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2586 end = i - 8;
2587 if (end < 0)
2588 end += 32;
2589 temp1 = remainder & ((0x0ff << end)
2590 | ((i < end) ? (0xff >> (32 - end)) : 0));
2591 remainder &= ~temp1;
2592 num_insns++;
2593 i -= 8 - step_size;
2595 i -= step_size;
2596 } while (remainder);
2597 return num_insns;
2600 static int
2601 find_best_start (unsigned HOST_WIDE_INT remainder)
2603 int best_consecutive_zeros = 0;
2604 int i;
2605 int best_start = 0;
2607 /* If we aren't targeting ARM, the best place to start is always at
2608 the bottom. */
2609 if (! TARGET_ARM)
2610 return 0;
2612 for (i = 0; i < 32; i += 2)
2614 int consecutive_zeros = 0;
2616 if (!(remainder & (3 << i)))
2618 while ((i < 32) && !(remainder & (3 << i)))
2620 consecutive_zeros += 2;
2621 i += 2;
2623 if (consecutive_zeros > best_consecutive_zeros)
2625 best_consecutive_zeros = consecutive_zeros;
2626 best_start = i - consecutive_zeros;
2628 i -= 2;
2632 /* So long as it won't require any more insns to do so, it's
2633 desirable to emit a small constant (in bits 0...9) in the last
2634 insn. This way there is more chance that it can be combined with
2635 a later addressing insn to form a pre-indexed load or store
2636 operation. Consider:
2638 *((volatile int *)0xe0000100) = 1;
2639 *((volatile int *)0xe0000110) = 2;
2641 We want this to wind up as:
2643 mov rA, #0xe0000000
2644 mov rB, #1
2645 str rB, [rA, #0x100]
2646 mov rB, #2
2647 str rB, [rA, #0x110]
2649 rather than having to synthesize both large constants from scratch.
2651 Therefore, we calculate how many insns would be required to emit
2652 the constant starting from `best_start', and also starting from
2653 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2654 yield a shorter sequence, we may as well use zero. */
2655 if (best_start != 0
2656 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2657 && (count_insns_for_constant (remainder, 0) <=
2658 count_insns_for_constant (remainder, best_start)))
2659 best_start = 0;
2661 return best_start;
2664 /* Emit an instruction with the indicated PATTERN. If COND is
2665 non-NULL, conditionalize the execution of the instruction on COND
2666 being true. */
2668 static void
2669 emit_constant_insn (rtx cond, rtx pattern)
2671 if (cond)
2672 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2673 emit_insn (pattern);
2676 /* As above, but extra parameter GENERATE which, if clear, suppresses
2677 RTL generation. */
2678 /* ??? This needs more work for thumb2. */
2680 static int
2681 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2682 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2683 int generate)
2685 int can_invert = 0;
2686 int can_negate = 0;
2687 int final_invert = 0;
2688 int can_negate_initial = 0;
2689 int i;
2690 int num_bits_set = 0;
2691 int set_sign_bit_copies = 0;
2692 int clear_sign_bit_copies = 0;
2693 int clear_zero_bit_copies = 0;
2694 int set_zero_bit_copies = 0;
2695 int insns = 0;
2696 unsigned HOST_WIDE_INT temp1, temp2;
2697 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2698 int step_size = TARGET_ARM ? 2 : 1;
2700 /* Find out which operations are safe for a given CODE. Also do a quick
2701 check for degenerate cases; these can occur when DImode operations
2702 are split. */
2703 switch (code)
2705 case SET:
2706 can_invert = 1;
2707 can_negate = 1;
2708 break;
2710 case PLUS:
2711 can_negate = 1;
2712 can_negate_initial = 1;
2713 break;
2715 case IOR:
2716 if (remainder == 0xffffffff)
2718 if (generate)
2719 emit_constant_insn (cond,
2720 gen_rtx_SET (VOIDmode, target,
2721 GEN_INT (ARM_SIGN_EXTEND (val))));
2722 return 1;
2725 if (remainder == 0)
2727 if (reload_completed && rtx_equal_p (target, source))
2728 return 0;
2730 if (generate)
2731 emit_constant_insn (cond,
2732 gen_rtx_SET (VOIDmode, target, source));
2733 return 1;
2736 if (TARGET_THUMB2)
2737 can_invert = 1;
2738 break;
2740 case AND:
2741 if (remainder == 0)
2743 if (generate)
2744 emit_constant_insn (cond,
2745 gen_rtx_SET (VOIDmode, target, const0_rtx));
2746 return 1;
2748 if (remainder == 0xffffffff)
2750 if (reload_completed && rtx_equal_p (target, source))
2751 return 0;
2752 if (generate)
2753 emit_constant_insn (cond,
2754 gen_rtx_SET (VOIDmode, target, source));
2755 return 1;
2757 can_invert = 1;
2758 break;
2760 case XOR:
2761 if (remainder == 0)
2763 if (reload_completed && rtx_equal_p (target, source))
2764 return 0;
2765 if (generate)
2766 emit_constant_insn (cond,
2767 gen_rtx_SET (VOIDmode, target, source));
2768 return 1;
2771 if (remainder == 0xffffffff)
2773 if (generate)
2774 emit_constant_insn (cond,
2775 gen_rtx_SET (VOIDmode, target,
2776 gen_rtx_NOT (mode, source)));
2777 return 1;
2779 break;
2781 case MINUS:
2782 /* We treat MINUS as (val - source), since (source - val) is always
2783 passed as (source + (-val)). */
2784 if (remainder == 0)
2786 if (generate)
2787 emit_constant_insn (cond,
2788 gen_rtx_SET (VOIDmode, target,
2789 gen_rtx_NEG (mode, source)));
2790 return 1;
2792 if (const_ok_for_arm (val))
2794 if (generate)
2795 emit_constant_insn (cond,
2796 gen_rtx_SET (VOIDmode, target,
2797 gen_rtx_MINUS (mode, GEN_INT (val),
2798 source)));
2799 return 1;
2801 can_negate = 1;
2803 break;
2805 default:
2806 gcc_unreachable ();
2809 /* If we can do it in one insn get out quickly. */
2810 if (const_ok_for_arm (val)
2811 || (can_negate_initial && const_ok_for_arm (-val))
2812 || (can_invert && const_ok_for_arm (~val)))
2814 if (generate)
2815 emit_constant_insn (cond,
2816 gen_rtx_SET (VOIDmode, target,
2817 (source
2818 ? gen_rtx_fmt_ee (code, mode, source,
2819 GEN_INT (val))
2820 : GEN_INT (val))));
2821 return 1;
2824 /* Calculate a few attributes that may be useful for specific
2825 optimizations. */
2826 /* Count number of leading zeros. */
2827 for (i = 31; i >= 0; i--)
2829 if ((remainder & (1 << i)) == 0)
2830 clear_sign_bit_copies++;
2831 else
2832 break;
2835 /* Count number of leading 1's. */
2836 for (i = 31; i >= 0; i--)
2838 if ((remainder & (1 << i)) != 0)
2839 set_sign_bit_copies++;
2840 else
2841 break;
2844 /* Count number of trailing zeros. */
2845 for (i = 0; i <= 31; i++)
2847 if ((remainder & (1 << i)) == 0)
2848 clear_zero_bit_copies++;
2849 else
2850 break;
2853 /* Count number of trailing 1's. */
2854 for (i = 0; i <= 31; i++)
2856 if ((remainder & (1 << i)) != 0)
2857 set_zero_bit_copies++;
2858 else
2859 break;
2862 switch (code)
2864 case SET:
2865 /* See if we can use movw. */
2866 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2868 if (generate)
2869 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2870 GEN_INT (val)));
2871 return 1;
2874 /* See if we can do this by sign-extending a constant that is known
2875 to be negative. This is a good way of doing it, since the shift
2876 may well merge into a subsequent insn. */
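/* Illustrative example: for val = 0xffffff10 there are 24 leading 1s,
   so remainder << 23 == 0x88000000, which is a valid immediate; the
   constant can then be formed as roughly
       mov rT, #0x88000000
       mov rD, rT, asr #23  */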
2877 if (set_sign_bit_copies > 1)
2879 if (const_ok_for_arm
2880 (temp1 = ARM_SIGN_EXTEND (remainder
2881 << (set_sign_bit_copies - 1))))
2883 if (generate)
2885 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2886 emit_constant_insn (cond,
2887 gen_rtx_SET (VOIDmode, new_src,
2888 GEN_INT (temp1)));
2889 emit_constant_insn (cond,
2890 gen_ashrsi3 (target, new_src,
2891 GEN_INT (set_sign_bit_copies - 1)));
2893 return 2;
2895 /* For an inverted constant, we will need to set the low bits,
2896 these will be shifted out of harm's way. */
2897 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2898 if (const_ok_for_arm (~temp1))
2900 if (generate)
2902 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2903 emit_constant_insn (cond,
2904 gen_rtx_SET (VOIDmode, new_src,
2905 GEN_INT (temp1)));
2906 emit_constant_insn (cond,
2907 gen_ashrsi3 (target, new_src,
2908 GEN_INT (set_sign_bit_copies - 1)));
2910 return 2;
2914 /* See if we can calculate the value as the difference between two
2915 valid immediates. */
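/* Illustrative example: for remainder = 0x000fffff this computes
   temp1 = 0x00100000 and temp2 = 1, both valid immediates, so the
   constant is emitted as roughly
       mov rT, #0x00100000
       sub rD, rT, #1  */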
2916 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2918 int topshift = clear_sign_bit_copies & ~1;
2920 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2921 & (0xff000000 >> topshift));
2923 /* If temp1 is zero, then that means the 9 most significant
2924 bits of remainder were 1 and we've caused it to overflow.
2925 When topshift is 0 we don't need to do anything since we
2926 can borrow from 'bit 32'. */
2927 if (temp1 == 0 && topshift != 0)
2928 temp1 = 0x80000000 >> (topshift - 1);
2930 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2932 if (const_ok_for_arm (temp2))
2934 if (generate)
2936 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2937 emit_constant_insn (cond,
2938 gen_rtx_SET (VOIDmode, new_src,
2939 GEN_INT (temp1)));
2940 emit_constant_insn (cond,
2941 gen_addsi3 (target, new_src,
2942 GEN_INT (-temp2)));
2945 return 2;
2949 /* See if we can generate this by setting the bottom (or the top)
2950 16 bits, and then shifting these into the other half of the
2951 word. We only look for the simplest cases; to do more would cost
2952 too much. Be careful, however, not to generate this when the
2953 alternative would take fewer insns. */
2954 if (val & 0xffff0000)
2956 temp1 = remainder & 0xffff0000;
2957 temp2 = remainder & 0x0000ffff;
2959 /* Overlaps outside this range are best done using other methods. */
2960 for (i = 9; i < 24; i++)
2962 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2963 && !const_ok_for_arm (temp2))
2965 rtx new_src = (subtargets
2966 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2967 : target);
2968 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2969 source, subtargets, generate);
2970 source = new_src;
2971 if (generate)
2972 emit_constant_insn
2973 (cond,
2974 gen_rtx_SET
2975 (VOIDmode, target,
2976 gen_rtx_IOR (mode,
2977 gen_rtx_ASHIFT (mode, source,
2978 GEN_INT (i)),
2979 source)));
2980 return insns + 1;
2984 /* Don't duplicate cases already considered. */
2985 for (i = 17; i < 24; i++)
2987 if (((temp1 | (temp1 >> i)) == remainder)
2988 && !const_ok_for_arm (temp1))
2990 rtx new_src = (subtargets
2991 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2992 : target);
2993 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2994 source, subtargets, generate);
2995 source = new_src;
2996 if (generate)
2997 emit_constant_insn
2998 (cond,
2999 gen_rtx_SET (VOIDmode, target,
3000 gen_rtx_IOR
3001 (mode,
3002 gen_rtx_LSHIFTRT (mode, source,
3003 GEN_INT (i)),
3004 source)));
3005 return insns + 1;
3009 break;
3011 case IOR:
3012 case XOR:
3013 /* If we have IOR or XOR, and the constant can be loaded in a
3014 single instruction, and we can find a temporary to put it in,
3015 then this can be done in two instructions instead of 3-4. */
3016 if (subtargets
3017 /* TARGET can't be NULL if SUBTARGETS is 0 */
3018 || (reload_completed && !reg_mentioned_p (target, source)))
3020 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3022 if (generate)
3024 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3026 emit_constant_insn (cond,
3027 gen_rtx_SET (VOIDmode, sub,
3028 GEN_INT (val)));
3029 emit_constant_insn (cond,
3030 gen_rtx_SET (VOIDmode, target,
3031 gen_rtx_fmt_ee (code, mode,
3032 source, sub)));
3034 return 2;
3038 if (code == XOR)
3039 break;
3041 /* Convert
3042 x = y | constant (which is composed of set_sign_bit_copies leading 1s
3043 and the remainder 0s, e.g. 0xfff00000) into
3044 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
3046 This can be done in 2 instructions by using shifts with mov or mvn.
3047 E.g. for
3048 x = x | 0xfff00000;
3049 we generate:
3050 mvn r0, r0, asl #12
3051 mvn r0, r0, lsr #12 */
3052 if (set_sign_bit_copies > 8
3053 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3055 if (generate)
3057 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3058 rtx shift = GEN_INT (set_sign_bit_copies);
3060 emit_constant_insn
3061 (cond,
3062 gen_rtx_SET (VOIDmode, sub,
3063 gen_rtx_NOT (mode,
3064 gen_rtx_ASHIFT (mode,
3065 source,
3066 shift))));
3067 emit_constant_insn
3068 (cond,
3069 gen_rtx_SET (VOIDmode, target,
3070 gen_rtx_NOT (mode,
3071 gen_rtx_LSHIFTRT (mode, sub,
3072 shift))));
3074 return 2;
3077 /* Convert
3078 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3080 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3082 E.g. for r0 = r0 | 0xfff
3083 mvn r0, r0, lsr #12
3084 mvn r0, r0, asl #12
3087 if (set_zero_bit_copies > 8
3088 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3090 if (generate)
3092 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3093 rtx shift = GEN_INT (set_zero_bit_copies);
3095 emit_constant_insn
3096 (cond,
3097 gen_rtx_SET (VOIDmode, sub,
3098 gen_rtx_NOT (mode,
3099 gen_rtx_LSHIFTRT (mode,
3100 source,
3101 shift))));
3102 emit_constant_insn
3103 (cond,
3104 gen_rtx_SET (VOIDmode, target,
3105 gen_rtx_NOT (mode,
3106 gen_rtx_ASHIFT (mode, sub,
3107 shift))));
3109 return 2;
3112 /* This will never be reached for Thumb2 because orn is a valid
3113 instruction. This is for Thumb1 and the ARM 32 bit cases.
3115 x = y | constant (such that ~constant is a valid constant)
3116 Transform this to
3117 x = ~(~y & ~constant).
3119 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3121 if (generate)
3123 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3124 emit_constant_insn (cond,
3125 gen_rtx_SET (VOIDmode, sub,
3126 gen_rtx_NOT (mode, source)));
3127 source = sub;
3128 if (subtargets)
3129 sub = gen_reg_rtx (mode);
3130 emit_constant_insn (cond,
3131 gen_rtx_SET (VOIDmode, sub,
3132 gen_rtx_AND (mode, source,
3133 GEN_INT (temp1))));
3134 emit_constant_insn (cond,
3135 gen_rtx_SET (VOIDmode, target,
3136 gen_rtx_NOT (mode, sub)));
3138 return 3;
3140 break;
3142 case AND:
3143 /* See if two shifts will do 2 or more insn's worth of work. */
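/* Rough example: for x & 0x0000fffe neither the constant nor its
   inverse (0xffff0001) is a valid immediate, but the 16 leading zero
   bits can be cleared with a shift pair, giving something like
       bic rT, rS, #1
       mov rT, rT, lsl #16
       mov rD, rT, lsr #16  */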
3144 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3146 HOST_WIDE_INT shift_mask = ((0xffffffff
3147 << (32 - clear_sign_bit_copies))
3148 & 0xffffffff);
3150 if ((remainder | shift_mask) != 0xffffffff)
3152 if (generate)
3154 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3155 insns = arm_gen_constant (AND, mode, cond,
3156 remainder | shift_mask,
3157 new_src, source, subtargets, 1);
3158 source = new_src;
3160 else
3162 rtx targ = subtargets ? NULL_RTX : target;
3163 insns = arm_gen_constant (AND, mode, cond,
3164 remainder | shift_mask,
3165 targ, source, subtargets, 0);
3169 if (generate)
3171 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3172 rtx shift = GEN_INT (clear_sign_bit_copies);
3174 emit_insn (gen_ashlsi3 (new_src, source, shift));
3175 emit_insn (gen_lshrsi3 (target, new_src, shift));
3178 return insns + 2;
3181 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3183 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3185 if ((remainder | shift_mask) != 0xffffffff)
3187 if (generate)
3189 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3191 insns = arm_gen_constant (AND, mode, cond,
3192 remainder | shift_mask,
3193 new_src, source, subtargets, 1);
3194 source = new_src;
3196 else
3198 rtx targ = subtargets ? NULL_RTX : target;
3200 insns = arm_gen_constant (AND, mode, cond,
3201 remainder | shift_mask,
3202 targ, source, subtargets, 0);
3206 if (generate)
3208 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3209 rtx shift = GEN_INT (clear_zero_bit_copies);
3211 emit_insn (gen_lshrsi3 (new_src, source, shift));
3212 emit_insn (gen_ashlsi3 (target, new_src, shift));
3215 return insns + 2;
3218 break;
3220 default:
3221 break;
3224 for (i = 0; i < 32; i++)
3225 if (remainder & (1 << i))
3226 num_bits_set++;
3228 if ((code == AND)
3229 || (code != IOR && can_invert && num_bits_set > 16))
3230 remainder ^= 0xffffffff;
3231 else if (code == PLUS && num_bits_set > 16)
3232 remainder = (-remainder) & 0xffffffff;
3234 /* For XOR, if more than half the bits are set and there's a sequence
3235 of more than 8 consecutive ones in the pattern then we can XOR by the
3236 inverted constant and then invert the final result; this may save an
3237 instruction and might also lead to the final mvn being merged with
3238 some other operation. */
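/* Rough example: for x ^= 0xfffff00f the inverted constant is
   0x00000ff0, a single valid immediate, so  eor rD, rS, #0xff0
   followed by a final  mvn rD, rD  is cheaper than synthesizing
   0xfffff00f directly.  */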
3239 else if (code == XOR && num_bits_set > 16
3240 && (count_insns_for_constant (remainder ^ 0xffffffff,
3241 find_best_start
3242 (remainder ^ 0xffffffff))
3243 < count_insns_for_constant (remainder,
3244 find_best_start (remainder))))
3246 remainder ^= 0xffffffff;
3247 final_invert = 1;
3249 else
3251 can_invert = 0;
3252 can_negate = 0;
3255 /* Now try to find a way of doing the job in either two or three
3256 instructions.
3257 We start by looking for the largest block of zeros that is aligned on
3258 a 2-bit boundary; we then fill up the temps, wrapping around to the
3259 top of the word when we drop off the bottom.
3260 In the worst case this code should produce no more than four insns.
3261 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3262 best place to start. */
3264 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3265 the same. */
3267 /* Now start emitting the insns. */
3268 i = find_best_start (remainder);
3271 int end;
3273 if (i <= 0)
3274 i += 32;
3275 if (remainder & (3 << (i - 2)))
3277 end = i - 8;
3278 if (end < 0)
3279 end += 32;
3280 temp1 = remainder & ((0x0ff << end)
3281 | ((i < end) ? (0xff >> (32 - end)) : 0));
3282 remainder &= ~temp1;
3284 if (generate)
3286 rtx new_src, temp1_rtx;
3288 if (code == SET || code == MINUS)
3290 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3291 if (can_invert && code != MINUS)
3292 temp1 = ~temp1;
3294 else
3296 if ((final_invert || remainder) && subtargets)
3297 new_src = gen_reg_rtx (mode);
3298 else
3299 new_src = target;
3300 if (can_invert)
3301 temp1 = ~temp1;
3302 else if (can_negate)
3303 temp1 = -temp1;
3306 temp1 = trunc_int_for_mode (temp1, mode);
3307 temp1_rtx = GEN_INT (temp1);
3309 if (code == SET)
3311 else if (code == MINUS)
3312 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3313 else
3314 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3316 emit_constant_insn (cond,
3317 gen_rtx_SET (VOIDmode, new_src,
3318 temp1_rtx));
3319 source = new_src;
3322 if (code == SET)
3324 can_invert = 0;
3325 code = PLUS;
3327 else if (code == MINUS)
3328 code = PLUS;
3330 insns++;
3331 i -= 8 - step_size;
3333 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3334 shifts. */
3335 i -= step_size;
3337 while (remainder);
3340 if (final_invert)
3342 if (generate)
3343 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3344 gen_rtx_NOT (mode, source)));
3345 insns++;
3348 return insns;
3351 /* Canonicalize a comparison so that we are more likely to recognize it.
3352 This can be done for a few constant compares, where we can make the
3353 immediate value easier to load. */
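/* For example, (x <= 0xfff) cannot be tested directly because neither
   0xfff nor -0xfff is a valid immediate, but it is equivalent to
   (x < 0x1000), and 0x1000 is encodable; the code below performs this
   kind of adjustment.  */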
3355 enum rtx_code
3356 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3358 enum machine_mode mode;
3359 unsigned HOST_WIDE_INT i, maxval;
3361 mode = GET_MODE (*op0);
3362 if (mode == VOIDmode)
3363 mode = GET_MODE (*op1);
3365 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3367 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3368 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3369 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3370 for GTU/LEU in Thumb mode. */
3371 if (mode == DImode)
3373 rtx tem;
3375 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3376 available. */
3377 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3378 return code;
3380 if (code == GT || code == LE
3381 || (!TARGET_ARM && (code == GTU || code == LEU)))
3383 /* Missing comparison. First try to use an available
3384 comparison. */
3385 if (GET_CODE (*op1) == CONST_INT)
3387 i = INTVAL (*op1);
3388 switch (code)
3390 case GT:
3391 case LE:
3392 if (i != maxval
3393 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3395 *op1 = GEN_INT (i + 1);
3396 return code == GT ? GE : LT;
3398 break;
3399 case GTU:
3400 case LEU:
3401 if (i != ~((unsigned HOST_WIDE_INT) 0)
3402 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3404 *op1 = GEN_INT (i + 1);
3405 return code == GTU ? GEU : LTU;
3407 break;
3408 default:
3409 gcc_unreachable ();
3413 /* If that did not work, reverse the condition. */
3414 tem = *op0;
3415 *op0 = *op1;
3416 *op1 = tem;
3417 return swap_condition (code);
3420 return code;
3423 /* Comparisons smaller than DImode. Only adjust comparisons against
3424 an out-of-range constant. */
3425 if (GET_CODE (*op1) != CONST_INT
3426 || const_ok_for_arm (INTVAL (*op1))
3427 || const_ok_for_arm (- INTVAL (*op1)))
3428 return code;
3430 i = INTVAL (*op1);
3432 switch (code)
3434 case EQ:
3435 case NE:
3436 return code;
3438 case GT:
3439 case LE:
3440 if (i != maxval
3441 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3443 *op1 = GEN_INT (i + 1);
3444 return code == GT ? GE : LT;
3446 break;
3448 case GE:
3449 case LT:
3450 if (i != ~maxval
3451 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3453 *op1 = GEN_INT (i - 1);
3454 return code == GE ? GT : LE;
3456 break;
3458 case GTU:
3459 case LEU:
3460 if (i != ~((unsigned HOST_WIDE_INT) 0)
3461 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3463 *op1 = GEN_INT (i + 1);
3464 return code == GTU ? GEU : LTU;
3466 break;
3468 case GEU:
3469 case LTU:
3470 if (i != 0
3471 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3473 *op1 = GEN_INT (i - 1);
3474 return code == GEU ? GTU : LEU;
3476 break;
3478 default:
3479 gcc_unreachable ();
3482 return code;
3486 /* Define how to find the value returned by a function. */
3488 static rtx
3489 arm_function_value(const_tree type, const_tree func,
3490 bool outgoing ATTRIBUTE_UNUSED)
3492 enum machine_mode mode;
3493 int unsignedp ATTRIBUTE_UNUSED;
3494 rtx r ATTRIBUTE_UNUSED;
3496 mode = TYPE_MODE (type);
3498 if (TARGET_AAPCS_BASED)
3499 return aapcs_allocate_return_reg (mode, type, func);
3501 /* Promote integer types. */
3502 if (INTEGRAL_TYPE_P (type))
3503 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3505 /* Promotes small structs returned in a register to full-word size
3506 for big-endian AAPCS. */
3507 if (arm_return_in_msb (type))
3509 HOST_WIDE_INT size = int_size_in_bytes (type);
3510 if (size % UNITS_PER_WORD != 0)
3512 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3513 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3517 return LIBCALL_VALUE (mode);
3520 static int
3521 libcall_eq (const void *p1, const void *p2)
3523 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3526 static hashval_t
3527 libcall_hash (const void *p1)
3529 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3532 static void
3533 add_libcall (htab_t htab, rtx libcall)
3535 *htab_find_slot (htab, libcall, INSERT) = libcall;
3538 static bool
3539 arm_libcall_uses_aapcs_base (const_rtx libcall)
3541 static bool init_done = false;
3542 static htab_t libcall_htab;
3544 if (!init_done)
3546 init_done = true;
3548 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3549 NULL);
3550 add_libcall (libcall_htab,
3551 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3552 add_libcall (libcall_htab,
3553 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3554 add_libcall (libcall_htab,
3555 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3556 add_libcall (libcall_htab,
3557 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3559 add_libcall (libcall_htab,
3560 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3561 add_libcall (libcall_htab,
3562 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3563 add_libcall (libcall_htab,
3564 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3565 add_libcall (libcall_htab,
3566 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3568 add_libcall (libcall_htab,
3569 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3570 add_libcall (libcall_htab,
3571 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3572 add_libcall (libcall_htab,
3573 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3574 add_libcall (libcall_htab,
3575 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3576 add_libcall (libcall_htab,
3577 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3578 add_libcall (libcall_htab,
3579 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3582 return libcall && htab_find (libcall_htab, libcall) != NULL;
3586 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3588 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3589 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3591 /* The following libcalls return their result in integer registers,
3592 even though they return a floating point value. */
3593 if (arm_libcall_uses_aapcs_base (libcall))
3594 return gen_rtx_REG (mode, ARG_REGISTER(1));
3598 return LIBCALL_VALUE (mode);
3601 /* Determine the amount of memory needed to store the possible return
3602 registers of an untyped call. */
3604 arm_apply_result_size (void)
3606 int size = 16;
3608 if (TARGET_32BIT)
3610 if (TARGET_HARD_FLOAT_ABI)
3612 if (TARGET_VFP)
3613 size += 32;
3614 if (TARGET_FPA)
3615 size += 12;
3616 if (TARGET_MAVERICK)
3617 size += 8;
3619 if (TARGET_IWMMXT_ABI)
3620 size += 8;
3623 return size;
3626 /* Decide whether TYPE should be returned in memory (true)
3627 or in a register (false). FNTYPE is the type of the function making
3628 the call. */
3629 static bool
3630 arm_return_in_memory (const_tree type, const_tree fntype)
3632 HOST_WIDE_INT size;
3634 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3636 if (TARGET_AAPCS_BASED)
3638 /* Simple, non-aggregate types (i.e. not including vectors and
3639 complex) are always returned in a register (or registers).
3640 We don't care about which register here, so we can short-cut
3641 some of the detail. */
3642 if (!AGGREGATE_TYPE_P (type)
3643 && TREE_CODE (type) != VECTOR_TYPE
3644 && TREE_CODE (type) != COMPLEX_TYPE)
3645 return false;
3647 /* Any return value that is no larger than one word can be
3648 returned in r0. */
3649 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3650 return false;
3652 /* Check any available co-processors to see if they accept the
3653 type as a register candidate (VFP, for example, can return
3654 some aggregates in consecutive registers). These aren't
3655 available if the call is variadic. */
3656 if (aapcs_select_return_coproc (type, fntype) >= 0)
3657 return false;
3659 /* Vector values should be returned using ARM registers, not
3660 memory (unless they're over 16 bytes, which will break since
3661 we only have four call-clobbered registers to play with). */
3662 if (TREE_CODE (type) == VECTOR_TYPE)
3663 return (size < 0 || size > (4 * UNITS_PER_WORD));
3665 /* The rest go in memory. */
3666 return true;
3669 if (TREE_CODE (type) == VECTOR_TYPE)
3670 return (size < 0 || size > (4 * UNITS_PER_WORD));
3672 if (!AGGREGATE_TYPE_P (type) &&
3673 (TREE_CODE (type) != VECTOR_TYPE))
3674 /* All simple types are returned in registers. */
3675 return false;
3677 if (arm_abi != ARM_ABI_APCS)
3679 /* ATPCS and later return aggregate types in memory only if they are
3680 larger than a word (or are variable size). */
3681 return (size < 0 || size > UNITS_PER_WORD);
3684 /* For the arm-wince targets we choose to be compatible with Microsoft's
3685 ARM and Thumb compilers, which always return aggregates in memory. */
3686 #ifndef ARM_WINCE
3687 /* All structures/unions bigger than one word are returned in memory.
3688 Also catch the case where int_size_in_bytes returns -1. In this case
3689 the aggregate is either huge or of variable size, and in either case
3690 we will want to return it via memory and not in a register. */
3691 if (size < 0 || size > UNITS_PER_WORD)
3692 return true;
3694 if (TREE_CODE (type) == RECORD_TYPE)
3696 tree field;
3698 /* For a struct the APCS says that we only return in a register
3699 if the type is 'integer like' and every addressable element
3700 has an offset of zero. For practical purposes this means
3701 that the structure can have at most one non bit-field element
3702 and that this element must be the first one in the structure. */
3704 /* Find the first field, ignoring non FIELD_DECL things which will
3705 have been created by C++. */
3706 for (field = TYPE_FIELDS (type);
3707 field && TREE_CODE (field) != FIELD_DECL;
3708 field = DECL_CHAIN (field))
3709 continue;
3711 if (field == NULL)
3712 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3714 /* Check that the first field is valid for returning in a register. */
3716 /* ... Floats are not allowed */
3717 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3718 return true;
3720 /* ... Aggregates that are not themselves valid for returning in
3721 a register are not allowed. */
3722 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3723 return true;
3725 /* Now check the remaining fields, if any. Only bitfields are allowed,
3726 since they are not addressable. */
3727 for (field = DECL_CHAIN (field);
3728 field;
3729 field = DECL_CHAIN (field))
3731 if (TREE_CODE (field) != FIELD_DECL)
3732 continue;
3734 if (!DECL_BIT_FIELD_TYPE (field))
3735 return true;
3738 return false;
3741 if (TREE_CODE (type) == UNION_TYPE)
3743 tree field;
3745 /* Unions can be returned in registers if every element is
3746 integral, or can be returned in an integer register. */
3747 for (field = TYPE_FIELDS (type);
3748 field;
3749 field = DECL_CHAIN (field))
3751 if (TREE_CODE (field) != FIELD_DECL)
3752 continue;
3754 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3755 return true;
3757 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3758 return true;
3761 return false;
3763 #endif /* not ARM_WINCE */
3765 /* Return all other types in memory. */
3766 return true;
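/* Illustrative sketch (hypothetical types, not part of the original
   source): under the AAPCS branch above a word-sized aggregate is
   returned in r0, a larger aggregate with no co-processor candidate is
   returned in memory, and under APCS a struct whose first field is a
   float is also returned in memory.  */
#if 0
struct one_word  { short a, b; };    /* AAPCS: fits a word, so in r0.  */
struct three_int { int a, b, c; };   /* AAPCS: no candidate, memory.   */
struct has_float { float f; };       /* APCS: float field, memory.     */
#endif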
3769 /* Indicate whether or not words of a double are in big-endian order. */
3772 arm_float_words_big_endian (void)
3774 if (TARGET_MAVERICK)
3775 return 0;
3777 /* For FPA, float words are always big-endian. For VFP, floats words
3778 follow the memory system mode. */
3780 if (TARGET_FPA)
3782 return 1;
3785 if (TARGET_VFP)
3786 return (TARGET_BIG_END ? 1 : 0);
3788 return 1;
3791 const struct pcs_attribute_arg
3793 const char *arg;
3794 enum arm_pcs value;
3795 } pcs_attribute_args[] =
3797 {"aapcs", ARM_PCS_AAPCS},
3798 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3799 #if 0
3800 /* We could recognize these, but changes would be needed elsewhere
3801 * to implement them. */
3802 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3803 {"atpcs", ARM_PCS_ATPCS},
3804 {"apcs", ARM_PCS_APCS},
3805 #endif
3806 {NULL, ARM_PCS_UNKNOWN}
3809 static enum arm_pcs
3810 arm_pcs_from_attribute (tree attr)
3812 const struct pcs_attribute_arg *ptr;
3813 const char *arg;
3815 /* Get the value of the argument. */
3816 if (TREE_VALUE (attr) == NULL_TREE
3817 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3818 return ARM_PCS_UNKNOWN;
3820 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3822 /* Check it against the list of known arguments. */
3823 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3824 if (streq (arg, ptr->arg))
3825 return ptr->value;
3827 /* An unrecognized PCS variant. */
3828 return ARM_PCS_UNKNOWN;
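/* Usage sketch (illustrative, hypothetical declaration, not part of the
   original source): the strings in pcs_attribute_args are matched
   against source such as the following; any other string yields
   ARM_PCS_UNKNOWN.  */
#if 0
double dmul (double, double) __attribute__ ((pcs ("aapcs-vfp")));
#endif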
3831 /* Get the PCS variant to use for this call. TYPE is the function's type
3832 specification, DECL is the specific declaration. DECL may be null if
3833 the call could be indirect or if this is a library call. */
3834 static enum arm_pcs
3835 arm_get_pcs_model (const_tree type, const_tree decl)
3837 bool user_convention = false;
3838 enum arm_pcs user_pcs = arm_pcs_default;
3839 tree attr;
3841 gcc_assert (type);
3843 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3844 if (attr)
3846 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3847 user_convention = true;
3850 if (TARGET_AAPCS_BASED)
3852 /* Detect varargs functions. These always use the base rules
3853 (no argument is ever a candidate for a co-processor
3854 register). */
3855 bool base_rules = stdarg_p (type);
3857 if (user_convention)
3859 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3860 sorry ("non-AAPCS derived PCS variant");
3861 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3862 error ("variadic functions must use the base AAPCS variant");
3865 if (base_rules)
3866 return ARM_PCS_AAPCS;
3867 else if (user_convention)
3868 return user_pcs;
3869 else if (decl && flag_unit_at_a_time)
3871 /* Local functions never leak outside this compilation unit,
3872 so we are free to use whatever conventions are
3873 appropriate. */
3874 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3875 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3876 if (i && i->local)
3877 return ARM_PCS_AAPCS_LOCAL;
3880 else if (user_convention && user_pcs != arm_pcs_default)
3881 sorry ("PCS variant");
3883 /* For everything else we use the target's default. */
3884 return arm_pcs_default;
3888 static void
3889 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3890 const_tree fntype ATTRIBUTE_UNUSED,
3891 rtx libcall ATTRIBUTE_UNUSED,
3892 const_tree fndecl ATTRIBUTE_UNUSED)
3894 /* Record the unallocated VFP registers. */
3895 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3896 pcum->aapcs_vfp_reg_alloc = 0;
3899 /* Walk down the type tree of TYPE counting consecutive base elements.
3900 If *MODEP is VOIDmode, then set it to the first valid floating point
3901 type. If a non-floating point type is found, or if a floating point
3902 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3903 otherwise return the count in the sub-tree. */
3904 static int
3905 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3907 enum machine_mode mode;
3908 HOST_WIDE_INT size;
3910 switch (TREE_CODE (type))
3912 case REAL_TYPE:
3913 mode = TYPE_MODE (type);
3914 if (mode != DFmode && mode != SFmode)
3915 return -1;
3917 if (*modep == VOIDmode)
3918 *modep = mode;
3920 if (*modep == mode)
3921 return 1;
3923 break;
3925 case COMPLEX_TYPE:
3926 mode = TYPE_MODE (TREE_TYPE (type));
3927 if (mode != DFmode && mode != SFmode)
3928 return -1;
3930 if (*modep == VOIDmode)
3931 *modep = mode;
3933 if (*modep == mode)
3934 return 2;
3936 break;
3938 case VECTOR_TYPE:
3939 /* Use V2SImode and V4SImode as representatives of all 64-bit
3940 and 128-bit vector types, whether or not those modes are
3941 supported with the present options. */
3942 size = int_size_in_bytes (type);
3943 switch (size)
3945 case 8:
3946 mode = V2SImode;
3947 break;
3948 case 16:
3949 mode = V4SImode;
3950 break;
3951 default:
3952 return -1;
3955 if (*modep == VOIDmode)
3956 *modep = mode;
3958 /* Vector modes are considered to be opaque: two vectors are
3959 equivalent for the purposes of being homogeneous aggregates
3960 if they are the same size. */
3961 if (*modep == mode)
3962 return 1;
3964 break;
3966 case ARRAY_TYPE:
3968 int count;
3969 tree index = TYPE_DOMAIN (type);
3971 /* Can't handle incomplete types. */
3972 if (!COMPLETE_TYPE_P(type))
3973 return -1;
3975 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3976 if (count == -1
3977 || !index
3978 || !TYPE_MAX_VALUE (index)
3979 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3980 || !TYPE_MIN_VALUE (index)
3981 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3982 || count < 0)
3983 return -1;
3985 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3986 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3988 /* There must be no padding. */
3989 if (!host_integerp (TYPE_SIZE (type), 1)
3990 || (tree_low_cst (TYPE_SIZE (type), 1)
3991 != count * GET_MODE_BITSIZE (*modep)))
3992 return -1;
3994 return count;
3997 case RECORD_TYPE:
3999 int count = 0;
4000 int sub_count;
4001 tree field;
4003 /* Can't handle incomplete types. */
4004 if (!COMPLETE_TYPE_P(type))
4005 return -1;
4007 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4009 if (TREE_CODE (field) != FIELD_DECL)
4010 continue;
4012 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4013 if (sub_count < 0)
4014 return -1;
4015 count += sub_count;
4018 /* There must be no padding. */
4019 if (!host_integerp (TYPE_SIZE (type), 1)
4020 || (tree_low_cst (TYPE_SIZE (type), 1)
4021 != count * GET_MODE_BITSIZE (*modep)))
4022 return -1;
4024 return count;
4027 case UNION_TYPE:
4028 case QUAL_UNION_TYPE:
4030 /* These aren't very interesting except in a degenerate case. */
4031 int count = 0;
4032 int sub_count;
4033 tree field;
4035 /* Can't handle incomplete types. */
4036 if (!COMPLETE_TYPE_P(type))
4037 return -1;
4039 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4041 if (TREE_CODE (field) != FIELD_DECL)
4042 continue;
4044 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4045 if (sub_count < 0)
4046 return -1;
4047 count = count > sub_count ? count : sub_count;
4050 /* There must be no padding. */
4051 if (!host_integerp (TYPE_SIZE (type), 1)
4052 || (tree_low_cst (TYPE_SIZE (type), 1)
4053 != count * GET_MODE_BITSIZE (*modep)))
4054 return -1;
4056 return count;
4059 default:
4060 break;
4063 return -1;
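/* Illustrative sketch (hypothetical types, not part of the original
   source): for the homogeneous aggregates below the walk above returns
   3 with *MODEP == SFmode and 2 with *MODEP == DFmode respectively,
   while the mixed struct returns -1 and is not a VFP candidate.  */
#if 0
struct hfa   { float x, y, z; };   /* count 3, base mode SFmode.      */
struct hda   { double re, im; };   /* count 2, base mode DFmode.      */
struct mixed { float f; int i; };  /* -1: the int field is rejected.  */
#endif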
4066 /* Return true if PCS_VARIANT should use VFP registers. */
4067 static bool
4068 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4070 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4072 static bool seen_thumb1_vfp = false;
4074 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4076 sorry ("Thumb-1 hard-float VFP ABI");
4077 /* sorry() is not immediately fatal, so only display this once. */
4078 seen_thumb1_vfp = true;
4081 return true;
4084 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4085 return false;
4087 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4088 (TARGET_VFP_DOUBLE || !is_double));
4091 static bool
4092 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4093 enum machine_mode mode, const_tree type,
4094 enum machine_mode *base_mode, int *count)
4096 enum machine_mode new_mode = VOIDmode;
4098 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4099 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4100 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4102 *count = 1;
4103 new_mode = mode;
4105 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4107 *count = 2;
4108 new_mode = (mode == DCmode ? DFmode : SFmode);
4110 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4112 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4114 if (ag_count > 0 && ag_count <= 4)
4115 *count = ag_count;
4116 else
4117 return false;
4119 else
4120 return false;
4123 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4124 return false;
4126 *base_mode = new_mode;
4127 return true;
4130 static bool
4131 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4132 enum machine_mode mode, const_tree type)
4134 int count ATTRIBUTE_UNUSED;
4135 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4137 if (!use_vfp_abi (pcs_variant, false))
4138 return false;
4139 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4140 &ag_mode, &count);
4143 static bool
4144 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4145 const_tree type)
4147 if (!use_vfp_abi (pcum->pcs_variant, false))
4148 return false;
4150 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4151 &pcum->aapcs_vfp_rmode,
4152 &pcum->aapcs_vfp_rcount);
4155 static bool
4156 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4157 const_tree type ATTRIBUTE_UNUSED)
4159 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4160 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4161 int regno;
4163 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4164 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4166 pcum->aapcs_vfp_reg_alloc = mask << regno;
4167 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4169 int i;
4170 int rcount = pcum->aapcs_vfp_rcount;
4171 int rshift = shift;
4172 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4173 rtx par;
4174 if (!TARGET_NEON)
4176 /* Avoid using unsupported vector modes. */
4177 if (rmode == V2SImode)
4178 rmode = DImode;
4179 else if (rmode == V4SImode)
4181 rmode = DImode;
4182 rcount *= 2;
4183 rshift /= 2;
4186 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4187 for (i = 0; i < rcount; i++)
4189 rtx tmp = gen_rtx_REG (rmode,
4190 FIRST_VFP_REGNUM + regno + i * rshift);
4191 tmp = gen_rtx_EXPR_LIST
4192 (VOIDmode, tmp,
4193 GEN_INT (i * GET_MODE_SIZE (rmode)));
4194 XVECEXP (par, 0, i) = tmp;
4197 pcum->aapcs_reg = par;
4199 else
4200 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4201 return true;
4203 return false;
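/* Worked example (illustrative, not part of the original source): for a
   single DFmode argument aapcs_vfp_rmode is DFmode, so shift == 2 and
   mask == 0x3; the first such argument claims s0/s1 (d0), the next
   claims s2/s3 (d1), and a later SFmode argument (shift == 1, mask ==
   0x1) can still back-fill any single S register left free.  */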
4206 static rtx
4207 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
4208 enum machine_mode mode,
4209 const_tree type)
4211 if (!use_vfp_abi (pcs_variant, false))
4212 return NULL_RTX;
4214 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4216 int count;
4217 enum machine_mode ag_mode;
4218 int i;
4219 rtx par;
4220 int shift;
4222 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4223 &ag_mode, &count);
4225 if (!TARGET_NEON)
4227 if (ag_mode == V2SImode)
4228 ag_mode = DImode;
4229 else if (ag_mode == V4SImode)
4231 ag_mode = DImode;
4232 count *= 2;
4235 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4236 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4237 for (i = 0; i < count; i++)
4239 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4240 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4241 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4242 XVECEXP (par, 0, i) = tmp;
4245 return par;
4248 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4251 static void
4252 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4253 enum machine_mode mode ATTRIBUTE_UNUSED,
4254 const_tree type ATTRIBUTE_UNUSED)
4256 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4257 pcum->aapcs_vfp_reg_alloc = 0;
4258 return;
4261 #define AAPCS_CP(X) \
4263 aapcs_ ## X ## _cum_init, \
4264 aapcs_ ## X ## _is_call_candidate, \
4265 aapcs_ ## X ## _allocate, \
4266 aapcs_ ## X ## _is_return_candidate, \
4267 aapcs_ ## X ## _allocate_return_reg, \
4268 aapcs_ ## X ## _advance \
4271 /* Table of co-processors that can be used to pass arguments in
4272 registers. Ideally no argument should be a candidate for more than
4273 one co-processor table entry, but the table is processed in order
4274 and stops after the first match. If that entry then fails to put
4275 the argument into a co-processor register, the argument will go on
4276 the stack. */
4277 static struct
4279 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4280 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4282 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4283 BLKmode) is a candidate for this co-processor's registers; this
4284 function should ignore any position-dependent state in
4285 CUMULATIVE_ARGS and only use call-type dependent information. */
4286 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4288 /* Return true if the argument does get a co-processor register; it
4289 should set aapcs_reg to an RTX of the register allocated as is
4290 required for a return from FUNCTION_ARG. */
4291 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4293 /* Return true if a result of mode MODE (or type TYPE if MODE is
4294 BLKmode) can be returned in this co-processor's registers. */
4295 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4297 /* Allocate and return an RTX element to hold the return type of a
4298 call, this routine must not fail and will only be called if
4299 is_return_candidate returned true with the same parameters. */
4300 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4302 /* Finish processing this argument and prepare to start processing
4303 the next one. */
4304 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4305 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4307 AAPCS_CP(vfp)
4310 #undef AAPCS_CP
4312 static int
4313 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4314 const_tree type)
4316 int i;
4318 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4319 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4320 return i;
4322 return -1;
4325 static int
4326 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4328 /* We aren't passed a decl, so we can't check that a call is local.
4329 However, it isn't clear that that would be a win anyway, since it
4330 might limit some tail-calling opportunities. */
4331 enum arm_pcs pcs_variant;
4333 if (fntype)
4335 const_tree fndecl = NULL_TREE;
4337 if (TREE_CODE (fntype) == FUNCTION_DECL)
4339 fndecl = fntype;
4340 fntype = TREE_TYPE (fntype);
4343 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4345 else
4346 pcs_variant = arm_pcs_default;
4348 if (pcs_variant != ARM_PCS_AAPCS)
4350 int i;
4352 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4353 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4354 TYPE_MODE (type),
4355 type))
4356 return i;
4358 return -1;
4361 static rtx
4362 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4363 const_tree fntype)
4365 /* We aren't passed a decl, so we can't check that a call is local.
4366 However, it isn't clear that that would be a win anyway, since it
4367 might limit some tail-calling opportunities. */
4368 enum arm_pcs pcs_variant;
4369 int unsignedp ATTRIBUTE_UNUSED;
4371 if (fntype)
4373 const_tree fndecl = NULL_TREE;
4375 if (TREE_CODE (fntype) == FUNCTION_DECL)
4377 fndecl = fntype;
4378 fntype = TREE_TYPE (fntype);
4381 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4383 else
4384 pcs_variant = arm_pcs_default;
4386 /* Promote integer types. */
4387 if (type && INTEGRAL_TYPE_P (type))
4388 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4390 if (pcs_variant != ARM_PCS_AAPCS)
4392 int i;
4394 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4395 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4396 type))
4397 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4398 mode, type);
4401 /* Promotes small structs returned in a register to full-word size
4402 for big-endian AAPCS. */
4403 if (type && arm_return_in_msb (type))
4405 HOST_WIDE_INT size = int_size_in_bytes (type);
4406 if (size % UNITS_PER_WORD != 0)
4408 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4409 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4413 return gen_rtx_REG (mode, R0_REGNUM);
4417 aapcs_libcall_value (enum machine_mode mode)
4419 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4422 /* Lay out a function argument using the AAPCS rules. The rule
4423 numbers referred to here are those in the AAPCS. */
4424 static void
4425 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4426 const_tree type, bool named)
4428 int nregs, nregs2;
4429 int ncrn;
4431 /* We only need to do this once per argument. */
4432 if (pcum->aapcs_arg_processed)
4433 return;
4435 pcum->aapcs_arg_processed = true;
4437 /* Special case: if named is false then we are handling an incoming
4438 anonymous argument which is on the stack. */
4439 if (!named)
4440 return;
4442 /* Is this a potential co-processor register candidate? */
4443 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4445 int slot = aapcs_select_call_coproc (pcum, mode, type);
4446 pcum->aapcs_cprc_slot = slot;
4448 /* We don't have to apply any of the rules from part B of the
4449 preparation phase, these are handled elsewhere in the
4450 compiler. */
4452 if (slot >= 0)
4454 /* A Co-processor register candidate goes either in its own
4455 class of registers or on the stack. */
4456 if (!pcum->aapcs_cprc_failed[slot])
4458 /* C1.cp - Try to allocate the argument to co-processor
4459 registers. */
4460 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4461 return;
4463 /* C2.cp - Put the argument on the stack and note that we
4464 can't assign any more candidates in this slot. We also
4465 need to note that we have allocated stack space, so that
4466 we won't later try to split a non-cprc candidate between
4467 core registers and the stack. */
4468 pcum->aapcs_cprc_failed[slot] = true;
4469 pcum->can_split = false;
4472 /* We didn't get a register, so this argument goes on the
4473 stack. */
4474 gcc_assert (pcum->can_split == false);
4475 return;
4479 /* C3 - For double-word aligned arguments, round the NCRN up to the
4480 next even number. */
4481 ncrn = pcum->aapcs_ncrn;
4482 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4483 ncrn++;
4485 nregs = ARM_NUM_REGS2(mode, type);
4487 /* Sigh, this test should really assert that nregs > 0, but a GCC
4488 extension allows empty structs and then gives them zero size; it
4489 then allows such a structure to be passed by value. For some of
4490 the code below we have to pretend that such an argument has
4491 non-zero size so that we 'locate' it correctly either in
4492 registers or on the stack. */
4493 gcc_assert (nregs >= 0);
4495 nregs2 = nregs ? nregs : 1;
4497 /* C4 - Argument fits entirely in core registers. */
4498 if (ncrn + nregs2 <= NUM_ARG_REGS)
4500 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4501 pcum->aapcs_next_ncrn = ncrn + nregs;
4502 return;
4505 /* C5 - Some core registers left and there are no arguments already
4506 on the stack: split this argument between the remaining core
4507 registers and the stack. */
4508 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4510 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4511 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4512 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4513 return;
4516 /* C6 - NCRN is set to 4. */
4517 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4519 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4520 return;
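/* Worked example (illustrative, not part of the original source): for a
   call f (int a, long long b, int c) under AAPCS, rule C4 puts 'a' in
   r0; 'b' needs doubleword alignment, so rule C3 rounds the NCRN from 1
   up to 2 and 'b' takes r2/r3; 'c' then finds no core registers left
   and is placed on the stack via C6/C7.  */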
4523 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4524 for a call to a function whose data type is FNTYPE.
4525 For a library call, FNTYPE is NULL. */
4526 void
4527 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4528 rtx libname,
4529 tree fndecl ATTRIBUTE_UNUSED)
4531 /* Long call handling. */
4532 if (fntype)
4533 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4534 else
4535 pcum->pcs_variant = arm_pcs_default;
4537 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4539 if (arm_libcall_uses_aapcs_base (libname))
4540 pcum->pcs_variant = ARM_PCS_AAPCS;
4542 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4543 pcum->aapcs_reg = NULL_RTX;
4544 pcum->aapcs_partial = 0;
4545 pcum->aapcs_arg_processed = false;
4546 pcum->aapcs_cprc_slot = -1;
4547 pcum->can_split = true;
4549 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4551 int i;
4553 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4555 pcum->aapcs_cprc_failed[i] = false;
4556 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4559 return;
4562 /* Legacy ABIs */
4564 /* On the ARM, the offset starts at 0. */
4565 pcum->nregs = 0;
4566 pcum->iwmmxt_nregs = 0;
4567 pcum->can_split = true;
4569 /* Varargs vectors are treated the same as long long.
4570 named_count avoids having to change the way arm handles 'named' */
4571 pcum->named_count = 0;
4572 pcum->nargs = 0;
4574 if (TARGET_REALLY_IWMMXT && fntype)
4576 tree fn_arg;
4578 for (fn_arg = TYPE_ARG_TYPES (fntype);
4579 fn_arg;
4580 fn_arg = TREE_CHAIN (fn_arg))
4581 pcum->named_count += 1;
4583 if (! pcum->named_count)
4584 pcum->named_count = INT_MAX;
4589 /* Return true if mode/type need doubleword alignment. */
4590 static bool
4591 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4593 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4594 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4598 /* Determine where to put an argument to a function.
4599 Value is zero to push the argument on the stack,
4600 or a hard register in which to store the argument.
4602 MODE is the argument's machine mode.
4603 TYPE is the data type of the argument (as a tree).
4604 This is null for libcalls where that information may
4605 not be available.
4606 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4607 the preceding args and about the function being called.
4608 NAMED is nonzero if this argument is a named parameter
4609 (otherwise it is an extra parameter matching an ellipsis).
4611 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4612 other arguments are passed on the stack. If (NAMED == 0) (which happens
4613 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4614 defined), say it is passed in the stack (function_prologue will
4615 indeed make it pass in the stack if necessary). */
4617 static rtx
4618 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4619 const_tree type, bool named)
4621 int nregs;
4623 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4624 a call insn (op3 of a call_value insn). */
4625 if (mode == VOIDmode)
4626 return const0_rtx;
4628 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4630 aapcs_layout_arg (pcum, mode, type, named);
4631 return pcum->aapcs_reg;
4634 /* Varargs vectors are treated the same as long long.
4635 named_count avoids having to change the way arm handles 'named' */
4636 if (TARGET_IWMMXT_ABI
4637 && arm_vector_mode_supported_p (mode)
4638 && pcum->named_count > pcum->nargs + 1)
4640 if (pcum->iwmmxt_nregs <= 9)
4641 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4642 else
4644 pcum->can_split = false;
4645 return NULL_RTX;
4649 /* Put doubleword aligned quantities in even register pairs. */
4650 if (pcum->nregs & 1
4651 && ARM_DOUBLEWORD_ALIGN
4652 && arm_needs_doubleword_align (mode, type))
4653 pcum->nregs++;
4655 /* Only allow splitting an arg between regs and memory if all preceding
4656 args were allocated to regs. For args passed by reference we only count
4657 the reference pointer. */
4658 if (pcum->can_split)
4659 nregs = 1;
4660 else
4661 nregs = ARM_NUM_REGS2 (mode, type);
4663 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4664 return NULL_RTX;
4666 return gen_rtx_REG (mode, pcum->nregs);
4669 static unsigned int
4670 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4672 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4673 ? DOUBLEWORD_ALIGNMENT
4674 : PARM_BOUNDARY);
4677 static int
4678 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4679 tree type, bool named)
4681 int nregs = pcum->nregs;
4683 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4685 aapcs_layout_arg (pcum, mode, type, named);
4686 return pcum->aapcs_partial;
4689 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4690 return 0;
4692 if (NUM_ARG_REGS > nregs
4693 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4694 && pcum->can_split)
4695 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4697 return 0;
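/* Worked example (illustrative, not part of the original source): on
   the legacy-ABI path above, with r0 and r1 already in use (nregs ==
   2), a 16-byte structure that may still be split is given r2/r3 plus
   stack space, so the function reports (4 - 2) * UNITS_PER_WORD == 8
   bytes passed in registers.  */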
4700 /* Update the data in PCUM to advance over an argument
4701 of mode MODE and data type TYPE.
4702 (TYPE is null for libcalls where that information may not be available.) */
4704 static void
4705 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4706 const_tree type, bool named)
4708 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4710 aapcs_layout_arg (pcum, mode, type, named);
4712 if (pcum->aapcs_cprc_slot >= 0)
4714 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4715 type);
4716 pcum->aapcs_cprc_slot = -1;
4719 /* Generic stuff. */
4720 pcum->aapcs_arg_processed = false;
4721 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4722 pcum->aapcs_reg = NULL_RTX;
4723 pcum->aapcs_partial = 0;
4725 else
4727 pcum->nargs += 1;
4728 if (arm_vector_mode_supported_p (mode)
4729 && pcum->named_count > pcum->nargs
4730 && TARGET_IWMMXT_ABI)
4731 pcum->iwmmxt_nregs += 1;
4732 else
4733 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4737 /* Variable sized types are passed by reference. This is a GCC
4738 extension to the ARM ABI. */
4740 static bool
4741 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4742 enum machine_mode mode ATTRIBUTE_UNUSED,
4743 const_tree type, bool named ATTRIBUTE_UNUSED)
4745 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
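/* Illustrative note (not part of the original source): TYPE_SIZE is an
   INTEGER_CST for every fixed-size type, so the check above only fires
   for variable-sized types (a GNU extension); such arguments are passed
   by reference instead of being copied onto the stack.  */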
4748 /* Encode the current state of the #pragma [no_]long_calls. */
4749 typedef enum
4751 OFF, /* No #pragma [no_]long_calls is in effect. */
4752 LONG, /* #pragma long_calls is in effect. */
4753 SHORT /* #pragma no_long_calls is in effect. */
4754 } arm_pragma_enum;
4756 static arm_pragma_enum arm_pragma_long_calls = OFF;
4758 void
4759 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4761 arm_pragma_long_calls = LONG;
4764 void
4765 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4767 arm_pragma_long_calls = SHORT;
4770 void
4771 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4773 arm_pragma_long_calls = OFF;
4776 /* Handle an attribute requiring a FUNCTION_DECL;
4777 arguments as in struct attribute_spec.handler. */
4778 static tree
4779 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4780 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4782 if (TREE_CODE (*node) != FUNCTION_DECL)
4784 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4785 name);
4786 *no_add_attrs = true;
4789 return NULL_TREE;
4792 /* Handle an "interrupt" or "isr" attribute;
4793 arguments as in struct attribute_spec.handler. */
4794 static tree
4795 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4796 bool *no_add_attrs)
4798 if (DECL_P (*node))
4800 if (TREE_CODE (*node) != FUNCTION_DECL)
4802 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4803 name);
4804 *no_add_attrs = true;
4806 /* FIXME: the argument if any is checked for type attributes;
4807 should it be checked for decl ones? */
4809 else
4811 if (TREE_CODE (*node) == FUNCTION_TYPE
4812 || TREE_CODE (*node) == METHOD_TYPE)
4814 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4816 warning (OPT_Wattributes, "%qE attribute ignored",
4817 name);
4818 *no_add_attrs = true;
4821 else if (TREE_CODE (*node) == POINTER_TYPE
4822 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4823 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4824 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4826 *node = build_variant_type_copy (*node);
4827 TREE_TYPE (*node) = build_type_attribute_variant
4828 (TREE_TYPE (*node),
4829 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4830 *no_add_attrs = true;
4832 else
4834 /* Possibly pass this attribute on from the type to a decl. */
4835 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4836 | (int) ATTR_FLAG_FUNCTION_NEXT
4837 | (int) ATTR_FLAG_ARRAY_NEXT))
4839 *no_add_attrs = true;
4840 return tree_cons (name, args, NULL_TREE);
4842 else
4844 warning (OPT_Wattributes, "%qE attribute ignored",
4845 name);
4850 return NULL_TREE;
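/* Usage sketch (illustrative, hypothetical function name, not part of
   the original source): the handler above accepts an exception-kind
   string; recognized kinds map to ARM_FT_* values, while an unknown
   string makes arm_isr_value return ARM_FT_UNKNOWN and the attribute is
   dropped with a warning.  */
#if 0
void my_irq_handler (void) __attribute__ ((interrupt ("IRQ")));
#endif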
4853 /* Handle a "pcs" attribute; arguments as in struct
4854 attribute_spec.handler. */
4855 static tree
4856 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4857 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4859 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4861 warning (OPT_Wattributes, "%qE attribute ignored", name);
4862 *no_add_attrs = true;
4864 return NULL_TREE;
4867 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4868 /* Handle the "notshared" attribute. This attribute is another way of
4869 requesting hidden visibility. ARM's compiler supports
4870 "__declspec(notshared)"; we support the same thing via an
4871 attribute. */
4873 static tree
4874 arm_handle_notshared_attribute (tree *node,
4875 tree name ATTRIBUTE_UNUSED,
4876 tree args ATTRIBUTE_UNUSED,
4877 int flags ATTRIBUTE_UNUSED,
4878 bool *no_add_attrs)
4880 tree decl = TYPE_NAME (*node);
4882 if (decl)
4884 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4885 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4886 *no_add_attrs = false;
4888 return NULL_TREE;
4890 #endif
4892 /* Return 0 if the attributes for two types are incompatible, 1 if they
4893 are compatible, and 2 if they are nearly compatible (which causes a
4894 warning to be generated). */
4895 static int
4896 arm_comp_type_attributes (const_tree type1, const_tree type2)
4898 int l1, l2, s1, s2;
4900 /* Check for mismatch of non-default calling convention. */
4901 if (TREE_CODE (type1) != FUNCTION_TYPE)
4902 return 1;
4904 /* Check for mismatched call attributes. */
4905 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4906 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4907 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4908 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4910 /* Only bother to check if an attribute is defined. */
4911 if (l1 | l2 | s1 | s2)
4913 /* If one type has an attribute, the other must have the same attribute. */
4914 if ((l1 != l2) || (s1 != s2))
4915 return 0;
4917 /* Disallow mixed attributes. */
4918 if ((l1 & s2) || (l2 & s1))
4919 return 0;
4922 /* Check for mismatched ISR attribute. */
4923 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4924 if (! l1)
4925 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4926 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4927 if (! l2)
4928 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4929 if (l1 != l2)
4930 return 0;
4932 return 1;
4935 /* Assigns default attributes to newly defined type. This is used to
4936 set short_call/long_call attributes for function types of
4937 functions defined inside corresponding #pragma scopes. */
4938 static void
4939 arm_set_default_type_attributes (tree type)
4941 /* Add __attribute__ ((long_call)) to all functions, when
4942 inside #pragma long_calls or __attribute__ ((short_call)),
4943 when inside #pragma no_long_calls. */
4944 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4946 tree type_attr_list, attr_name;
4947 type_attr_list = TYPE_ATTRIBUTES (type);
4949 if (arm_pragma_long_calls == LONG)
4950 attr_name = get_identifier ("long_call");
4951 else if (arm_pragma_long_calls == SHORT)
4952 attr_name = get_identifier ("short_call");
4953 else
4954 return;
4956 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4957 TYPE_ATTRIBUTES (type) = type_attr_list;
4961 /* Return true if DECL is known to be linked into section SECTION. */
4963 static bool
4964 arm_function_in_section_p (tree decl, section *section)
4966 /* We can only be certain about functions defined in the same
4967 compilation unit. */
4968 if (!TREE_STATIC (decl))
4969 return false;
4971 /* Make sure that SYMBOL always binds to the definition in this
4972 compilation unit. */
4973 if (!targetm.binds_local_p (decl))
4974 return false;
4976 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4977 if (!DECL_SECTION_NAME (decl))
4979 /* Make sure that we will not create a unique section for DECL. */
4980 if (flag_function_sections || DECL_ONE_ONLY (decl))
4981 return false;
4984 return function_section (decl) == section;
4987 /* Return nonzero if a 32-bit "long_call" should be generated for
4988 a call from the current function to DECL. We generate a long_call
4989 if the function:
4991 a. has an __attribute__((long call))
4992 or b. is within the scope of a #pragma long_calls
4993 or c. the -mlong-calls command line switch has been specified
4995 However we do not generate a long call if the function:
4997 d. has an __attribute__ ((short_call))
4998 or e. is inside the scope of a #pragma no_long_calls
4999 or f. is defined in the same section as the current function. */
5001 bool
5002 arm_is_long_call_p (tree decl)
5004 tree attrs;
5006 if (!decl)
5007 return TARGET_LONG_CALLS;
5009 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5010 if (lookup_attribute ("short_call", attrs))
5011 return false;
5013 /* For "f", be conservative, and only cater for cases in which the
5014 whole of the current function is placed in the same section. */
5015 if (!flag_reorder_blocks_and_partition
5016 && TREE_CODE (decl) == FUNCTION_DECL
5017 && arm_function_in_section_p (decl, current_function_section ()))
5018 return false;
5020 if (lookup_attribute ("long_call", attrs))
5021 return true;
5023 return TARGET_LONG_CALLS;
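/* Usage sketch (illustrative, hypothetical declarations, not part of
   the original source): cases a, b, d and e above correspond to source
   such as the following; -mlong-calls (case c) makes the long-call
   treatment the default.  */
#if 0
extern void far_away (void) __attribute__ ((long_call));   /* case a */
extern void near_by (void) __attribute__ ((short_call));   /* case d */
#pragma long_calls
extern void also_far (void);                                /* case b */
#pragma no_long_calls
extern void also_near (void);                               /* case e */
#endif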
5026 /* Return nonzero if it is ok to make a tail-call to DECL. */
5027 static bool
5028 arm_function_ok_for_sibcall (tree decl, tree exp)
5030 unsigned long func_type;
5032 if (cfun->machine->sibcall_blocked)
5033 return false;
5035 /* Never tailcall something for which we have no decl, or if we
5036 are generating code for Thumb-1. */
5037 if (decl == NULL || TARGET_THUMB1)
5038 return false;
5040 /* The PIC register is live on entry to VxWorks PLT entries, so we
5041 must make the call before restoring the PIC register. */
5042 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5043 return false;
5045 /* Cannot tail-call to long calls, since these are out of range of
5046 a branch instruction. */
5047 if (arm_is_long_call_p (decl))
5048 return false;
5050 /* If we are interworking and the function is not declared static
5051 then we can't tail-call it unless we know that it exists in this
5052 compilation unit (since it might be a Thumb routine). */
5053 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5054 return false;
5056 func_type = arm_current_func_type ();
5057 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5058 if (IS_INTERRUPT (func_type))
5059 return false;
5061 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5063 /* Check that the return value locations are the same. For
5064 example that we aren't returning a value from the sibling in
5065 a VFP register but then need to transfer it to a core
5066 register. */
5067 rtx a, b;
5069 a = arm_function_value (TREE_TYPE (exp), decl, false);
5070 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5071 cfun->decl, false);
5072 if (!rtx_equal_p (a, b))
5073 return false;
5076 /* Never tailcall if function may be called with a misaligned SP. */
5077 if (IS_STACKALIGN (func_type))
5078 return false;
5080 /* Everything else is ok. */
5081 return true;
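/* Illustrative note (not part of the original source): the return-value
   check above rejects, for example, a base-PCS caller tail-calling a
   float-returning callee declared with __attribute__ ((pcs
   ("aapcs-vfp"))); the callee would leave its result in s0 while the
   caller has to hand it back in r0, so no sibcall is made.  */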
5085 /* Addressing mode support functions. */
5087 /* Return nonzero if X is a legitimate immediate operand when compiling
5088 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5090 legitimate_pic_operand_p (rtx x)
5092 if (GET_CODE (x) == SYMBOL_REF
5093 || (GET_CODE (x) == CONST
5094 && GET_CODE (XEXP (x, 0)) == PLUS
5095 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5096 return 0;
5098 return 1;
5101 /* Record that the current function needs a PIC register. Initialize
5102 cfun->machine->pic_reg if we have not already done so. */
5104 static void
5105 require_pic_register (void)
5107 /* A lot of the logic here is made obscure by the fact that this
5108 routine gets called as part of the rtx cost estimation process.
5109 We don't want those calls to affect any assumptions about the real
5110 function; and further, we can't call entry_of_function() until we
5111 start the real expansion process. */
5112 if (!crtl->uses_pic_offset_table)
5114 gcc_assert (can_create_pseudo_p ());
5115 if (arm_pic_register != INVALID_REGNUM)
5117 if (!cfun->machine->pic_reg)
5118 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5120 /* Play games to avoid marking the function as needing pic
5121 if we are being called as part of the cost-estimation
5122 process. */
5123 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5124 crtl->uses_pic_offset_table = 1;
5126 else
5128 rtx seq, insn;
5130 if (!cfun->machine->pic_reg)
5131 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5133 /* Play games to avoid marking the function as needing pic
5134 if we are being called as part of the cost-estimation
5135 process. */
5136 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5138 crtl->uses_pic_offset_table = 1;
5139 start_sequence ();
5141 arm_load_pic_register (0UL);
5143 seq = get_insns ();
5144 end_sequence ();
5146 for (insn = seq; insn; insn = NEXT_INSN (insn))
5147 if (INSN_P (insn))
5148 INSN_LOCATOR (insn) = prologue_locator;
5150 /* We can be called during expansion of PHI nodes, where
5151 we can't yet emit instructions directly in the final
5152 insn stream. Queue the insns on the entry edge, they will
5153 be committed after everything else is expanded. */
5154 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5161 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5163 if (GET_CODE (orig) == SYMBOL_REF
5164 || GET_CODE (orig) == LABEL_REF)
5166 rtx insn;
5168 if (reg == 0)
5170 gcc_assert (can_create_pseudo_p ());
5171 reg = gen_reg_rtx (Pmode);
5174 /* VxWorks does not impose a fixed gap between segments; the run-time
5175 gap can be different from the object-file gap. We therefore can't
5176 use GOTOFF unless we are absolutely sure that the symbol is in the
5177 same segment as the GOT. Unfortunately, the flexibility of linker
5178 scripts means that we can't be sure of that in general, so assume
5179 that GOTOFF is never valid on VxWorks. */
5180 if ((GET_CODE (orig) == LABEL_REF
5181 || (GET_CODE (orig) == SYMBOL_REF &&
5182 SYMBOL_REF_LOCAL_P (orig)))
5183 && NEED_GOT_RELOC
5184 && !TARGET_VXWORKS_RTP)
5185 insn = arm_pic_static_addr (orig, reg);
5186 else
5188 rtx pat;
5189 rtx mem;
5191 /* If this function doesn't have a pic register, create one now. */
5192 require_pic_register ();
5194 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5196 /* Make the MEM as close to a constant as possible. */
5197 mem = SET_SRC (pat);
5198 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5199 MEM_READONLY_P (mem) = 1;
5200 MEM_NOTRAP_P (mem) = 1;
5202 insn = emit_insn (pat);
5205 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5206 by loop. */
5207 set_unique_reg_note (insn, REG_EQUAL, orig);
5209 return reg;
5211 else if (GET_CODE (orig) == CONST)
5213 rtx base, offset;
5215 if (GET_CODE (XEXP (orig, 0)) == PLUS
5216 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5217 return orig;
5219 /* Handle the case where we have: const (UNSPEC_TLS). */
5220 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5221 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5222 return orig;
5224 /* Handle the case where we have:
5225 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5226 CONST_INT. */
5227 if (GET_CODE (XEXP (orig, 0)) == PLUS
5228 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5229 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5231 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5232 return orig;
5235 if (reg == 0)
5237 gcc_assert (can_create_pseudo_p ());
5238 reg = gen_reg_rtx (Pmode);
5241 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5243 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5244 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5245 base == reg ? 0 : reg);
5247 if (GET_CODE (offset) == CONST_INT)
5249 /* The base register doesn't really matter, we only want to
5250 test the index for the appropriate mode. */
5251 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5253 gcc_assert (can_create_pseudo_p ());
5254 offset = force_reg (Pmode, offset);
5257 if (GET_CODE (offset) == CONST_INT)
5258 return plus_constant (base, INTVAL (offset));
5261 if (GET_MODE_SIZE (mode) > 4
5262 && (GET_MODE_CLASS (mode) == MODE_INT
5263 || TARGET_SOFT_FLOAT))
5265 emit_insn (gen_addsi3 (reg, base, offset));
5266 return reg;
5269 return gen_rtx_PLUS (Pmode, base, offset);
5272 return orig;
5276 /* Find a spare register to use during the prolog of a function. */
5278 static int
5279 thumb_find_work_register (unsigned long pushed_regs_mask)
5281 int reg;
5283 /* Check the argument registers first as these are call-used. The
5284 register allocation order means that sometimes r3 might be used
5285 but earlier argument registers might not, so check them all. */
5286 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5287 if (!df_regs_ever_live_p (reg))
5288 return reg;
5290 /* Before going on to check the call-saved registers we can try a couple
5291 more ways of deducing that r3 is available. The first is when we are
5292 pushing anonymous arguments onto the stack and we have less than 4
5293 registers worth of fixed arguments(*). In this case r3 will be part of
5294 the variable argument list and so we can be sure that it will be
5295 pushed right at the start of the function. Hence it will be available
5296 for the rest of the prologue.
5297 (*): ie crtl->args.pretend_args_size is greater than 0. */
5298 if (cfun->machine->uses_anonymous_args
5299 && crtl->args.pretend_args_size > 0)
5300 return LAST_ARG_REGNUM;
5302 /* The other case is when we have fixed arguments but less than 4 registers
5303 worth. In this case r3 might be used in the body of the function, but
5304 it is not being used to convey an argument into the function. In theory
5305 we could just check crtl->args.size to see how many bytes are
5306 being passed in argument registers, but it seems that it is unreliable.
5307 Sometimes it will have the value 0 when in fact arguments are being
5308 passed. (See testcase execute/20021111-1.c for an example). So we also
5309 check the args_info.nregs field as well. The problem with this field is
5310 that it makes no allowances for arguments that are passed to the
5311 function but which are not used. Hence we could miss an opportunity
5312 when a function has an unused argument in r3. But it is better to be
5313 safe than to be sorry. */
5314 if (! cfun->machine->uses_anonymous_args
5315 && crtl->args.size >= 0
5316 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5317 && crtl->args.info.nregs < 4)
5318 return LAST_ARG_REGNUM;
5320 /* Otherwise look for a call-saved register that is going to be pushed. */
5321 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5322 if (pushed_regs_mask & (1 << reg))
5323 return reg;
5325 if (TARGET_THUMB2)
5327 /* Thumb-2 can use high regs. */
5328 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5329 if (pushed_regs_mask & (1 << reg))
5330 return reg;
5332 /* Something went wrong - thumb_compute_save_reg_mask()
5333 should have arranged for a suitable register to be pushed. */
5334 gcc_unreachable ();
5337 static GTY(()) int pic_labelno;
5339 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5340 low register. */
5342 void
5343 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5345 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5347 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5348 return;
5350 gcc_assert (flag_pic);
5352 pic_reg = cfun->machine->pic_reg;
5353 if (TARGET_VXWORKS_RTP)
5355 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5356 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5357 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5359 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5361 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5362 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5364 else
5366 /* We use an UNSPEC rather than a LABEL_REF because this label
5367 never appears in the code stream. */
5369 labelno = GEN_INT (pic_labelno++);
5370 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5371 l1 = gen_rtx_CONST (VOIDmode, l1);
5373 /* On the ARM the PC register contains 'dot + 8' at the time of the
5374 addition, on the Thumb it is 'dot + 4'. */
5375 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5376 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5377 UNSPEC_GOTSYM_OFF);
5378 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5380 if (TARGET_32BIT)
5382 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5383 if (TARGET_ARM)
5384 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5385 else
5386 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5388 else /* TARGET_THUMB1 */
5390 if (arm_pic_register != INVALID_REGNUM
5391 && REGNO (pic_reg) > LAST_LO_REGNUM)
5393 /* We will have pushed the pic register, so we should always be
5394 able to find a work register. */
5395 pic_tmp = gen_rtx_REG (SImode,
5396 thumb_find_work_register (saved_regs));
5397 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5398 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5400 else
5401 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5402 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5406 /* Need to emit this whether or not we obey regdecls,
5407 since setjmp/longjmp can invalidate the liveness information. */
5408 emit_use (pic_reg);
5411 /* Generate code to load the address of a static var when flag_pic is set. */
5412 static rtx
5413 arm_pic_static_addr (rtx orig, rtx reg)
5415 rtx l1, labelno, offset_rtx, insn;
5417 gcc_assert (flag_pic);
5419 /* We use an UNSPEC rather than a LABEL_REF because this label
5420 never appears in the code stream. */
5421 labelno = GEN_INT (pic_labelno++);
5422 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5423 l1 = gen_rtx_CONST (VOIDmode, l1);
5425 /* On the ARM the PC register contains 'dot + 8' at the time of the
5426 addition, on the Thumb it is 'dot + 4'. */
5427 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5428 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5429 UNSPEC_SYMBOL_OFFSET);
5430 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5432 if (TARGET_32BIT)
5434 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5435 if (TARGET_ARM)
5436 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5437 else
5438 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5440 else /* TARGET_THUMB1 */
5442 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5443 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5446 return insn;
5449 /* Return nonzero if X is valid as an ARM state addressing register. */
5450 static int
5451 arm_address_register_rtx_p (rtx x, int strict_p)
5453 int regno;
5455 if (GET_CODE (x) != REG)
5456 return 0;
5458 regno = REGNO (x);
5460 if (strict_p)
5461 return ARM_REGNO_OK_FOR_BASE_P (regno);
5463 return (regno <= LAST_ARM_REGNUM
5464 || regno >= FIRST_PSEUDO_REGISTER
5465 || regno == FRAME_POINTER_REGNUM
5466 || regno == ARG_POINTER_REGNUM);
5469 /* Return TRUE if this rtx is the difference of a symbol and a label,
5470 and will reduce to a PC-relative relocation in the object file.
5471 Expressions like this can be left alone when generating PIC, rather
5472 than forced through the GOT. */
5473 static int
5474 pcrel_constant_p (rtx x)
5476 if (GET_CODE (x) == MINUS)
5477 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5479 return FALSE;
5482 /* Return true if X will surely end up in an index register after next
5483 splitting pass. */
5484 static bool
5485 will_be_in_index_register (const_rtx x)
5487 /* arm.md: calculate_pic_address will split this into a register. */
5488 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5491 /* Return nonzero if X is a valid ARM state address operand. */
5493 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5494 int strict_p)
5496 bool use_ldrd;
5497 enum rtx_code code = GET_CODE (x);
5499 if (arm_address_register_rtx_p (x, strict_p))
5500 return 1;
5502 use_ldrd = (TARGET_LDRD
5503 && (mode == DImode
5504 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5506 if (code == POST_INC || code == PRE_DEC
5507 || ((code == PRE_INC || code == POST_DEC)
5508 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5509 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5511 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5512 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5513 && GET_CODE (XEXP (x, 1)) == PLUS
5514 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5516 rtx addend = XEXP (XEXP (x, 1), 1);
5518 /* Don't allow ldrd post increment by register because it's hard
5519 to fixup invalid register choices. */
5520 if (use_ldrd
5521 && GET_CODE (x) == POST_MODIFY
5522 && GET_CODE (addend) == REG)
5523 return 0;
5525 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5526 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5529 /* After reload constants split into minipools will have addresses
5530 from a LABEL_REF. */
5531 else if (reload_completed
5532 && (code == LABEL_REF
5533 || (code == CONST
5534 && GET_CODE (XEXP (x, 0)) == PLUS
5535 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5536 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5537 return 1;
5539 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5540 return 0;
5542 else if (code == PLUS)
5544 rtx xop0 = XEXP (x, 0);
5545 rtx xop1 = XEXP (x, 1);
5547 return ((arm_address_register_rtx_p (xop0, strict_p)
5548 && ((GET_CODE(xop1) == CONST_INT
5549 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5550 || (!strict_p && will_be_in_index_register (xop1))))
5551 || (arm_address_register_rtx_p (xop1, strict_p)
5552 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5555 #if 0
5556 /* Reload currently can't handle MINUS, so disable this for now */
5557 else if (GET_CODE (x) == MINUS)
5559 rtx xop0 = XEXP (x, 0);
5560 rtx xop1 = XEXP (x, 1);
5562 return (arm_address_register_rtx_p (xop0, strict_p)
5563 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5565 #endif
5567 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5568 && code == SYMBOL_REF
5569 && CONSTANT_POOL_ADDRESS_P (x)
5570 && ! (flag_pic
5571 && symbol_mentioned_p (get_pool_constant (x))
5572 && ! pcrel_constant_p (get_pool_constant (x))))
5573 return 1;
5575 return 0;
5578 /* Return nonzero if X is a valid Thumb-2 address operand. */
5579 static int
5580 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5582 bool use_ldrd;
5583 enum rtx_code code = GET_CODE (x);
5585 if (arm_address_register_rtx_p (x, strict_p))
5586 return 1;
5588 use_ldrd = (TARGET_LDRD
5589 && (mode == DImode
5590 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5592 if (code == POST_INC || code == PRE_DEC
5593 || ((code == PRE_INC || code == POST_DEC)
5594 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5595 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5597 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5598 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5599 && GET_CODE (XEXP (x, 1)) == PLUS
5600 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5602 /* Thumb-2 only has autoincrement by constant. */
5603 rtx addend = XEXP (XEXP (x, 1), 1);
5604 HOST_WIDE_INT offset;
5606 if (GET_CODE (addend) != CONST_INT)
5607 return 0;
5609 offset = INTVAL(addend);
5610 if (GET_MODE_SIZE (mode) <= 4)
5611 return (offset > -256 && offset < 256);
5613 return (use_ldrd && offset > -1024 && offset < 1024
5614 && (offset & 3) == 0);
5617 /* After reload constants split into minipools will have addresses
5618 from a LABEL_REF. */
5619 else if (reload_completed
5620 && (code == LABEL_REF
5621 || (code == CONST
5622 && GET_CODE (XEXP (x, 0)) == PLUS
5623 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5624 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5625 return 1;
5627 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5628 return 0;
5630 else if (code == PLUS)
5632 rtx xop0 = XEXP (x, 0);
5633 rtx xop1 = XEXP (x, 1);
5635 return ((arm_address_register_rtx_p (xop0, strict_p)
5636 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5637 || (!strict_p && will_be_in_index_register (xop1))))
5638 || (arm_address_register_rtx_p (xop1, strict_p)
5639 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5642 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5643 && code == SYMBOL_REF
5644 && CONSTANT_POOL_ADDRESS_P (x)
5645 && ! (flag_pic
5646 && symbol_mentioned_p (get_pool_constant (x))
5647 && ! pcrel_constant_p (get_pool_constant (x))))
5648 return 1;
5650 return 0;
5653 /* Return nonzero if INDEX is valid for an address index operand in
5654 ARM state. */
5655 static int
5656 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5657 int strict_p)
5659 HOST_WIDE_INT range;
5660 enum rtx_code code = GET_CODE (index);
5662 /* Standard coprocessor addressing modes. */
5663 if (TARGET_HARD_FLOAT
5664 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5665 && (mode == SFmode || mode == DFmode
5666 || (TARGET_MAVERICK && mode == DImode)))
5667 return (code == CONST_INT && INTVAL (index) < 1024
5668 && INTVAL (index) > -1024
5669 && (INTVAL (index) & 3) == 0);
5671 /* For quad modes, we restrict the constant offset to be slightly less
5672 than what the instruction format permits. We do this because for
5673 quad mode moves, we will actually decompose them into two separate
5674 double-mode reads or writes. INDEX must therefore be a valid
5675 (double-mode) offset and so should INDEX+8. */
5676 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5677 return (code == CONST_INT
5678 && INTVAL (index) < 1016
5679 && INTVAL (index) > -1024
5680 && (INTVAL (index) & 3) == 0);
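/* For example, a 16-byte quad-register move at offset 1012 (the largest
   aligned value the check above allows) is decomposed into doubleword
   accesses at 1012 and 1020, both still inside the doubleword range
   below; permitting 1020 here would push the second access to 1028,
   which is out of range.  */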
5682 /* We have no such constraint on double mode offsets, so we permit the
5683 full range of the instruction format. */
5684 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5685 return (code == CONST_INT
5686 && INTVAL (index) < 1024
5687 && INTVAL (index) > -1024
5688 && (INTVAL (index) & 3) == 0);
5690 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5691 return (code == CONST_INT
5692 && INTVAL (index) < 1024
5693 && INTVAL (index) > -1024
5694 && (INTVAL (index) & 3) == 0);
5696 if (arm_address_register_rtx_p (index, strict_p)
5697 && (GET_MODE_SIZE (mode) <= 4))
5698 return 1;
5700 if (mode == DImode || mode == DFmode)
5702 if (code == CONST_INT)
5704 HOST_WIDE_INT val = INTVAL (index);
5706 if (TARGET_LDRD)
5707 return val > -256 && val < 256;
5708 else
5709 return val > -4096 && val < 4092;
5712 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5715 if (GET_MODE_SIZE (mode) <= 4
5716 && ! (arm_arch4
5717 && (mode == HImode
5718 || mode == HFmode
5719 || (mode == QImode && outer == SIGN_EXTEND))))
5721 if (code == MULT)
5723 rtx xiop0 = XEXP (index, 0);
5724 rtx xiop1 = XEXP (index, 1);
5726 return ((arm_address_register_rtx_p (xiop0, strict_p)
5727 && power_of_two_operand (xiop1, SImode))
5728 || (arm_address_register_rtx_p (xiop1, strict_p)
5729 && power_of_two_operand (xiop0, SImode)));
5731 else if (code == LSHIFTRT || code == ASHIFTRT
5732 || code == ASHIFT || code == ROTATERT)
5734 rtx op = XEXP (index, 1);
5736 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5737 && GET_CODE (op) == CONST_INT
5738 && INTVAL (op) > 0
5739 && INTVAL (op) <= 31);
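/* Illustration of the two cases above, stated loosely: an index of the
   form (mult (reg Rm) (const_int 4)) corresponds to the scaled-register
   address [Rn, Rm, LSL #2], while the explicit shift codes cover forms
   such as [Rn, Rm, ASR #3], with the shift count restricted to a
   constant in 1..31.  */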
5743 /* For ARM v4 we may be doing a sign-extend operation during the
5744 load. */
5745 if (arm_arch4)
5747 if (mode == HImode
5748 || mode == HFmode
5749 || (outer == SIGN_EXTEND && mode == QImode))
5750 range = 256;
5751 else
5752 range = 4096;
5754 else
5755 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5757 return (code == CONST_INT
5758 && INTVAL (index) < range
5759 && INTVAL (index) > -range);
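/* The ranges above mirror the underlying load/store encodings: with
   arm_arch4, halfword (and HFmode) and sign-extending byte accesses get
   the +-255 offsets of LDRH/LDRSB, everything else the +-4095 offsets of
   LDR/LDRB.  The strict '<'/'>' tests exclude the range endpoints
   themselves.  */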
5762 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5763 index operand. i.e. 1, 2, 4 or 8. */
5764 static bool
5765 thumb2_index_mul_operand (rtx op)
5767 HOST_WIDE_INT val;
5769 if (GET_CODE(op) != CONST_INT)
5770 return false;
5772 val = INTVAL(op);
5773 return (val == 1 || val == 2 || val == 4 || val == 8);
5776 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5777 static int
5778 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5780 enum rtx_code code = GET_CODE (index);
5782 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5783 /* Standard coprocessor addressing modes. */
5784 if (TARGET_HARD_FLOAT
5785 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5786 && (mode == SFmode || mode == DFmode
5787 || (TARGET_MAVERICK && mode == DImode)))
5788 return (code == CONST_INT && INTVAL (index) < 1024
5789 /* Thumb-2 allows only a > -256 index range for its core register
5790 load/stores. Since we allow SF/DF in core registers, we have
5791 to use the intersection between -256~4096 (core) and -1024~1024
5792 (coprocessor). */
5793 && INTVAL (index) > -256
5794 && (INTVAL (index) & 3) == 0);
5796 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5798 /* For DImode assume values will usually live in core regs
5799 and only allow LDRD addressing modes. */
5800 if (!TARGET_LDRD || mode != DImode)
5801 return (code == CONST_INT
5802 && INTVAL (index) < 1024
5803 && INTVAL (index) > -1024
5804 && (INTVAL (index) & 3) == 0);
5807 /* For quad modes, we restrict the constant offset to be slightly less
5808 than what the instruction format permits. We do this because for
5809 quad mode moves, we will actually decompose them into two separate
5810 double-mode reads or writes. INDEX must therefore be a valid
5811 (double-mode) offset and so should INDEX+8. */
5812 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5813 return (code == CONST_INT
5814 && INTVAL (index) < 1016
5815 && INTVAL (index) > -1024
5816 && (INTVAL (index) & 3) == 0);
5818 /* We have no such constraint on double mode offsets, so we permit the
5819 full range of the instruction format. */
5820 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5821 return (code == CONST_INT
5822 && INTVAL (index) < 1024
5823 && INTVAL (index) > -1024
5824 && (INTVAL (index) & 3) == 0);
5826 if (arm_address_register_rtx_p (index, strict_p)
5827 && (GET_MODE_SIZE (mode) <= 4))
5828 return 1;
5830 if (mode == DImode || mode == DFmode)
5832 if (code == CONST_INT)
5834 HOST_WIDE_INT val = INTVAL (index);
5835 /* ??? Can we assume ldrd for thumb2? */
5836 /* Thumb-2 ldrd only has reg+const addressing modes. */
5837 /* ldrd supports offsets of +-1020.
5838 However the ldr fallback does not. */
5839 return val > -256 && val < 256 && (val & 3) == 0;
5841 else
5842 return 0;
5845 if (code == MULT)
5847 rtx xiop0 = XEXP (index, 0);
5848 rtx xiop1 = XEXP (index, 1);
5850 return ((arm_address_register_rtx_p (xiop0, strict_p)
5851 && thumb2_index_mul_operand (xiop1))
5852 || (arm_address_register_rtx_p (xiop1, strict_p)
5853 && thumb2_index_mul_operand (xiop0)));
5855 else if (code == ASHIFT)
5857 rtx op = XEXP (index, 1);
5859 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5860 && GET_CODE (op) == CONST_INT
5861 && INTVAL (op) > 0
5862 && INTVAL (op) <= 3);
5865 return (code == CONST_INT
5866 && INTVAL (index) < 4096
5867 && INTVAL (index) > -256);
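/* The final fallback above corresponds, roughly, to the plain immediate
   forms of Thumb-2 loads/stores: a positive 12-bit offset (0..4095) or a
   small negative offset (-255..-1).  */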
5870 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5871 static int
5872 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5874 int regno;
5876 if (GET_CODE (x) != REG)
5877 return 0;
5879 regno = REGNO (x);
5881 if (strict_p)
5882 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5884 return (regno <= LAST_LO_REGNUM
5885 || regno > LAST_VIRTUAL_REGISTER
5886 || regno == FRAME_POINTER_REGNUM
5887 || (GET_MODE_SIZE (mode) >= 4
5888 && (regno == STACK_POINTER_REGNUM
5889 || regno >= FIRST_PSEUDO_REGISTER
5890 || x == hard_frame_pointer_rtx
5891 || x == arg_pointer_rtx)));
5894 /* Return nonzero if x is a legitimate index register. This is the case
5895 for any base register that can access a QImode object. */
5896 inline static int
5897 thumb1_index_register_rtx_p (rtx x, int strict_p)
5899 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5902 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5904 The AP may be eliminated to either the SP or the FP, so we use the
5905 least common denominator, e.g. SImode, and offsets from 0 to 64.
5907 ??? Verify whether the above is the right approach.
5909 ??? Also, the FP may be eliminated to the SP, so perhaps that
5910 needs special handling also.
5912 ??? Look at how the mips16 port solves this problem. It probably uses
5913 better ways to solve some of these problems.
5915 Although it is not incorrect, we don't accept QImode and HImode
5916 addresses based on the frame pointer or arg pointer until the
5917 reload pass starts. This is so that eliminating such addresses
5918 into stack based ones won't produce impossible code. */
5919 static int
5920 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5922 /* ??? Not clear if this is right. Experiment. */
5923 if (GET_MODE_SIZE (mode) < 4
5924 && !(reload_in_progress || reload_completed)
5925 && (reg_mentioned_p (frame_pointer_rtx, x)
5926 || reg_mentioned_p (arg_pointer_rtx, x)
5927 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5928 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5929 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5930 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5931 return 0;
5933 /* Accept any base register. SP only in SImode or larger. */
5934 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5935 return 1;
5937 /* This is PC relative data before arm_reorg runs. */
5938 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5939 && GET_CODE (x) == SYMBOL_REF
5940 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5941 return 1;
5943 /* This is PC relative data after arm_reorg runs. */
5944 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5945 && reload_completed
5946 && (GET_CODE (x) == LABEL_REF
5947 || (GET_CODE (x) == CONST
5948 && GET_CODE (XEXP (x, 0)) == PLUS
5949 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5950 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5951 return 1;
5953 /* Post-inc indexing only supported for SImode and larger. */
5954 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5955 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5956 return 1;
5958 else if (GET_CODE (x) == PLUS)
5960 /* REG+REG address can be any two index registers. */
5961 /* We disallow FRAME+REG addressing since we know that FRAME
5962 will be replaced with STACK, and SP relative addressing only
5963 permits SP+OFFSET. */
5964 if (GET_MODE_SIZE (mode) <= 4
5965 && XEXP (x, 0) != frame_pointer_rtx
5966 && XEXP (x, 1) != frame_pointer_rtx
5967 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5968 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5969 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5970 return 1;
5972 /* REG+const has 5-7 bit offset for non-SP registers. */
5973 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5974 || XEXP (x, 0) == arg_pointer_rtx)
5975 && GET_CODE (XEXP (x, 1)) == CONST_INT
5976 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5977 return 1;
5979 /* REG+const has 10-bit offset for SP, but only SImode and
5980 larger is supported. */
5981 /* ??? Should probably check for DI/DFmode overflow here
5982 just like GO_IF_LEGITIMATE_OFFSET does. */
5983 else if (GET_CODE (XEXP (x, 0)) == REG
5984 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5985 && GET_MODE_SIZE (mode) >= 4
5986 && GET_CODE (XEXP (x, 1)) == CONST_INT
5987 && INTVAL (XEXP (x, 1)) >= 0
5988 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5989 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5990 return 1;
5992 else if (GET_CODE (XEXP (x, 0)) == REG
5993 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5994 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5995 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5996 && REGNO (XEXP (x, 0))
5997 <= LAST_VIRTUAL_POINTER_REGISTER))
5998 && GET_MODE_SIZE (mode) >= 4
5999 && GET_CODE (XEXP (x, 1)) == CONST_INT
6000 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6001 return 1;
6004 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6005 && GET_MODE_SIZE (mode) == 4
6006 && GET_CODE (x) == SYMBOL_REF
6007 && CONSTANT_POOL_ADDRESS_P (x)
6008 && ! (flag_pic
6009 && symbol_mentioned_p (get_pool_constant (x))
6010 && ! pcrel_constant_p (get_pool_constant (x))))
6011 return 1;
6013 return 0;
6016 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6017 instruction of mode MODE. */
6018 int
6019 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6021 switch (GET_MODE_SIZE (mode))
6023 case 1:
6024 return val >= 0 && val < 32;
6026 case 2:
6027 return val >= 0 && val < 64 && (val & 1) == 0;
6029 default:
6030 return (val >= 0
6031 && (val + GET_MODE_SIZE (mode)) <= 128
6032 && (val & 3) == 0);
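/* In other words (roughly the Thumb-1 5-bit scaled immediates): byte
   accesses may use offsets 0..31, halfword accesses 0..62 in steps of 2,
   and word or larger accesses word-aligned offsets that keep the whole
   access below 128, e.g. 0..124 for SImode.  */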
6036 bool
6037 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6039 if (TARGET_ARM)
6040 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6041 else if (TARGET_THUMB2)
6042 return thumb2_legitimate_address_p (mode, x, strict_p);
6043 else /* if (TARGET_THUMB1) */
6044 return thumb1_legitimate_address_p (mode, x, strict_p);
6047 /* Build the SYMBOL_REF for __tls_get_addr. */
6049 static GTY(()) rtx tls_get_addr_libfunc;
6051 static rtx
6052 get_tls_get_addr (void)
6054 if (!tls_get_addr_libfunc)
6055 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6056 return tls_get_addr_libfunc;
6059 static rtx
6060 arm_load_tp (rtx target)
6062 if (!target)
6063 target = gen_reg_rtx (SImode);
6065 if (TARGET_HARD_TP)
6067 /* Can return in any reg. */
6068 emit_insn (gen_load_tp_hard (target));
6070 else
6072 /* Always returned in r0. Immediately copy the result into a pseudo,
6073 otherwise other uses of r0 (e.g. setting up function arguments) may
6074 clobber the value. */
6076 rtx tmp;
6078 emit_insn (gen_load_tp_soft ());
6080 tmp = gen_rtx_REG (SImode, 0);
6081 emit_move_insn (target, tmp);
6083 return target;
6086 static rtx
6087 load_tls_operand (rtx x, rtx reg)
6089 rtx tmp;
6091 if (reg == NULL_RTX)
6092 reg = gen_reg_rtx (SImode);
6094 tmp = gen_rtx_CONST (SImode, x);
6096 emit_move_insn (reg, tmp);
6098 return reg;
6101 static rtx
6102 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6104 rtx insns, label, labelno, sum;
6106 start_sequence ();
6108 labelno = GEN_INT (pic_labelno++);
6109 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6110 label = gen_rtx_CONST (VOIDmode, label);
6112 sum = gen_rtx_UNSPEC (Pmode,
6113 gen_rtvec (4, x, GEN_INT (reloc), label,
6114 GEN_INT (TARGET_ARM ? 8 : 4)),
6115 UNSPEC_TLS);
6116 reg = load_tls_operand (sum, reg);
6118 if (TARGET_ARM)
6119 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6120 else if (TARGET_THUMB2)
6121 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6122 else /* TARGET_THUMB1 */
6123 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6125 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
6126 Pmode, 1, reg, Pmode);
6128 insns = get_insns ();
6129 end_sequence ();
6131 return insns;
6134 rtx
6135 legitimize_tls_address (rtx x, rtx reg)
6137 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6138 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6140 switch (model)
6142 case TLS_MODEL_GLOBAL_DYNAMIC:
6143 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6144 dest = gen_reg_rtx (Pmode);
6145 emit_libcall_block (insns, dest, ret, x);
6146 return dest;
6148 case TLS_MODEL_LOCAL_DYNAMIC:
6149 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6151 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6152 share the LDM result with other LD model accesses. */
6153 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6154 UNSPEC_TLS);
6155 dest = gen_reg_rtx (Pmode);
6156 emit_libcall_block (insns, dest, ret, eqv);
6158 /* Load the addend. */
6159 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
6160 UNSPEC_TLS);
6161 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6162 return gen_rtx_PLUS (Pmode, dest, addend);
6164 case TLS_MODEL_INITIAL_EXEC:
6165 labelno = GEN_INT (pic_labelno++);
6166 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6167 label = gen_rtx_CONST (VOIDmode, label);
6168 sum = gen_rtx_UNSPEC (Pmode,
6169 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6170 GEN_INT (TARGET_ARM ? 8 : 4)),
6171 UNSPEC_TLS);
6172 reg = load_tls_operand (sum, reg);
6174 if (TARGET_ARM)
6175 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6176 else if (TARGET_THUMB2)
6177 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6178 else
6180 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6181 emit_move_insn (reg, gen_const_mem (SImode, reg));
6184 tp = arm_load_tp (NULL_RTX);
6186 return gen_rtx_PLUS (Pmode, tp, reg);
6188 case TLS_MODEL_LOCAL_EXEC:
6189 tp = arm_load_tp (NULL_RTX);
6191 reg = gen_rtx_UNSPEC (Pmode,
6192 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6193 UNSPEC_TLS);
6194 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6196 return gen_rtx_PLUS (Pmode, tp, reg);
6198 default:
6199 abort ();
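/* Summary of the cases above, stated loosely: the global- and
   local-dynamic models go through a call to __tls_get_addr (the LD case
   then adds a per-symbol TLS_LDO32 addend); initial-exec loads a
   TLS_IE32 offset PC-relatively and adds it to the thread pointer; and
   local-exec simply adds a TLS_LE32 offset to the thread pointer.  */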
6203 /* Try machine-dependent ways of modifying an illegitimate address
6204 to be legitimate. If we find one, return the new, valid address. */
6205 static rtx
6206 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6208 if (!TARGET_ARM)
6210 /* TODO: legitimize_address for Thumb2. */
6211 if (TARGET_THUMB2)
6212 return x;
6213 return thumb_legitimize_address (x, orig_x, mode);
6216 if (arm_tls_symbol_p (x))
6217 return legitimize_tls_address (x, NULL_RTX);
6219 if (GET_CODE (x) == PLUS)
6221 rtx xop0 = XEXP (x, 0);
6222 rtx xop1 = XEXP (x, 1);
6224 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6225 xop0 = force_reg (SImode, xop0);
6227 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6228 xop1 = force_reg (SImode, xop1);
6230 if (ARM_BASE_REGISTER_RTX_P (xop0)
6231 && GET_CODE (xop1) == CONST_INT)
6233 HOST_WIDE_INT n, low_n;
6234 rtx base_reg, val;
6235 n = INTVAL (xop1);
6237 /* VFP addressing modes actually allow greater offsets, but for
6238 now we just stick with the lowest common denominator. */
6239 if (mode == DImode
6240 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6242 low_n = n & 0x0f;
6243 n &= ~0x0f;
6244 if (low_n > 4)
6246 n += 16;
6247 low_n -= 16;
6250 else
6252 low_n = ((mode) == TImode ? 0
6253 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6254 n -= low_n;
6257 base_reg = gen_reg_rtx (SImode);
6258 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6259 emit_move_insn (base_reg, val);
6260 x = plus_constant (base_reg, low_n);
6262 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6263 x = gen_rtx_PLUS (SImode, xop0, xop1);
6266 /* XXX We don't allow MINUS any more -- see comment in
6267 arm_legitimate_address_outer_p (). */
6268 else if (GET_CODE (x) == MINUS)
6270 rtx xop0 = XEXP (x, 0);
6271 rtx xop1 = XEXP (x, 1);
6273 if (CONSTANT_P (xop0))
6274 xop0 = force_reg (SImode, xop0);
6276 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6277 xop1 = force_reg (SImode, xop1);
6279 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6280 x = gen_rtx_MINUS (SImode, xop0, xop1);
6283 /* Make sure to take full advantage of the pre-indexed addressing mode
6284 with absolute addresses which often allows for the base register to
6285 be factorized for multiple adjacent memory references, and it might
6286 even allow for the minipool to be avoided entirely. */
6287 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6289 unsigned int bits;
6290 HOST_WIDE_INT mask, base, index;
6291 rtx base_reg;
6293 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6294 use an 8-bit index. So let's use a 12-bit index for SImode only and
6295 hope that arm_gen_constant will enable ldrb to use more bits. */
6296 bits = (mode == SImode) ? 12 : 8;
6297 mask = (1 << bits) - 1;
6298 base = INTVAL (x) & ~mask;
6299 index = INTVAL (x) & mask;
6300 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6302 /* It'll most probably be more efficient to generate the base
6303 with more bits set and use a negative index instead. */
6304 base |= mask;
6305 index -= mask;
6307 base_reg = force_reg (SImode, GEN_INT (base));
6308 x = plus_constant (base_reg, index);
6311 if (flag_pic)
6313 /* We need to find and carefully transform any SYMBOL and LABEL
6314 references; so go back to the original address expression. */
6315 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6317 if (new_x != orig_x)
6318 x = new_x;
6321 return x;
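/* A rough worked example of the absolute-address split above: an SImode
   load from address 0x12345 uses bits = 12, so base = 0x12000 and
   index = 0x345; 0x12000 has only two bits set, so it is kept as-is,
   loaded into a register, and the access typically ends up as something
   like "ldr rD, [rBase, #0x345]".  */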
6325 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6326 to be legitimate. If we find one, return the new, valid address. */
6327 static rtx
6328 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6330 if (arm_tls_symbol_p (x))
6331 return legitimize_tls_address (x, NULL_RTX);
6333 if (GET_CODE (x) == PLUS
6334 && GET_CODE (XEXP (x, 1)) == CONST_INT
6335 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6336 || INTVAL (XEXP (x, 1)) < 0))
6338 rtx xop0 = XEXP (x, 0);
6339 rtx xop1 = XEXP (x, 1);
6340 HOST_WIDE_INT offset = INTVAL (xop1);
6342 /* Try and fold the offset into a biasing of the base register and
6343 then offsetting that. Don't do this when optimizing for space
6344 since it can cause too many CSEs. */
6345 if (optimize_size && offset >= 0
6346 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6348 HOST_WIDE_INT delta;
6350 if (offset >= 256)
6351 delta = offset - (256 - GET_MODE_SIZE (mode));
6352 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6353 delta = 31 * GET_MODE_SIZE (mode);
6354 else
6355 delta = offset & (~31 * GET_MODE_SIZE (mode));
6357 xop0 = force_operand (plus_constant (xop0, offset - delta),
6358 NULL_RTX);
6359 x = plus_constant (xop0, delta);
6361 else if (offset < 0 && offset > -256)
6362 /* Small negative offsets are best done with a subtract before the
6363 dereference; forcing these into a register normally takes two
6364 instructions. */
6365 x = force_operand (x, NULL_RTX);
6366 else
6368 /* For the remaining cases, force the constant into a register. */
6369 xop1 = force_reg (SImode, xop1);
6370 x = gen_rtx_PLUS (SImode, xop0, xop1);
6373 else if (GET_CODE (x) == PLUS
6374 && s_register_operand (XEXP (x, 1), SImode)
6375 && !s_register_operand (XEXP (x, 0), SImode))
6377 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6379 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6382 if (flag_pic)
6384 /* We need to find and carefully transform any SYMBOL and LABEL
6385 references; so go back to the original address expression. */
6386 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6388 if (new_x != orig_x)
6389 x = new_x;
6392 return x;
6395 rtx
6396 thumb_legitimize_reload_address (rtx *x_p,
6397 enum machine_mode mode,
6398 int opnum, int type,
6399 int ind_levels ATTRIBUTE_UNUSED)
6401 rtx x = *x_p;
6403 if (GET_CODE (x) == PLUS
6404 && GET_MODE_SIZE (mode) < 4
6405 && REG_P (XEXP (x, 0))
6406 && XEXP (x, 0) == stack_pointer_rtx
6407 && GET_CODE (XEXP (x, 1)) == CONST_INT
6408 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6410 rtx orig_x = x;
6412 x = copy_rtx (x);
6413 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6414 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6415 return x;
6418 /* If both registers are hi-regs, then it's better to reload the
6419 entire expression rather than each register individually. That
6420 only requires one reload register rather than two. */
6421 if (GET_CODE (x) == PLUS
6422 && REG_P (XEXP (x, 0))
6423 && REG_P (XEXP (x, 1))
6424 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6425 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6427 rtx orig_x = x;
6429 x = copy_rtx (x);
6430 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6431 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6432 return x;
6435 return NULL;
6438 /* Test for various thread-local symbols. */
6440 /* Return TRUE if X is a thread-local symbol. */
6442 static bool
6443 arm_tls_symbol_p (rtx x)
6445 if (! TARGET_HAVE_TLS)
6446 return false;
6448 if (GET_CODE (x) != SYMBOL_REF)
6449 return false;
6451 return SYMBOL_REF_TLS_MODEL (x) != 0;
6454 /* Helper for arm_tls_referenced_p. */
6456 static int
6457 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6459 if (GET_CODE (*x) == SYMBOL_REF)
6460 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6462 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6463 TLS offsets, not real symbol references. */
6464 if (GET_CODE (*x) == UNSPEC
6465 && XINT (*x, 1) == UNSPEC_TLS)
6466 return -1;
6468 return 0;
6471 /* Return TRUE if X contains any TLS symbol references. */
6473 bool
6474 arm_tls_referenced_p (rtx x)
6476 if (! TARGET_HAVE_TLS)
6477 return false;
6479 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6482 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6484 bool
6485 arm_cannot_force_const_mem (rtx x)
6487 rtx base, offset;
6489 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6491 split_const (x, &base, &offset);
6492 if (GET_CODE (base) == SYMBOL_REF
6493 && !offset_within_block_p (base, INTVAL (offset)))
6494 return true;
6496 return arm_tls_referenced_p (x);
6499 #define REG_OR_SUBREG_REG(X) \
6500 (GET_CODE (X) == REG \
6501 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6503 #define REG_OR_SUBREG_RTX(X) \
6504 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6506 static inline int
6507 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6509 enum machine_mode mode = GET_MODE (x);
6510 int total;
6512 switch (code)
6514 case ASHIFT:
6515 case ASHIFTRT:
6516 case LSHIFTRT:
6517 case ROTATERT:
6518 case PLUS:
6519 case MINUS:
6520 case COMPARE:
6521 case NEG:
6522 case NOT:
6523 return COSTS_N_INSNS (1);
6525 case MULT:
6526 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6528 int cycles = 0;
6529 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6531 while (i)
6533 i >>= 2;
6534 cycles++;
6536 return COSTS_N_INSNS (2) + cycles;
6538 return COSTS_N_INSNS (1) + 16;
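/* Rough example of the constant case above: multiplying by 100 shifts
   the constant right two bits per loop iteration
   (100 -> 25 -> 6 -> 1 -> 0), i.e. four iterations, giving a cost of
   COSTS_N_INSNS (2) + 4.  */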
6540 case SET:
6541 return (COSTS_N_INSNS (1)
6542 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6543 + (GET_CODE (SET_DEST (x)) == MEM)));
6545 case CONST_INT:
6546 if (outer == SET)
6548 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6549 return 0;
6550 if (thumb_shiftable_const (INTVAL (x)))
6551 return COSTS_N_INSNS (2);
6552 return COSTS_N_INSNS (3);
6554 else if ((outer == PLUS || outer == COMPARE)
6555 && INTVAL (x) < 256 && INTVAL (x) > -256)
6556 return 0;
6557 else if ((outer == IOR || outer == XOR || outer == AND)
6558 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6559 return COSTS_N_INSNS (1);
6560 else if (outer == AND)
6562 int i;
6563 /* This duplicates the tests in the andsi3 expander. */
6564 for (i = 9; i <= 31; i++)
6565 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6566 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6567 return COSTS_N_INSNS (2);
6569 else if (outer == ASHIFT || outer == ASHIFTRT
6570 || outer == LSHIFTRT)
6571 return 0;
6572 return COSTS_N_INSNS (2);
6574 case CONST:
6575 case CONST_DOUBLE:
6576 case LABEL_REF:
6577 case SYMBOL_REF:
6578 return COSTS_N_INSNS (3);
6580 case UDIV:
6581 case UMOD:
6582 case DIV:
6583 case MOD:
6584 return 100;
6586 case TRUNCATE:
6587 return 99;
6589 case AND:
6590 case XOR:
6591 case IOR:
6592 /* XXX guess. */
6593 return 8;
6595 case MEM:
6596 /* XXX another guess. */
6597 /* Memory costs quite a lot for the first word, but subsequent words
6598 load at the equivalent of a single insn each. */
6599 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6600 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6601 ? 4 : 0));
6603 case IF_THEN_ELSE:
6604 /* XXX a guess. */
6605 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6606 return 14;
6607 return 2;
6609 case SIGN_EXTEND:
6610 case ZERO_EXTEND:
6611 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6612 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6614 if (mode == SImode)
6615 return total;
6617 if (arm_arch6)
6618 return total + COSTS_N_INSNS (1);
6620 /* Assume a two-shift sequence. Increase the cost slightly so
6621 we prefer actual shifts over an extend operation. */
6622 return total + 1 + COSTS_N_INSNS (2);
6624 default:
6625 return 99;
6629 static inline bool
6630 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6632 enum machine_mode mode = GET_MODE (x);
6633 enum rtx_code subcode;
6634 rtx operand;
6635 enum rtx_code code = GET_CODE (x);
6636 *total = 0;
6638 switch (code)
6640 case MEM:
6641 /* Memory costs quite a lot for the first word, but subsequent words
6642 load at the equivalent of a single insn each. */
6643 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6644 return true;
6646 case DIV:
6647 case MOD:
6648 case UDIV:
6649 case UMOD:
6650 if (TARGET_HARD_FLOAT && mode == SFmode)
6651 *total = COSTS_N_INSNS (2);
6652 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6653 *total = COSTS_N_INSNS (4);
6654 else
6655 *total = COSTS_N_INSNS (20);
6656 return false;
6658 case ROTATE:
6659 if (GET_CODE (XEXP (x, 1)) == REG)
6660 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6661 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6662 *total = rtx_cost (XEXP (x, 1), code, speed);
6664 /* Fall through */
6665 case ROTATERT:
6666 if (mode != SImode)
6668 *total += COSTS_N_INSNS (4);
6669 return true;
6672 /* Fall through */
6673 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6674 *total += rtx_cost (XEXP (x, 0), code, speed);
6675 if (mode == DImode)
6677 *total += COSTS_N_INSNS (3);
6678 return true;
6681 *total += COSTS_N_INSNS (1);
6682 /* Increase the cost of complex shifts because they aren't any faster,
6683 and reduce dual issue opportunities. */
6684 if (arm_tune_cortex_a9
6685 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6686 ++*total;
6688 return true;
6690 case MINUS:
6691 if (mode == DImode)
6693 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6694 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6695 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6697 *total += rtx_cost (XEXP (x, 1), code, speed);
6698 return true;
6701 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6702 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6704 *total += rtx_cost (XEXP (x, 0), code, speed);
6705 return true;
6708 return false;
6711 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6713 if (TARGET_HARD_FLOAT
6714 && (mode == SFmode
6715 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6717 *total = COSTS_N_INSNS (1);
6718 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6719 && arm_const_double_rtx (XEXP (x, 0)))
6721 *total += rtx_cost (XEXP (x, 1), code, speed);
6722 return true;
6725 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6726 && arm_const_double_rtx (XEXP (x, 1)))
6728 *total += rtx_cost (XEXP (x, 0), code, speed);
6729 return true;
6732 return false;
6734 *total = COSTS_N_INSNS (20);
6735 return false;
6738 *total = COSTS_N_INSNS (1);
6739 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6740 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6742 *total += rtx_cost (XEXP (x, 1), code, speed);
6743 return true;
6746 subcode = GET_CODE (XEXP (x, 1));
6747 if (subcode == ASHIFT || subcode == ASHIFTRT
6748 || subcode == LSHIFTRT
6749 || subcode == ROTATE || subcode == ROTATERT)
6751 *total += rtx_cost (XEXP (x, 0), code, speed);
6752 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6753 return true;
6756 /* A shift as a part of RSB costs no more than RSB itself. */
6757 if (GET_CODE (XEXP (x, 0)) == MULT
6758 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6760 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6761 *total += rtx_cost (XEXP (x, 1), code, speed);
6762 return true;
6765 if (subcode == MULT
6766 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6768 *total += rtx_cost (XEXP (x, 0), code, speed);
6769 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6770 return true;
6773 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6774 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6776 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6777 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6778 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6779 *total += COSTS_N_INSNS (1);
6781 return true;
6784 /* Fall through */
6786 case PLUS:
6787 if (code == PLUS && arm_arch6 && mode == SImode
6788 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6789 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6791 *total = COSTS_N_INSNS (1);
6792 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6793 speed);
6794 *total += rtx_cost (XEXP (x, 1), code, speed);
6795 return true;
6798 /* MLA: All arguments must be registers. We filter out
6799 multiplication by a power of two, so that we fall down into
6800 the code below. */
6801 if (GET_CODE (XEXP (x, 0)) == MULT
6802 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6804 /* The cost comes from the cost of the multiply. */
6805 return false;
6808 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6810 if (TARGET_HARD_FLOAT
6811 && (mode == SFmode
6812 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6814 *total = COSTS_N_INSNS (1);
6815 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6816 && arm_const_double_rtx (XEXP (x, 1)))
6818 *total += rtx_cost (XEXP (x, 0), code, speed);
6819 return true;
6822 return false;
6825 *total = COSTS_N_INSNS (20);
6826 return false;
6829 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6830 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6832 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6833 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6834 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6835 *total += COSTS_N_INSNS (1);
6836 return true;
6839 /* Fall through */
6841 case AND: case XOR: case IOR:
6843 /* Normally the frame registers will be split into reg+const during
6844 reload, so it is a bad idea to combine them with other instructions,
6845 since then they might not be moved outside of loops. As a compromise
6846 we allow integration with ops that have a constant as their second
6847 operand. */
6848 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6849 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6850 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6851 *total = COSTS_N_INSNS (1);
6853 if (mode == DImode)
6855 *total += COSTS_N_INSNS (2);
6856 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6857 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6859 *total += rtx_cost (XEXP (x, 0), code, speed);
6860 return true;
6863 return false;
6866 *total += COSTS_N_INSNS (1);
6867 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6868 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6870 *total += rtx_cost (XEXP (x, 0), code, speed);
6871 return true;
6873 subcode = GET_CODE (XEXP (x, 0));
6874 if (subcode == ASHIFT || subcode == ASHIFTRT
6875 || subcode == LSHIFTRT
6876 || subcode == ROTATE || subcode == ROTATERT)
6878 *total += rtx_cost (XEXP (x, 1), code, speed);
6879 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6880 return true;
6883 if (subcode == MULT
6884 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6886 *total += rtx_cost (XEXP (x, 1), code, speed);
6887 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6888 return true;
6891 if (subcode == UMIN || subcode == UMAX
6892 || subcode == SMIN || subcode == SMAX)
6894 *total = COSTS_N_INSNS (3);
6895 return true;
6898 return false;
6900 case MULT:
6901 /* This should have been handled by the CPU specific routines. */
6902 gcc_unreachable ();
6904 case TRUNCATE:
6905 if (arm_arch3m && mode == SImode
6906 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6907 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6908 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6909 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6910 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6911 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6913 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6914 return true;
6916 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6917 return false;
6919 case NEG:
6920 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6922 if (TARGET_HARD_FLOAT
6923 && (mode == SFmode
6924 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6926 *total = COSTS_N_INSNS (1);
6927 return false;
6929 *total = COSTS_N_INSNS (2);
6930 return false;
6933 /* Fall through */
6934 case NOT:
6935 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6936 if (mode == SImode && code == NOT)
6938 subcode = GET_CODE (XEXP (x, 0));
6939 if (subcode == ASHIFT || subcode == ASHIFTRT
6940 || subcode == LSHIFTRT
6941 || subcode == ROTATE || subcode == ROTATERT
6942 || (subcode == MULT
6943 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6945 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6946 /* Register shifts cost an extra cycle. */
6947 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6948 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6949 subcode, speed);
6950 return true;
6954 return false;
6956 case IF_THEN_ELSE:
6957 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6959 *total = COSTS_N_INSNS (4);
6960 return true;
6963 operand = XEXP (x, 0);
6965 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6966 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6967 && GET_CODE (XEXP (operand, 0)) == REG
6968 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6969 *total += COSTS_N_INSNS (1);
6970 *total += (rtx_cost (XEXP (x, 1), code, speed)
6971 + rtx_cost (XEXP (x, 2), code, speed));
6972 return true;
6974 case NE:
6975 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6977 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6978 return true;
6980 goto scc_insn;
6982 case GE:
6983 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6984 && mode == SImode && XEXP (x, 1) == const0_rtx)
6986 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6987 return true;
6989 goto scc_insn;
6991 case LT:
6992 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6993 && mode == SImode && XEXP (x, 1) == const0_rtx)
6995 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6996 return true;
6998 goto scc_insn;
7000 case EQ:
7001 case GT:
7002 case LE:
7003 case GEU:
7004 case LTU:
7005 case GTU:
7006 case LEU:
7007 case UNORDERED:
7008 case ORDERED:
7009 case UNEQ:
7010 case UNGE:
7011 case UNLT:
7012 case UNGT:
7013 case UNLE:
7014 scc_insn:
7015 /* SCC insns. In the case where the comparison has already been
7016 performed, then they cost 2 instructions. Otherwise they need
7017 an additional comparison before them. */
7018 *total = COSTS_N_INSNS (2);
7019 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7021 return true;
7024 /* Fall through */
7025 case COMPARE:
7026 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7028 *total = 0;
7029 return true;
7032 *total += COSTS_N_INSNS (1);
7033 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7034 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7036 *total += rtx_cost (XEXP (x, 0), code, speed);
7037 return true;
7040 subcode = GET_CODE (XEXP (x, 0));
7041 if (subcode == ASHIFT || subcode == ASHIFTRT
7042 || subcode == LSHIFTRT
7043 || subcode == ROTATE || subcode == ROTATERT)
7045 *total += rtx_cost (XEXP (x, 1), code, speed);
7046 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7047 return true;
7050 if (subcode == MULT
7051 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7053 *total += rtx_cost (XEXP (x, 1), code, speed);
7054 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7055 return true;
7058 return false;
7060 case UMIN:
7061 case UMAX:
7062 case SMIN:
7063 case SMAX:
7064 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7065 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7066 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7067 *total += rtx_cost (XEXP (x, 1), code, speed);
7068 return true;
7070 case ABS:
7071 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7073 if (TARGET_HARD_FLOAT
7074 && (mode == SFmode
7075 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7077 *total = COSTS_N_INSNS (1);
7078 return false;
7080 *total = COSTS_N_INSNS (20);
7081 return false;
7083 *total = COSTS_N_INSNS (1);
7084 if (mode == DImode)
7085 *total += COSTS_N_INSNS (3);
7086 return false;
7088 case SIGN_EXTEND:
7089 case ZERO_EXTEND:
7090 *total = 0;
7091 if (GET_MODE_CLASS (mode) == MODE_INT)
7093 rtx op = XEXP (x, 0);
7094 enum machine_mode opmode = GET_MODE (op);
7096 if (mode == DImode)
7097 *total += COSTS_N_INSNS (1);
7099 if (opmode != SImode)
7101 if (MEM_P (op))
7103 /* If !arm_arch4, we use one of the extendhisi2_mem
7104 or movhi_bytes patterns for HImode. For a QImode
7105 sign extension, we first zero-extend from memory
7106 and then perform a shift sequence. */
7107 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7108 *total += COSTS_N_INSNS (2);
7110 else if (arm_arch6)
7111 *total += COSTS_N_INSNS (1);
7113 /* We don't have the necessary insn, so we need to perform some
7114 other operation. */
7115 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7116 /* An and with constant 255. */
7117 *total += COSTS_N_INSNS (1);
7118 else
7119 /* A shift sequence. Increase costs slightly to avoid
7120 combining two shifts into an extend operation. */
7121 *total += COSTS_N_INSNS (2) + 1;
7124 return false;
7127 switch (GET_MODE (XEXP (x, 0)))
7129 case V8QImode:
7130 case V4HImode:
7131 case V2SImode:
7132 case V4QImode:
7133 case V2HImode:
7134 *total = COSTS_N_INSNS (1);
7135 return false;
7137 default:
7138 gcc_unreachable ();
7140 gcc_unreachable ();
7142 case ZERO_EXTRACT:
7143 case SIGN_EXTRACT:
7144 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7145 return true;
7147 case CONST_INT:
7148 if (const_ok_for_arm (INTVAL (x))
7149 || const_ok_for_arm (~INTVAL (x)))
7150 *total = COSTS_N_INSNS (1);
7151 else
7152 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7153 INTVAL (x), NULL_RTX,
7154 NULL_RTX, 0, 0));
7155 return true;
7157 case CONST:
7158 case LABEL_REF:
7159 case SYMBOL_REF:
7160 *total = COSTS_N_INSNS (3);
7161 return true;
7163 case HIGH:
7164 *total = COSTS_N_INSNS (1);
7165 return true;
7167 case LO_SUM:
7168 *total = COSTS_N_INSNS (1);
7169 *total += rtx_cost (XEXP (x, 0), code, speed);
7170 return true;
7172 case CONST_DOUBLE:
7173 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7174 && (mode == SFmode || !TARGET_VFP_SINGLE))
7175 *total = COSTS_N_INSNS (1);
7176 else
7177 *total = COSTS_N_INSNS (4);
7178 return true;
7180 default:
7181 *total = COSTS_N_INSNS (4);
7182 return false;
7186 /* Estimates the size cost of thumb1 instructions.
7187 For now most of the code is copied from thumb1_rtx_costs. We need more
7188 fine-grained tuning when we have more related test cases. */
7189 static inline int
7190 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7192 enum machine_mode mode = GET_MODE (x);
7194 switch (code)
7196 case ASHIFT:
7197 case ASHIFTRT:
7198 case LSHIFTRT:
7199 case ROTATERT:
7200 case PLUS:
7201 case MINUS:
7202 case COMPARE:
7203 case NEG:
7204 case NOT:
7205 return COSTS_N_INSNS (1);
7207 case MULT:
7208 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7210 /* The Thumb-1 mul instruction can't operate on a constant; we must
7211 load it into a register first. */
7212 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7213 return COSTS_N_INSNS (1) + const_size;
7215 return COSTS_N_INSNS (1);
7217 case SET:
7218 return (COSTS_N_INSNS (1)
7219 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7220 + (GET_CODE (SET_DEST (x)) == MEM)));
7222 case CONST_INT:
7223 if (outer == SET)
7225 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7226 return COSTS_N_INSNS (1);
7227 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7228 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7229 return COSTS_N_INSNS (2);
7230 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7231 if (thumb_shiftable_const (INTVAL (x)))
7232 return COSTS_N_INSNS (2);
7233 return COSTS_N_INSNS (3);
7235 else if ((outer == PLUS || outer == COMPARE)
7236 && INTVAL (x) < 256 && INTVAL (x) > -256)
7237 return 0;
7238 else if ((outer == IOR || outer == XOR || outer == AND)
7239 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7240 return COSTS_N_INSNS (1);
7241 else if (outer == AND)
7243 int i;
7244 /* This duplicates the tests in the andsi3 expander. */
7245 for (i = 9; i <= 31; i++)
7246 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7247 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7248 return COSTS_N_INSNS (2);
7250 else if (outer == ASHIFT || outer == ASHIFTRT
7251 || outer == LSHIFTRT)
7252 return 0;
7253 return COSTS_N_INSNS (2);
7255 case CONST:
7256 case CONST_DOUBLE:
7257 case LABEL_REF:
7258 case SYMBOL_REF:
7259 return COSTS_N_INSNS (3);
7261 case UDIV:
7262 case UMOD:
7263 case DIV:
7264 case MOD:
7265 return 100;
7267 case TRUNCATE:
7268 return 99;
7270 case AND:
7271 case XOR:
7272 case IOR:
7273 /* XXX guess. */
7274 return 8;
7276 case MEM:
7277 /* XXX another guess. */
7278 /* Memory costs quite a lot for the first word, but subsequent words
7279 load at the equivalent of a single insn each. */
7280 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7281 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7282 ? 4 : 0));
7284 case IF_THEN_ELSE:
7285 /* XXX a guess. */
7286 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7287 return 14;
7288 return 2;
7290 case ZERO_EXTEND:
7291 /* XXX still guessing. */
7292 switch (GET_MODE (XEXP (x, 0)))
7294 case QImode:
7295 return (1 + (mode == DImode ? 4 : 0)
7296 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7298 case HImode:
7299 return (4 + (mode == DImode ? 4 : 0)
7300 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7302 case SImode:
7303 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7305 default:
7306 return 99;
7309 default:
7310 return 99;
7314 /* RTX costs when optimizing for size. */
7315 static bool
7316 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7317 int *total)
7319 enum machine_mode mode = GET_MODE (x);
7320 if (TARGET_THUMB1)
7322 *total = thumb1_size_rtx_costs (x, code, outer_code);
7323 return true;
7326 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7327 switch (code)
7329 case MEM:
7330 /* A memory access costs 1 insn if the mode is small, or the address is
7331 a single register, otherwise it costs one insn per word. */
7332 if (REG_P (XEXP (x, 0)))
7333 *total = COSTS_N_INSNS (1);
7334 else if (flag_pic
7335 && GET_CODE (XEXP (x, 0)) == PLUS
7336 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7337 /* This will be split into two instructions.
7338 See arm.md:calculate_pic_address. */
7339 *total = COSTS_N_INSNS (2);
7340 else
7341 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7342 return true;
7344 case DIV:
7345 case MOD:
7346 case UDIV:
7347 case UMOD:
7348 /* Needs a libcall, so it costs about this. */
7349 *total = COSTS_N_INSNS (2);
7350 return false;
7352 case ROTATE:
7353 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7355 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7356 return true;
7358 /* Fall through */
7359 case ROTATERT:
7360 case ASHIFT:
7361 case LSHIFTRT:
7362 case ASHIFTRT:
7363 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7365 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7366 return true;
7368 else if (mode == SImode)
7370 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7371 /* Slightly disparage register shifts, but not by much. */
7372 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7373 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7374 return true;
7377 /* Needs a libcall. */
7378 *total = COSTS_N_INSNS (2);
7379 return false;
7381 case MINUS:
7382 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7383 && (mode == SFmode || !TARGET_VFP_SINGLE))
7385 *total = COSTS_N_INSNS (1);
7386 return false;
7389 if (mode == SImode)
7391 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7392 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7394 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7395 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7396 || subcode1 == ROTATE || subcode1 == ROTATERT
7397 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7398 || subcode1 == ASHIFTRT)
7400 /* It's just the cost of the two operands. */
7401 *total = 0;
7402 return false;
7405 *total = COSTS_N_INSNS (1);
7406 return false;
7409 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7410 return false;
7412 case PLUS:
7413 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7414 && (mode == SFmode || !TARGET_VFP_SINGLE))
7416 *total = COSTS_N_INSNS (1);
7417 return false;
7420 /* A shift as a part of ADD costs nothing. */
7421 if (GET_CODE (XEXP (x, 0)) == MULT
7422 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7424 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7425 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7426 *total += rtx_cost (XEXP (x, 1), code, false);
7427 return true;
7430 /* Fall through */
7431 case AND: case XOR: case IOR:
7432 if (mode == SImode)
7434 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7436 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7437 || subcode == LSHIFTRT || subcode == ASHIFTRT
7438 || (code == AND && subcode == NOT))
7440 /* It's just the cost of the two operands. */
7441 *total = 0;
7442 return false;
7446 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7447 return false;
7449 case MULT:
7450 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7451 return false;
7453 case NEG:
7454 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7455 && (mode == SFmode || !TARGET_VFP_SINGLE))
7457 *total = COSTS_N_INSNS (1);
7458 return false;
7461 /* Fall through */
7462 case NOT:
7463 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7465 return false;
7467 case IF_THEN_ELSE:
7468 *total = 0;
7469 return false;
7471 case COMPARE:
7472 if (cc_register (XEXP (x, 0), VOIDmode))
7473 * total = 0;
7474 else
7475 *total = COSTS_N_INSNS (1);
7476 return false;
7478 case ABS:
7479 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7480 && (mode == SFmode || !TARGET_VFP_SINGLE))
7481 *total = COSTS_N_INSNS (1);
7482 else
7483 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7484 return false;
7486 case SIGN_EXTEND:
7487 case ZERO_EXTEND:
7488 return arm_rtx_costs_1 (x, outer_code, total, 0);
7490 case CONST_INT:
7491 if (const_ok_for_arm (INTVAL (x)))
7492 /* A multiplication by a constant requires another instruction
7493 to load the constant to a register. */
7494 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7495 ? 1 : 0);
7496 else if (const_ok_for_arm (~INTVAL (x)))
7497 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7498 else if (const_ok_for_arm (-INTVAL (x)))
7500 if (outer_code == COMPARE || outer_code == PLUS
7501 || outer_code == MINUS)
7502 *total = 0;
7503 else
7504 *total = COSTS_N_INSNS (1);
7506 else
7507 *total = COSTS_N_INSNS (2);
7508 return true;
7510 case CONST:
7511 case LABEL_REF:
7512 case SYMBOL_REF:
7513 *total = COSTS_N_INSNS (2);
7514 return true;
7516 case CONST_DOUBLE:
7517 *total = COSTS_N_INSNS (4);
7518 return true;
7520 case HIGH:
7521 case LO_SUM:
7522 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7523 cost of these slightly. */
7524 *total = COSTS_N_INSNS (1) + 1;
7525 return true;
7527 default:
7528 if (mode != VOIDmode)
7529 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7530 else
7531 *total = COSTS_N_INSNS (4); /* Who knows? */
7532 return false;
7536 /* RTX costs. Dispatch between the size-optimized costs and the tuning-specific speed costs. */
7537 static bool
7538 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7539 bool speed)
7541 if (!speed)
7542 return arm_size_rtx_costs (x, (enum rtx_code) code,
7543 (enum rtx_code) outer_code, total);
7544 else
7545 return current_tune->rtx_costs (x, (enum rtx_code) code,
7546 (enum rtx_code) outer_code,
7547 total, speed);
7550 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7551 supported on any "slowmul" cores, so it can be ignored. */
7553 static bool
7554 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7555 int *total, bool speed)
7557 enum machine_mode mode = GET_MODE (x);
7559 if (TARGET_THUMB)
7561 *total = thumb1_rtx_costs (x, code, outer_code);
7562 return true;
7565 switch (code)
7567 case MULT:
7568 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7569 || mode == DImode)
7571 *total = COSTS_N_INSNS (20);
7572 return false;
7575 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7577 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7578 & (unsigned HOST_WIDE_INT) 0xffffffff);
7579 int cost, const_ok = const_ok_for_arm (i);
7580 int j, booth_unit_size;
7582 /* Tune as appropriate. */
7583 cost = const_ok ? 4 : 8;
7584 booth_unit_size = 2;
7585 for (j = 0; i && j < 32; j += booth_unit_size)
7587 i >>= booth_unit_size;
7588 cost++;
7591 *total = COSTS_N_INSNS (cost);
7592 *total += rtx_cost (XEXP (x, 0), code, speed);
7593 return true;
7596 *total = COSTS_N_INSNS (20);
7597 return false;
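/* Rough example of the Booth-style estimate above: multiplying by 0x55
   (which const_ok_for_arm accepts, so the base cost is 4) consumes the
   constant two bits per iteration (0x55 -> 0x15 -> 0x5 -> 0x1 -> 0),
   adding 4, for COSTS_N_INSNS (8) plus the cost of operand 0.  */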
7599 default:
7600 return arm_rtx_costs_1 (x, outer_code, total, speed);
7605 /* RTX cost for cores with a fast multiply unit (M variants). */
7607 static bool
7608 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7609 int *total, bool speed)
7611 enum machine_mode mode = GET_MODE (x);
7613 if (TARGET_THUMB1)
7615 *total = thumb1_rtx_costs (x, code, outer_code);
7616 return true;
7619 /* ??? should thumb2 use different costs? */
7620 switch (code)
7622 case MULT:
7623 /* There is no point basing this on the tuning, since it is always the
7624 fast variant if it exists at all. */
7625 if (mode == DImode
7626 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7627 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7628 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7630 *total = COSTS_N_INSNS(2);
7631 return false;
7635 if (mode == DImode)
7637 *total = COSTS_N_INSNS (5);
7638 return false;
7641 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7643 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7644 & (unsigned HOST_WIDE_INT) 0xffffffff);
7645 int cost, const_ok = const_ok_for_arm (i);
7646 int j, booth_unit_size;
7648 /* Tune as appropriate. */
7649 cost = const_ok ? 4 : 8;
7650 booth_unit_size = 8;
7651 for (j = 0; i && j < 32; j += booth_unit_size)
7653 i >>= booth_unit_size;
7654 cost++;
7657 *total = COSTS_N_INSNS(cost);
7658 return false;
7661 if (mode == SImode)
7663 *total = COSTS_N_INSNS (4);
7664 return false;
7667 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7669 if (TARGET_HARD_FLOAT
7670 && (mode == SFmode
7671 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7673 *total = COSTS_N_INSNS (1);
7674 return false;
7678 /* Requires a lib call */
7679 *total = COSTS_N_INSNS (20);
7680 return false;
7682 default:
7683 return arm_rtx_costs_1 (x, outer_code, total, speed);
7688 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7689 so it can be ignored. */
7691 static bool
7692 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7693 int *total, bool speed)
7695 enum machine_mode mode = GET_MODE (x);
7697 if (TARGET_THUMB)
7699 *total = thumb1_rtx_costs (x, code, outer_code);
7700 return true;
7703 switch (code)
7705 case COMPARE:
7706 if (GET_CODE (XEXP (x, 0)) != MULT)
7707 return arm_rtx_costs_1 (x, outer_code, total, speed);
7709 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7710 will stall until the multiplication is complete. */
7711 *total = COSTS_N_INSNS (3);
7712 return false;
7714 case MULT:
7715 /* There is no point basing this on the tuning, since it is always the
7716 fast variant if it exists at all. */
7717 if (mode == DImode
7718 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7719 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7720 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7722 *total = COSTS_N_INSNS (2);
7723 return false;
7727 if (mode == DImode)
7729 *total = COSTS_N_INSNS (5);
7730 return false;
7733 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7735 /* If operand 1 is a constant we can more accurately
7736 calculate the cost of the multiply. The multiplier can
7737 retire 15 bits on the first cycle and a further 12 on the
7738 second. We do, of course, have to load the constant into
7739 a register first. */
7740 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7741 /* There's a general overhead of one cycle. */
7742 int cost = 1;
7743 unsigned HOST_WIDE_INT masked_const;
7745 if (i & 0x80000000)
7746 i = ~i;
7748 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7750 masked_const = i & 0xffff8000;
7751 if (masked_const != 0)
7753 cost++;
7754 masked_const = i & 0xf8000000;
7755 if (masked_const != 0)
7756 cost++;
7758 *total = COSTS_N_INSNS (cost);
7759 return false;
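/* Editorial worked example (not from the original source): for a multiplier
   constant of 0x00012345 the sign bit is clear, so I is used unmodified;
   I & 0xffff8000 is 0x10000, which is nonzero and adds one cycle, while
   I & 0xf8000000 is zero, so the estimate above is COSTS_N_INSNS (2):
   one cycle of general overhead plus one extra multiplier cycle.  */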
7762 if (mode == SImode)
7764 *total = COSTS_N_INSNS (3);
7765 return false;
7768 /* Requires a lib call.  */
7769 *total = COSTS_N_INSNS (20);
7770 return false;
7772 default:
7773 return arm_rtx_costs_1 (x, outer_code, total, speed);
7778 /* RTX costs for 9e (and later) cores. */
7780 static bool
7781 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7782 int *total, bool speed)
7784 enum machine_mode mode = GET_MODE (x);
7786 if (TARGET_THUMB1)
7788 switch (code)
7790 case MULT:
7791 *total = COSTS_N_INSNS (3);
7792 return true;
7794 default:
7795 *total = thumb1_rtx_costs (x, code, outer_code);
7796 return true;
7800 switch (code)
7802 case MULT:
7803 /* There is no point basing this on the tuning, since it is always the
7804 fast variant if it exists at all. */
7805 if (mode == DImode
7806 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7807 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7808 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7810 *total = COSTS_N_INSNS (2);
7811 return false;
7815 if (mode == DImode)
7817 *total = COSTS_N_INSNS (5);
7818 return false;
7821 if (mode == SImode)
7823 *total = COSTS_N_INSNS (2);
7824 return false;
7827 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7829 if (TARGET_HARD_FLOAT
7830 && (mode == SFmode
7831 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7833 *total = COSTS_N_INSNS (1);
7834 return false;
7838 *total = COSTS_N_INSNS (20);
7839 return false;
7841 default:
7842 return arm_rtx_costs_1 (x, outer_code, total, speed);
7845 /* All address computations that can be done are free, but rtx cost returns
7846 the same for practically all of them. So we weight the different types
7847 of address here in the order (most pref first):
7848 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7849 static inline int
7850 arm_arm_address_cost (rtx x)
7852 enum rtx_code c = GET_CODE (x);
7854 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7855 return 0;
7856 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7857 return 10;
7859 if (c == PLUS)
7861 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7862 return 2;
7864 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7865 return 3;
7867 return 4;
7870 return 6;
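/* Editorial examples (assumed addresses, not from the original source) of
   the weights returned above: a post-increment address such as [r0], #4
   costs 0; a register plus constant such as [r1, #8] costs 2; a sum with
   an arithmetic operand such as [r1, r2, lsl #2] costs 3; any other PLUS
   costs 4; a bare MEM, LABEL_REF or SYMBOL_REF costs 10; and a plain REG
   falls through to the default cost of 6.  */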
7873 static inline int
7874 arm_thumb_address_cost (rtx x)
7876 enum rtx_code c = GET_CODE (x);
7878 if (c == REG)
7879 return 1;
7880 if (c == PLUS
7881 && GET_CODE (XEXP (x, 0)) == REG
7882 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7883 return 1;
7885 return 2;
7888 static int
7889 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7891 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7894 /* Adjust cost hook for XScale. */
7895 static bool
7896 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7898 /* Some true dependencies can have a higher cost depending
7899 on precisely how certain input operands are used. */
7900 if (REG_NOTE_KIND(link) == 0
7901 && recog_memoized (insn) >= 0
7902 && recog_memoized (dep) >= 0)
7904 int shift_opnum = get_attr_shift (insn);
7905 enum attr_type attr_type = get_attr_type (dep);
7907 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7908 operand for INSN. If we have a shifted input operand and the
7909 instruction we depend on is another ALU instruction, then we may
7910 have to account for an additional stall. */
7911 if (shift_opnum != 0
7912 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7914 rtx shifted_operand;
7915 int opno;
7917 /* Get the shifted operand. */
7918 extract_insn (insn);
7919 shifted_operand = recog_data.operand[shift_opnum];
7921 /* Iterate over all the operands in DEP. If we write an operand
7922 that overlaps with SHIFTED_OPERAND, then we have to increase the
7923 cost of this dependency. */
7924 extract_insn (dep);
7925 preprocess_constraints ();
7926 for (opno = 0; opno < recog_data.n_operands; opno++)
7928 /* We can ignore strict inputs. */
7929 if (recog_data.operand_type[opno] == OP_IN)
7930 continue;
7932 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7933 shifted_operand))
7935 *cost = 2;
7936 return false;
7941 return true;
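/* Editorial example (assumed instruction pairing, not from the original
   source): if DEP is "mov r1, r2, lsl #1" (an alu_shift-type insn writing
   r1) and INSN is "add r3, r4, r1, asl #2" (whose shifted input operand is
   r1), the loop above finds that DEP writes a register overlapping the
   shifted operand and raises the dependency cost to 2 to account for the
   extra shifter stall.  */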
7944 /* Adjust cost hook for Cortex A9. */
7945 static bool
7946 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7948 switch (REG_NOTE_KIND (link))
7950 case REG_DEP_ANTI:
7951 *cost = 0;
7952 return false;
7954 case REG_DEP_TRUE:
7955 case REG_DEP_OUTPUT:
7956 if (recog_memoized (insn) >= 0
7957 && recog_memoized (dep) >= 0)
7959 if (GET_CODE (PATTERN (insn)) == SET)
7961 if (GET_MODE_CLASS
7962 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7963 || GET_MODE_CLASS
7964 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7966 enum attr_type attr_type_insn = get_attr_type (insn);
7967 enum attr_type attr_type_dep = get_attr_type (dep);
7969 /* By default all dependencies of the form
7970 s0 = s0 <op> s1
7971 s0 = s0 <op> s2
7972 have an extra latency of 1 cycle because
7973 of the input and output dependency in this
7974 case. However this gets modeled as a true
7975 dependency and hence all these checks. */
7976 if (REG_P (SET_DEST (PATTERN (insn)))
7977 && REG_P (SET_DEST (PATTERN (dep)))
7978 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7979 SET_DEST (PATTERN (dep))))
7981 /* FMACS is a special case where the dependent
7982 instruction can be issued 3 cycles before
7983 the normal latency in case of an output
7984 dependency. */
7985 if ((attr_type_insn == TYPE_FMACS
7986 || attr_type_insn == TYPE_FMACD)
7987 && (attr_type_dep == TYPE_FMACS
7988 || attr_type_dep == TYPE_FMACD))
7990 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7991 *cost = insn_default_latency (dep) - 3;
7992 else
7993 *cost = insn_default_latency (dep);
7994 return false;
7996 else
7998 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7999 *cost = insn_default_latency (dep) + 1;
8000 else
8001 *cost = insn_default_latency (dep);
8003 return false;
8008 break;
8010 default:
8011 gcc_unreachable ();
8014 return true;
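/* Editorial example (assumed, not from the original source): for two
   dependent multiply-accumulates such as "vmla.f32 s0, s1, s2" followed by
   "vmla.f32 s0, s3, s4" (both TYPE_FMACS, destinations overlapping), an
   output dependency is costed at insn_default_latency (dep) - 3, whereas
   any other overlapping floating-point pair is costed at
   insn_default_latency (dep) + 1 for an output dependency.  */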
8017 /* Adjust cost hook for FA726TE. */
8018 static bool
8019 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8021 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
8022 followed by a predicated one) has a penalty of 3. */
8023 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8024 && recog_memoized (insn) >= 0
8025 && recog_memoized (dep) >= 0
8026 && get_attr_conds (dep) == CONDS_SET)
8028 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8029 if (get_attr_conds (insn) == CONDS_USE
8030 && get_attr_type (insn) != TYPE_BRANCH)
8032 *cost = 3;
8033 return false;
8036 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8037 || get_attr_conds (insn) == CONDS_USE)
8039 *cost = 0;
8040 return false;
8044 return true;
8047 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8048 It corrects the value of COST based on the relationship between
8049 INSN and DEP through the dependence LINK. It returns the new
8050 value. There is a per-core adjust_cost hook to adjust scheduler costs
8051 and the per-core hook can choose to completely override the generic
8052 adjust_cost function. Only put bits of code into arm_adjust_cost that
8053 are common across all cores. */
8054 static int
8055 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8057 rtx i_pat, d_pat;
8059 /* When generating Thumb-1 code, we want to place flag-setting operations
8060 close to a conditional branch which depends on them, so that we can
8061 omit the comparison. */
8062 if (TARGET_THUMB1
8063 && REG_NOTE_KIND (link) == 0
8064 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8065 && recog_memoized (dep) >= 0
8066 && get_attr_conds (dep) == CONDS_SET)
8067 return 0;
8069 if (current_tune->sched_adjust_cost != NULL)
8071 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8072 return cost;
8075 /* XXX This is not strictly true for the FPA. */
8076 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8077 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8078 return 0;
8080 /* Call insns don't incur a stall, even if they follow a load. */
8081 if (REG_NOTE_KIND (link) == 0
8082 && GET_CODE (insn) == CALL_INSN)
8083 return 1;
8085 if ((i_pat = single_set (insn)) != NULL
8086 && GET_CODE (SET_SRC (i_pat)) == MEM
8087 && (d_pat = single_set (dep)) != NULL
8088 && GET_CODE (SET_DEST (d_pat)) == MEM)
8090 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8091 /* This is a load after a store, there is no conflict if the load reads
8092 from a cached area. Assume that loads from the stack, and from the
8093 constant pool are cached, and that others will miss. This is a
8094 hack. */
8096 if ((GET_CODE (src_mem) == SYMBOL_REF
8097 && CONSTANT_POOL_ADDRESS_P (src_mem))
8098 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8099 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8100 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8101 return 1;
8104 return cost;
8107 static int fp_consts_inited = 0;
8109 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8110 static const char * const strings_fp[8] =
8112 "0", "1", "2", "3",
8113 "4", "5", "0.5", "10"
8116 static REAL_VALUE_TYPE values_fp[8];
8118 static void
8119 init_fp_table (void)
8121 int i;
8122 REAL_VALUE_TYPE r;
8124 if (TARGET_VFP)
8125 fp_consts_inited = 1;
8126 else
8127 fp_consts_inited = 8;
8129 for (i = 0; i < fp_consts_inited; i++)
8131 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8132 values_fp[i] = r;
8136 /* Return TRUE if rtx X is a valid immediate FP constant. */
8138 arm_const_double_rtx (rtx x)
8140 REAL_VALUE_TYPE r;
8141 int i;
8143 if (!fp_consts_inited)
8144 init_fp_table ();
8146 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8147 if (REAL_VALUE_MINUS_ZERO (r))
8148 return 0;
8150 for (i = 0; i < fp_consts_inited; i++)
8151 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8152 return 1;
8154 return 0;
8157 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8159 neg_const_double_rtx_ok_for_fpa (rtx x)
8161 REAL_VALUE_TYPE r;
8162 int i;
8164 if (!fp_consts_inited)
8165 init_fp_table ();
8167 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8168 r = real_value_negate (&r);
8169 if (REAL_VALUE_MINUS_ZERO (r))
8170 return 0;
8172 for (i = 0; i < 8; i++)
8173 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8174 return 1;
8176 return 0;
8180 /* VFPv3 has a fairly wide range of representable immediates, formed from
8181 "quarter-precision" floating-point values. These can be evaluated using this
8182 formula (with ^ for exponentiation):
8184 -1^s * n * 2^-r
8186 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8187 16 <= n <= 31 and 0 <= r <= 7.
8189 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8191 - A (most-significant) is the sign bit.
8192 - BCD are the exponent (encoded as r XOR 3).
8193 - EFGH are the mantissa (encoded as n - 16).
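/* Editorial worked example (not part of the original comments): 1.0
   decomposes as s = 0, n = 16, r = 4 and encodes as 0x70, while 0.5 uses
   n = 16, r = 5 and encodes as 0x60.  A hypothetical helper packing the
   ABCDEFGH byte from such a decomposition, mirroring the return expression
   at the end of vfp3_const_double_index below, might look like this.  */
static int
example_vfp3_imm8 (int sign, int n, int r)
{
  /* Only meaningful for 16 <= n <= 31 and 0 <= r <= 7.  */
  return (sign << 7) | ((r ^ 3) << 4) | (n - 16);
}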
8196 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8197 fconst[sd] instruction, or -1 if X isn't suitable. */
8198 static int
8199 vfp3_const_double_index (rtx x)
8201 REAL_VALUE_TYPE r, m;
8202 int sign, exponent;
8203 unsigned HOST_WIDE_INT mantissa, mant_hi;
8204 unsigned HOST_WIDE_INT mask;
8205 HOST_WIDE_INT m1, m2;
8206 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8208 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8209 return -1;
8211 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8213 /* We can't represent these things, so detect them first. */
8214 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8215 return -1;
8217 /* Extract sign, exponent and mantissa. */
8218 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8219 r = real_value_abs (&r);
8220 exponent = REAL_EXP (&r);
8221 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8222 highest (sign) bit, with a fixed binary point at bit point_pos.
8223 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8224 bits for the mantissa, this may fail (low bits would be lost). */
8225 real_ldexp (&m, &r, point_pos - exponent);
8226 REAL_VALUE_TO_INT (&m1, &m2, m);
8227 mantissa = m1;
8228 mant_hi = m2;
8230 /* If there are bits set in the low part of the mantissa, we can't
8231 represent this value. */
8232 if (mantissa != 0)
8233 return -1;
8235 /* Now make it so that mantissa contains the most-significant bits, and move
8236 the point_pos to indicate that the least-significant bits have been
8237 discarded. */
8238 point_pos -= HOST_BITS_PER_WIDE_INT;
8239 mantissa = mant_hi;
8241 /* We can permit four significant bits of mantissa only, plus a high bit
8242 which is always 1. */
8243 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8244 if ((mantissa & mask) != 0)
8245 return -1;
8247 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8248 mantissa >>= point_pos - 5;
8250 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8251 floating-point immediate zero with Neon using an integer-zero load, but
8252 that case is handled elsewhere.) */
8253 if (mantissa == 0)
8254 return -1;
8256 gcc_assert (mantissa >= 16 && mantissa <= 31);
8258 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8259 normalized significands are in the range [1, 2). (Our mantissa is shifted
8260 left 4 places at this point relative to normalized IEEE754 values). GCC
8261 internally uses [0.5, 1) (see real.c), so the exponent returned from
8262 REAL_EXP must be altered. */
8263 exponent = 5 - exponent;
8265 if (exponent < 0 || exponent > 7)
8266 return -1;
8268 /* Sign, mantissa and exponent are now in the correct form to plug into the
8269 formula described in the comment above. */
8270 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8273 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8275 vfp3_const_double_rtx (rtx x)
8277 if (!TARGET_VFP3)
8278 return 0;
8280 return vfp3_const_double_index (x) != -1;
8283 /* Recognize immediates which can be used in various Neon instructions. Legal
8284 immediates are described by the following table (for VMVN variants, the
8285 bitwise inverse of the constant shown is recognized. In either case, VMOV
8286 is output and the correct instruction to use for a given constant is chosen
8287 by the assembler). The constant shown is replicated across all elements of
8288 the destination vector.
8290 insn elems variant constant (binary)
8291 ---- ----- ------- -----------------
8292 vmov i32 0 00000000 00000000 00000000 abcdefgh
8293 vmov i32 1 00000000 00000000 abcdefgh 00000000
8294 vmov i32 2 00000000 abcdefgh 00000000 00000000
8295 vmov i32 3 abcdefgh 00000000 00000000 00000000
8296 vmov i16 4 00000000 abcdefgh
8297 vmov i16 5 abcdefgh 00000000
8298 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8299 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8300 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8301 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8302 vmvn i16 10 00000000 abcdefgh
8303 vmvn i16 11 abcdefgh 00000000
8304 vmov i32 12 00000000 00000000 abcdefgh 11111111
8305 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8306 vmov i32 14 00000000 abcdefgh 11111111 11111111
8307 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8308 vmov i8 16 abcdefgh
8309 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8310 eeeeeeee ffffffff gggggggg hhhhhhhh
8311 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8313 For case 18, B = !b. Representable values are exactly those accepted by
8314 vfp3_const_double_index, but are output as floating-point numbers rather
8315 than indices.
8317 Variants 0-5 (inclusive) may also be used as immediates for the second
8318 operand of VORR/VBIC instructions.
8320 The INVERSE argument causes the bitwise inverse of the given operand to be
8321 recognized instead (used for recognizing legal immediates for the VAND/VORN
8322 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8323 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8324 output, rather than the real insns vbic/vorr).
8326 INVERSE makes no difference to the recognition of float vectors.
8328 The return value is the variant of immediate as shown in the above table, or
8329 -1 if the given value doesn't match any of the listed patterns.
8331 static int
8332 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8333 rtx *modconst, int *elementwidth)
8335 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8336 matches = 1; \
8337 for (i = 0; i < idx; i += (STRIDE)) \
8338 if (!(TEST)) \
8339 matches = 0; \
8340 if (matches) \
8342 immtype = (CLASS); \
8343 elsize = (ELSIZE); \
8344 break; \
8347 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8348 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8349 unsigned char bytes[16];
8350 int immtype = -1, matches;
8351 unsigned int invmask = inverse ? 0xff : 0;
8353 /* Vectors of float constants. */
8354 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8356 rtx el0 = CONST_VECTOR_ELT (op, 0);
8357 REAL_VALUE_TYPE r0;
8359 if (!vfp3_const_double_rtx (el0))
8360 return -1;
8362 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8364 for (i = 1; i < n_elts; i++)
8366 rtx elt = CONST_VECTOR_ELT (op, i);
8367 REAL_VALUE_TYPE re;
8369 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8371 if (!REAL_VALUES_EQUAL (r0, re))
8372 return -1;
8375 if (modconst)
8376 *modconst = CONST_VECTOR_ELT (op, 0);
8378 if (elementwidth)
8379 *elementwidth = 0;
8381 return 18;
8384 /* Splat vector constant out into a byte vector. */
8385 for (i = 0; i < n_elts; i++)
8387 rtx el = CONST_VECTOR_ELT (op, i);
8388 unsigned HOST_WIDE_INT elpart;
8389 unsigned int part, parts;
8391 if (GET_CODE (el) == CONST_INT)
8393 elpart = INTVAL (el);
8394 parts = 1;
8396 else if (GET_CODE (el) == CONST_DOUBLE)
8398 elpart = CONST_DOUBLE_LOW (el);
8399 parts = 2;
8401 else
8402 gcc_unreachable ();
8404 for (part = 0; part < parts; part++)
8406 unsigned int byte;
8407 for (byte = 0; byte < innersize; byte++)
8409 bytes[idx++] = (elpart & 0xff) ^ invmask;
8410 elpart >>= BITS_PER_UNIT;
8412 if (GET_CODE (el) == CONST_DOUBLE)
8413 elpart = CONST_DOUBLE_HIGH (el);
8417 /* Sanity check. */
8418 gcc_assert (idx == GET_MODE_SIZE (mode));
8422 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8423 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8425 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8426 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8428 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8429 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8431 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8432 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8434 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8436 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8438 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8439 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8441 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8442 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8444 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8445 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8447 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8448 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8450 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8452 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8454 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8455 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8457 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8458 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8460 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8461 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8463 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8464 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8466 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8468 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8469 && bytes[i] == bytes[(i + 8) % idx]);
8471 while (0);
8473 if (immtype == -1)
8474 return -1;
8476 if (elementwidth)
8477 *elementwidth = elsize;
8479 if (modconst)
8481 unsigned HOST_WIDE_INT imm = 0;
8483 /* Un-invert bytes of recognized vector, if necessary. */
8484 if (invmask != 0)
8485 for (i = 0; i < idx; i++)
8486 bytes[i] ^= invmask;
8488 if (immtype == 17)
8490 /* FIXME: Broken on 32-bit H_W_I hosts. */
8491 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8493 for (i = 0; i < 8; i++)
8494 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8495 << (i * BITS_PER_UNIT);
8497 *modconst = GEN_INT (imm);
8499 else
8501 unsigned HOST_WIDE_INT imm = 0;
8503 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8504 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8506 *modconst = GEN_INT (imm);
8510 return immtype;
8511 #undef CHECK
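/* Editorial worked example (not from the original source): a V4SImode
   CONST_VECTOR whose four elements are all 0x000000ab splats to the byte
   pattern ab 00 00 00 repeated, which matches variant 0 above, so the
   function returns 0 with *ELEMENTWIDTH set to 32 and *MODCONST set to
   0xab.  With INVERSE nonzero every byte is first XORed with 0xff, so the
   same value is instead matched against the VMVN-style variants.  */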
8514 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8515 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8516 float elements), and a modified constant (whatever should be output for a
8517 VMOV) in *MODCONST. */
8520 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8521 rtx *modconst, int *elementwidth)
8523 rtx tmpconst;
8524 int tmpwidth;
8525 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8527 if (retval == -1)
8528 return 0;
8530 if (modconst)
8531 *modconst = tmpconst;
8533 if (elementwidth)
8534 *elementwidth = tmpwidth;
8536 return 1;
8539 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8540 the immediate is valid, write a constant suitable for using as an operand
8541 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8542 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8545 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8546 rtx *modconst, int *elementwidth)
8548 rtx tmpconst;
8549 int tmpwidth;
8550 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8552 if (retval < 0 || retval > 5)
8553 return 0;
8555 if (modconst)
8556 *modconst = tmpconst;
8558 if (elementwidth)
8559 *elementwidth = tmpwidth;
8561 return 1;
8564 /* Return a string suitable for output of Neon immediate logic operation
8565 MNEM. */
8567 char *
8568 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8569 int inverse, int quad)
8571 int width, is_valid;
8572 static char templ[40];
8574 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8576 gcc_assert (is_valid != 0);
8578 if (quad)
8579 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8580 else
8581 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8583 return templ;
8586 /* Output a sequence of pairwise operations to implement a reduction.
8587 NOTE: We do "too much work" here, because pairwise operations work on two
8588 registers-worth of operands in one go. Unfortunately I don't think we can
8589 exploit those extra calculations to do the full operation in fewer steps.
8590 Although all vector elements of the result but the first are ignored, we
8591 actually calculate the same result in each of the elements. An alternative
8592 such as initially loading a vector with zero to use as each of the second
8593 operands would use up an additional register and take an extra instruction,
8594 for no particular gain. */
8596 void
8597 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8598 rtx (*reduc) (rtx, rtx, rtx))
8600 enum machine_mode inner = GET_MODE_INNER (mode);
8601 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8602 rtx tmpsum = op1;
8604 for (i = parts / 2; i >= 1; i /= 2)
8606 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8607 emit_insn (reduc (dest, tmpsum, tmpsum));
8608 tmpsum = dest;
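/* Editorial example (assumed, not from the original source): reducing a
   four-element D-register vector {a, b, c, d} (e.g. V4HImode) with a
   pairwise-add generator takes two steps: the first emits a pairwise add
   of TMPSUM with itself, leaving {a+b, c+d, a+b, c+d} in a fresh register,
   and the second leaves the full sum a+b+c+d replicated in every element
   of OP0, of which the caller only uses element 0.  */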
8612 /* If VALS is a vector constant that can be loaded into a register
8613 using VDUP, generate instructions to do so and return an RTX to
8614 assign to the register. Otherwise return NULL_RTX. */
8616 static rtx
8617 neon_vdup_constant (rtx vals)
8619 enum machine_mode mode = GET_MODE (vals);
8620 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8621 int n_elts = GET_MODE_NUNITS (mode);
8622 bool all_same = true;
8623 rtx x;
8624 int i;
8626 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8627 return NULL_RTX;
8629 for (i = 0; i < n_elts; ++i)
8631 x = XVECEXP (vals, 0, i);
8632 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8633 all_same = false;
8636 if (!all_same)
8637 /* The elements are not all the same. We could handle repeating
8638 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8639 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8640 vdup.i16). */
8641 return NULL_RTX;
8643 /* We can load this constant by using VDUP and a constant in a
8644 single ARM register. This will be cheaper than a vector
8645 load. */
8647 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8648 return gen_rtx_VEC_DUPLICATE (mode, x);
8651 /* Generate code to load VALS, which is a PARALLEL containing only
8652 constants (for vec_init) or CONST_VECTOR, efficiently into a
8653 register. Returns an RTX to copy into the register, or NULL_RTX
8654 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8657 neon_make_constant (rtx vals)
8659 enum machine_mode mode = GET_MODE (vals);
8660 rtx target;
8661 rtx const_vec = NULL_RTX;
8662 int n_elts = GET_MODE_NUNITS (mode);
8663 int n_const = 0;
8664 int i;
8666 if (GET_CODE (vals) == CONST_VECTOR)
8667 const_vec = vals;
8668 else if (GET_CODE (vals) == PARALLEL)
8670 /* A CONST_VECTOR must contain only CONST_INTs and
8671 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8672 Only store valid constants in a CONST_VECTOR. */
8673 for (i = 0; i < n_elts; ++i)
8675 rtx x = XVECEXP (vals, 0, i);
8676 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8677 n_const++;
8679 if (n_const == n_elts)
8680 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8682 else
8683 gcc_unreachable ();
8685 if (const_vec != NULL
8686 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8687 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8688 return const_vec;
8689 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8690 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8691 pipeline cycle; creating the constant takes one or two ARM
8692 pipeline cycles. */
8693 return target;
8694 else if (const_vec != NULL_RTX)
8695 /* Load from constant pool. On Cortex-A8 this takes two cycles
8696 (for either double or quad vectors). We cannot take advantage
8697 of single-cycle VLD1 because we need a PC-relative addressing
8698 mode. */
8699 return const_vec;
8700 else
8701 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8702 We cannot construct an initializer. */
8703 return NULL_RTX;
8706 /* Initialize vector TARGET to VALS. */
8708 void
8709 neon_expand_vector_init (rtx target, rtx vals)
8711 enum machine_mode mode = GET_MODE (target);
8712 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8713 int n_elts = GET_MODE_NUNITS (mode);
8714 int n_var = 0, one_var = -1;
8715 bool all_same = true;
8716 rtx x, mem;
8717 int i;
8719 for (i = 0; i < n_elts; ++i)
8721 x = XVECEXP (vals, 0, i);
8722 if (!CONSTANT_P (x))
8723 ++n_var, one_var = i;
8725 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8726 all_same = false;
8729 if (n_var == 0)
8731 rtx constant = neon_make_constant (vals);
8732 if (constant != NULL_RTX)
8734 emit_move_insn (target, constant);
8735 return;
8739 /* Splat a single non-constant element if we can. */
8740 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8742 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8743 emit_insn (gen_rtx_SET (VOIDmode, target,
8744 gen_rtx_VEC_DUPLICATE (mode, x)));
8745 return;
8748 /* One field is non-constant. Load constant then overwrite varying
8749 field. This is more efficient than using the stack. */
8750 if (n_var == 1)
8752 rtx copy = copy_rtx (vals);
8753 rtx index = GEN_INT (one_var);
8755 /* Load constant part of vector, substitute neighboring value for
8756 varying element. */
8757 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8758 neon_expand_vector_init (target, copy);
8760 /* Insert variable. */
8761 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8762 switch (mode)
8764 case V8QImode:
8765 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8766 break;
8767 case V16QImode:
8768 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8769 break;
8770 case V4HImode:
8771 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8772 break;
8773 case V8HImode:
8774 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8775 break;
8776 case V2SImode:
8777 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8778 break;
8779 case V4SImode:
8780 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8781 break;
8782 case V2SFmode:
8783 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8784 break;
8785 case V4SFmode:
8786 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8787 break;
8788 case V2DImode:
8789 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8790 break;
8791 default:
8792 gcc_unreachable ();
8794 return;
8797 /* Construct the vector in memory one field at a time
8798 and load the whole vector. */
8799 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8800 for (i = 0; i < n_elts; i++)
8801 emit_move_insn (adjust_address_nv (mem, inner_mode,
8802 i * GET_MODE_SIZE (inner_mode)),
8803 XVECEXP (vals, 0, i));
8804 emit_move_insn (target, mem);
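/* Editorial example (assumed, not from the original source): initializing a
   V4SFmode vector to { x, 1.0f, 2.0f, 3.0f }, where only X is
   non-constant, takes the N_VAR == 1 path above: the routine first
   recurses to load the constant vector { 1.0f, 1.0f, 2.0f, 3.0f } (the
   varying slot borrows its neighbour's value) and then emits a
   vset_lane-style insertion of X into lane 0, avoiding the stack
   fallback.  */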
8807 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8808 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8809 reported source locations are bogus. */
8811 static void
8812 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8813 const char *err)
8815 HOST_WIDE_INT lane;
8817 gcc_assert (GET_CODE (operand) == CONST_INT);
8819 lane = INTVAL (operand);
8821 if (lane < low || lane >= high)
8822 error (err);
8825 /* Bounds-check lanes. */
8827 void
8828 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8830 bounds_check (operand, low, high, "lane out of range");
8833 /* Bounds-check constants. */
8835 void
8836 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8838 bounds_check (operand, low, high, "constant out of range");
8841 HOST_WIDE_INT
8842 neon_element_bits (enum machine_mode mode)
8844 if (mode == DImode)
8845 return GET_MODE_BITSIZE (mode);
8846 else
8847 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8851 /* Predicates for `match_operand' and `match_operator'. */
8853 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8855 cirrus_memory_offset (rtx op)
8857 /* Reject eliminable registers. */
8858 if (! (reload_in_progress || reload_completed)
8859 && ( reg_mentioned_p (frame_pointer_rtx, op)
8860 || reg_mentioned_p (arg_pointer_rtx, op)
8861 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8862 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8863 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8864 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8865 return 0;
8867 if (GET_CODE (op) == MEM)
8869 rtx ind;
8871 ind = XEXP (op, 0);
8873 /* Match: (mem (reg)). */
8874 if (GET_CODE (ind) == REG)
8875 return 1;
8877 /* Match:
8878 (mem (plus (reg)
8879 (const))). */
8880 if (GET_CODE (ind) == PLUS
8881 && GET_CODE (XEXP (ind, 0)) == REG
8882 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8883 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8884 return 1;
8887 return 0;
8890 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8891 WB is true if full writeback address modes are allowed and is false
8892 if limited writeback address modes (POST_INC and PRE_DEC) are
8893 allowed. */
8896 arm_coproc_mem_operand (rtx op, bool wb)
8898 rtx ind;
8900 /* Reject eliminable registers. */
8901 if (! (reload_in_progress || reload_completed)
8902 && ( reg_mentioned_p (frame_pointer_rtx, op)
8903 || reg_mentioned_p (arg_pointer_rtx, op)
8904 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8905 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8906 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8907 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8908 return FALSE;
8910 /* Constants are converted into offsets from labels. */
8911 if (GET_CODE (op) != MEM)
8912 return FALSE;
8914 ind = XEXP (op, 0);
8916 if (reload_completed
8917 && (GET_CODE (ind) == LABEL_REF
8918 || (GET_CODE (ind) == CONST
8919 && GET_CODE (XEXP (ind, 0)) == PLUS
8920 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8921 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8922 return TRUE;
8924 /* Match: (mem (reg)). */
8925 if (GET_CODE (ind) == REG)
8926 return arm_address_register_rtx_p (ind, 0);
8928 /* Auto-increment addressing modes. POST_INC and PRE_DEC are
8929 acceptable in any case (subject to verification by
8930 arm_address_register_rtx_p). We need WB to be true to accept
8931 PRE_INC and POST_DEC. */
8932 if (GET_CODE (ind) == POST_INC
8933 || GET_CODE (ind) == PRE_DEC
8934 || (wb
8935 && (GET_CODE (ind) == PRE_INC
8936 || GET_CODE (ind) == POST_DEC)))
8937 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8939 if (wb
8940 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8941 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8942 && GET_CODE (XEXP (ind, 1)) == PLUS
8943 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8944 ind = XEXP (ind, 1);
8946 /* Match:
8947 (plus (reg)
8948 (const)). */
8949 if (GET_CODE (ind) == PLUS
8950 && GET_CODE (XEXP (ind, 0)) == REG
8951 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8952 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8953 && INTVAL (XEXP (ind, 1)) > -1024
8954 && INTVAL (XEXP (ind, 1)) < 1024
8955 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8956 return TRUE;
8958 return FALSE;
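/* Editorial note (derived from the offset test above, not from the original
   comments): the register-plus-constant form therefore accepts word-aligned
   offsets from -1020 to +1020 inclusive, matching the scaled 8-bit offset
   field of the coprocessor load/store encodings; offsets such as #1024 or
   the unaligned #2 are rejected.  */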
8961 /* Return TRUE if OP is a memory operand which we can load or store a vector
8962 to/from. TYPE is one of the following values:
8963 0 - Vector load/store (vldr)
8964 1 - Core registers (ldm)
8965 2 - Element/structure loads (vld1)
8968 neon_vector_mem_operand (rtx op, int type)
8970 rtx ind;
8972 /* Reject eliminable registers. */
8973 if (! (reload_in_progress || reload_completed)
8974 && ( reg_mentioned_p (frame_pointer_rtx, op)
8975 || reg_mentioned_p (arg_pointer_rtx, op)
8976 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8977 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8978 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8979 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8980 return FALSE;
8982 /* Constants are converted into offsets from labels. */
8983 if (GET_CODE (op) != MEM)
8984 return FALSE;
8986 ind = XEXP (op, 0);
8988 if (reload_completed
8989 && (GET_CODE (ind) == LABEL_REF
8990 || (GET_CODE (ind) == CONST
8991 && GET_CODE (XEXP (ind, 0)) == PLUS
8992 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8993 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8994 return TRUE;
8996 /* Match: (mem (reg)). */
8997 if (GET_CODE (ind) == REG)
8998 return arm_address_register_rtx_p (ind, 0);
9000 /* Allow post-increment with Neon registers. */
9001 if ((type != 1 && GET_CODE (ind) == POST_INC)
9002 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9003 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9005 /* FIXME: vld1 allows register post-modify. */
9007 /* Match:
9008 (plus (reg)
9009 (const)). */
9010 if (type == 0
9011 && GET_CODE (ind) == PLUS
9012 && GET_CODE (XEXP (ind, 0)) == REG
9013 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9014 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9015 && INTVAL (XEXP (ind, 1)) > -1024
9016 && INTVAL (XEXP (ind, 1)) < 1016
9017 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9018 return TRUE;
9020 return FALSE;
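/* Editorial note (derived from the offset test above, not from the original
   comments): for TYPE 0 (vldr-style) addressing the accepted constant
   offsets are the word-aligned values from -1020 up to +1012, a slightly
   tighter upper bound than the one used in arm_coproc_mem_operand.  */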
9023 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9024 type. */
9026 neon_struct_mem_operand (rtx op)
9028 rtx ind;
9030 /* Reject eliminable registers. */
9031 if (! (reload_in_progress || reload_completed)
9032 && ( reg_mentioned_p (frame_pointer_rtx, op)
9033 || reg_mentioned_p (arg_pointer_rtx, op)
9034 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9035 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9036 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9037 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9038 return FALSE;
9040 /* Constants are converted into offsets from labels. */
9041 if (GET_CODE (op) != MEM)
9042 return FALSE;
9044 ind = XEXP (op, 0);
9046 if (reload_completed
9047 && (GET_CODE (ind) == LABEL_REF
9048 || (GET_CODE (ind) == CONST
9049 && GET_CODE (XEXP (ind, 0)) == PLUS
9050 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9051 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9052 return TRUE;
9054 /* Match: (mem (reg)). */
9055 if (GET_CODE (ind) == REG)
9056 return arm_address_register_rtx_p (ind, 0);
9058 return FALSE;
9061 /* Return true if X is a register that will be eliminated later on. */
9063 arm_eliminable_register (rtx x)
9065 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9066 || REGNO (x) == ARG_POINTER_REGNUM
9067 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9068 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9071 /* Return GENERAL_REGS if a scratch register required to reload x to/from
9072 coprocessor registers. Otherwise return NO_REGS. */
9074 enum reg_class
9075 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9077 if (mode == HFmode)
9079 if (!TARGET_NEON_FP16)
9080 return GENERAL_REGS;
9081 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9082 return NO_REGS;
9083 return GENERAL_REGS;
9086 if (TARGET_NEON
9087 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9088 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9089 && neon_vector_mem_operand (x, 0))
9090 return NO_REGS;
9092 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9093 return NO_REGS;
9095 return GENERAL_REGS;
9098 /* Values which must be returned in the most-significant end of the return
9099 register. */
9101 static bool
9102 arm_return_in_msb (const_tree valtype)
9104 return (TARGET_AAPCS_BASED
9105 && BYTES_BIG_ENDIAN
9106 && (AGGREGATE_TYPE_P (valtype)
9107 || TREE_CODE (valtype) == COMPLEX_TYPE));
9110 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9111 Used by the Cirrus Maverick code, which has to work around
9112 a hardware bug triggered by such instructions. */
9113 static bool
9114 arm_memory_load_p (rtx insn)
9116 rtx body, lhs, rhs;
9118 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9119 return false;
9121 body = PATTERN (insn);
9123 if (GET_CODE (body) != SET)
9124 return false;
9126 lhs = XEXP (body, 0);
9127 rhs = XEXP (body, 1);
9129 lhs = REG_OR_SUBREG_RTX (lhs);
9131 /* If the destination is not a general purpose
9132 register we do not have to worry. */
9133 if (GET_CODE (lhs) != REG
9134 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9135 return false;
9137 /* As well as loads from memory we also have to react
9138 to loads of invalid constants which will be turned
9139 into loads from the minipool. */
9140 return (GET_CODE (rhs) == MEM
9141 || GET_CODE (rhs) == SYMBOL_REF
9142 || note_invalid_constants (insn, -1, false));
9145 /* Return TRUE if INSN is a Cirrus instruction. */
9146 static bool
9147 arm_cirrus_insn_p (rtx insn)
9149 enum attr_cirrus attr;
9151 /* get_attr cannot accept USE or CLOBBER. */
9152 if (!insn
9153 || GET_CODE (insn) != INSN
9154 || GET_CODE (PATTERN (insn)) == USE
9155 || GET_CODE (PATTERN (insn)) == CLOBBER)
9156 return 0;
9158 attr = get_attr_cirrus (insn);
9160 return attr != CIRRUS_NOT;
9163 /* Cirrus reorg for invalid instruction combinations. */
9164 static void
9165 cirrus_reorg (rtx first)
9167 enum attr_cirrus attr;
9168 rtx body = PATTERN (first);
9169 rtx t;
9170 int nops;
9172 /* Any branch must be followed by 2 non-Cirrus instructions. */
9173 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9175 nops = 0;
9176 t = next_nonnote_insn (first);
9178 if (arm_cirrus_insn_p (t))
9179 ++ nops;
9181 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9182 ++ nops;
9184 while (nops --)
9185 emit_insn_after (gen_nop (), first);
9187 return;
9190 /* (float (blah)) is in parallel with a clobber. */
9191 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9192 body = XVECEXP (body, 0, 0);
9194 if (GET_CODE (body) == SET)
9196 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9198 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9199 be followed by a non-Cirrus insn. */
9200 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9202 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9203 emit_insn_after (gen_nop (), first);
9205 return;
9207 else if (arm_memory_load_p (first))
9209 unsigned int arm_regno;
9211 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9212 ldr/cfmv64hr combination where the Rd field is the same
9213 in both instructions must be split with a non-Cirrus
9214 insn. Example:
9216 ldr r0, blah
9218 cfmvsr mvf0, r0. */
9220 /* Get Arm register number for ldr insn. */
9221 if (GET_CODE (lhs) == REG)
9222 arm_regno = REGNO (lhs);
9223 else
9225 gcc_assert (GET_CODE (rhs) == REG);
9226 arm_regno = REGNO (rhs);
9229 /* Next insn. */
9230 first = next_nonnote_insn (first);
9232 if (! arm_cirrus_insn_p (first))
9233 return;
9235 body = PATTERN (first);
9237 /* (float (blah)) is in parallel with a clobber. */
9238 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9239 body = XVECEXP (body, 0, 0);
9241 if (GET_CODE (body) == FLOAT)
9242 body = XEXP (body, 0);
9244 if (get_attr_cirrus (first) == CIRRUS_MOVE
9245 && GET_CODE (XEXP (body, 1)) == REG
9246 && arm_regno == REGNO (XEXP (body, 1)))
9247 emit_insn_after (gen_nop (), first);
9249 return;
9253 /* get_attr cannot accept USE or CLOBBER. */
9254 if (!first
9255 || GET_CODE (first) != INSN
9256 || GET_CODE (PATTERN (first)) == USE
9257 || GET_CODE (PATTERN (first)) == CLOBBER)
9258 return;
9260 attr = get_attr_cirrus (first);
9262 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9263 must be followed by a non-coprocessor instruction. */
9264 if (attr == CIRRUS_COMPARE)
9266 nops = 0;
9268 t = next_nonnote_insn (first);
9270 if (arm_cirrus_insn_p (t))
9271 ++ nops;
9273 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9274 ++ nops;
9276 while (nops --)
9277 emit_insn_after (gen_nop (), first);
9279 return;
9283 /* Return TRUE if X references a SYMBOL_REF. */
9285 symbol_mentioned_p (rtx x)
9287 const char * fmt;
9288 int i;
9290 if (GET_CODE (x) == SYMBOL_REF)
9291 return 1;
9293 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9294 are constant offsets, not symbols. */
9295 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9296 return 0;
9298 fmt = GET_RTX_FORMAT (GET_CODE (x));
9300 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9302 if (fmt[i] == 'E')
9304 int j;
9306 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9307 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9308 return 1;
9310 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9311 return 1;
9314 return 0;
9317 /* Return TRUE if X references a LABEL_REF. */
9319 label_mentioned_p (rtx x)
9321 const char * fmt;
9322 int i;
9324 if (GET_CODE (x) == LABEL_REF)
9325 return 1;
9327 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9328 instruction, but they are constant offsets, not symbols. */
9329 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9330 return 0;
9332 fmt = GET_RTX_FORMAT (GET_CODE (x));
9333 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9335 if (fmt[i] == 'E')
9337 int j;
9339 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9340 if (label_mentioned_p (XVECEXP (x, i, j)))
9341 return 1;
9343 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9344 return 1;
9347 return 0;
9351 tls_mentioned_p (rtx x)
9353 switch (GET_CODE (x))
9355 case CONST:
9356 return tls_mentioned_p (XEXP (x, 0));
9358 case UNSPEC:
9359 if (XINT (x, 1) == UNSPEC_TLS)
9360 return 1;
9362 default:
9363 return 0;
9367 /* Must not copy any rtx that uses a pc-relative address. */
9369 static int
9370 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9372 if (GET_CODE (*x) == UNSPEC
9373 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9374 return 1;
9375 return 0;
9378 static bool
9379 arm_cannot_copy_insn_p (rtx insn)
9381 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9384 enum rtx_code
9385 minmax_code (rtx x)
9387 enum rtx_code code = GET_CODE (x);
9389 switch (code)
9391 case SMAX:
9392 return GE;
9393 case SMIN:
9394 return LE;
9395 case UMIN:
9396 return LEU;
9397 case UMAX:
9398 return GEU;
9399 default:
9400 gcc_unreachable ();
9404 /* Return 1 if memory locations are adjacent. */
9406 adjacent_mem_locations (rtx a, rtx b)
9408 /* We don't guarantee to preserve the order of these memory refs. */
9409 if (volatile_refs_p (a) || volatile_refs_p (b))
9410 return 0;
9412 if ((GET_CODE (XEXP (a, 0)) == REG
9413 || (GET_CODE (XEXP (a, 0)) == PLUS
9414 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9415 && (GET_CODE (XEXP (b, 0)) == REG
9416 || (GET_CODE (XEXP (b, 0)) == PLUS
9417 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9419 HOST_WIDE_INT val0 = 0, val1 = 0;
9420 rtx reg0, reg1;
9421 int val_diff;
9423 if (GET_CODE (XEXP (a, 0)) == PLUS)
9425 reg0 = XEXP (XEXP (a, 0), 0);
9426 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9428 else
9429 reg0 = XEXP (a, 0);
9431 if (GET_CODE (XEXP (b, 0)) == PLUS)
9433 reg1 = XEXP (XEXP (b, 0), 0);
9434 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9436 else
9437 reg1 = XEXP (b, 0);
9439 /* Don't accept any offset that will require multiple
9440 instructions to handle, since this would cause the
9441 arith_adjacentmem pattern to output an overlong sequence. */
9442 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9443 return 0;
9445 /* Don't allow an eliminable register: register elimination can make
9446 the offset too large. */
9447 if (arm_eliminable_register (reg0))
9448 return 0;
9450 val_diff = val1 - val0;
9452 if (arm_ld_sched)
9454 /* If the target has load delay slots, then there's no benefit
9455 to using an ldm instruction unless the offset is zero and
9456 we are optimizing for size. */
9457 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9458 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9459 && (val_diff == 4 || val_diff == -4));
9462 return ((REGNO (reg0) == REGNO (reg1))
9463 && (val_diff == 4 || val_diff == -4));
9466 return 0;
9469 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9470 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9471 instruction. ADD_OFFSET is nonzero if the base address register needs
9472 to be modified with an add instruction before we can use it. */
9474 static bool
9475 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9476 int nops, HOST_WIDE_INT add_offset)
9478 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9479 if the offset isn't small enough. The reason 2 ldrs are faster
9480 is because these ARMs are able to do more than one cache access
9481 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9482 whilst the ARM8 has a double bandwidth cache. This means that
9483 these cores can do both an instruction fetch and a data fetch in
9484 a single cycle, so the trick of calculating the address into a
9485 scratch register (one of the result regs) and then doing a load
9486 multiple actually becomes slower (and no smaller in code size).
9487 That is the transformation
9489 ldr rd1, [rbase + offset]
9490 ldr rd2, [rbase + offset + 4]
9494 add rd1, rbase, offset
9495 ldmia rd1, {rd1, rd2}
9497 produces worse code -- '3 cycles + any stalls on rd2' instead of
9498 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9499 access per cycle, the first sequence could never complete in less
9500 than 6 cycles, whereas the ldm sequence would only take 5 and
9501 would make better use of sequential accesses if not hitting the
9502 cache.
9504 We cheat here and test 'arm_ld_sched' which we currently know to
9505 only be true for the ARM8, ARM9 and StrongARM. If this ever
9506 changes, then the test below needs to be reworked. */
9507 if (nops == 2 && arm_ld_sched && add_offset != 0)
9508 return false;
9510 /* XScale has load-store double instructions, but they have stricter
9511 alignment requirements than load-store multiple, so we cannot
9512 use them.
9514 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9515 the pipeline until completion.
9517 NREGS CYCLES
9523 An ldr instruction takes 1-3 cycles, but does not block the
9524 pipeline.
9526 NREGS CYCLES
9527 1 1-3
9528 2 2-6
9529 3 3-9
9530 4 4-12
9532 Best case ldr will always win. However, the more ldr instructions
9533 we issue, the less likely we are to be able to schedule them well.
9534 Using ldr instructions also increases code size.
9536 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9537 for counts of 3 or 4 regs. */
9538 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9539 return false;
9540 return true;
9543 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9544 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9545 an array ORDER which describes the sequence to use when accessing the
9546 offsets that produces an ascending order. In this sequence, each
9547 offset must be larger by exactly 4 than the previous one. ORDER[0]
9548 must have been filled in with the lowest offset by the caller.
9549 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9550 we use to verify that ORDER produces an ascending order of registers.
9551 Return true if it was possible to construct such an order, false if
9552 not. */
9554 static bool
9555 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9556 int *unsorted_regs)
9558 int i;
9559 for (i = 1; i < nops; i++)
9561 int j;
9563 order[i] = order[i - 1];
9564 for (j = 0; j < nops; j++)
9565 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9567 /* We must find exactly one offset that is higher than the
9568 previous one by 4. */
9569 if (order[i] != order[i - 1])
9570 return false;
9571 order[i] = j;
9573 if (order[i] == order[i - 1])
9574 return false;
9575 /* The register numbers must be ascending. */
9576 if (unsorted_regs != NULL
9577 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9578 return false;
9580 return true;
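/* Editorial worked example (not from the original source): with
   UNSORTED_OFFSETS = { 8, 0, 4, 12 } and ORDER[0] preset to 1 (the index
   of the lowest offset), the loop above finds offsets 4, 8 and 12 in turn
   and produces ORDER = { 1, 2, 0, 3 }.  If any step found no offset that
   is exactly 4 larger than the previous one, or found more than one, the
   function would return false.  */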
9583 /* Used to determine in a peephole whether a sequence of load
9584 instructions can be changed into a load-multiple instruction.
9585 NOPS is the number of separate load instructions we are examining. The
9586 first NOPS entries in OPERANDS are the destination registers, the
9587 next NOPS entries are memory operands. If this function is
9588 successful, *BASE is set to the common base register of the memory
9589 accesses; *LOAD_OFFSET is set to the first memory location's offset
9590 from that base register.
9591 REGS is an array filled in with the destination register numbers.
9592 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
9593 insn numbers to an ascending order of loads. If CHECK_REGS is true,
9594 the sequence of registers in REGS matches the loads from ascending memory
9595 locations, and the function verifies that the register numbers are
9596 themselves ascending. If CHECK_REGS is false, the register numbers
9597 are stored in the order they are found in the operands. */
9598 static int
9599 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9600 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9602 int unsorted_regs[MAX_LDM_STM_OPS];
9603 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9604 int order[MAX_LDM_STM_OPS];
9605 rtx base_reg_rtx = NULL;
9606 int base_reg = -1;
9607 int i, ldm_case;
9609 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9610 easily extended if required. */
9611 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9613 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9615 /* Loop over the operands and check that the memory references are
9616 suitable (i.e. immediate offsets from the same base register). At
9617 the same time, extract the target register, and the memory
9618 offsets. */
9619 for (i = 0; i < nops; i++)
9621 rtx reg;
9622 rtx offset;
9624 /* Convert a subreg of a mem into the mem itself. */
9625 if (GET_CODE (operands[nops + i]) == SUBREG)
9626 operands[nops + i] = alter_subreg (operands + (nops + i));
9628 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9630 /* Don't reorder volatile memory references; it doesn't seem worth
9631 looking for the case where the order is ok anyway. */
9632 if (MEM_VOLATILE_P (operands[nops + i]))
9633 return 0;
9635 offset = const0_rtx;
9637 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9638 || (GET_CODE (reg) == SUBREG
9639 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9640 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9641 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9642 == REG)
9643 || (GET_CODE (reg) == SUBREG
9644 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9645 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9646 == CONST_INT)))
9648 if (i == 0)
9650 base_reg = REGNO (reg);
9651 base_reg_rtx = reg;
9652 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9653 return 0;
9655 else if (base_reg != (int) REGNO (reg))
9656 /* Not addressed from the same base register. */
9657 return 0;
9659 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9660 ? REGNO (operands[i])
9661 : REGNO (SUBREG_REG (operands[i])));
9663 /* If it isn't an integer register, or if it overwrites the
9664 base register but isn't the last insn in the list, then
9665 we can't do this. */
9666 if (unsorted_regs[i] < 0
9667 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9668 || unsorted_regs[i] > 14
9669 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9670 return 0;
9672 unsorted_offsets[i] = INTVAL (offset);
9673 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9674 order[0] = i;
9676 else
9677 /* Not a suitable memory address. */
9678 return 0;
9681 /* All the useful information has now been extracted from the
9682 operands into unsorted_regs and unsorted_offsets; additionally,
9683 order[0] has been set to the lowest offset in the list. Sort
9684 the offsets into order, verifying that they are adjacent, and
9685 check that the register numbers are ascending. */
9686 if (!compute_offset_order (nops, unsorted_offsets, order,
9687 check_regs ? unsorted_regs : NULL))
9688 return 0;
9690 if (saved_order)
9691 memcpy (saved_order, order, sizeof order);
9693 if (base)
9695 *base = base_reg;
9697 for (i = 0; i < nops; i++)
9698 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9700 *load_offset = unsorted_offsets[order[0]];
9703 if (TARGET_THUMB1
9704 && !peep2_reg_dead_p (nops, base_reg_rtx))
9705 return 0;
9707 if (unsorted_offsets[order[0]] == 0)
9708 ldm_case = 1; /* ldmia */
9709 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9710 ldm_case = 2; /* ldmib */
9711 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9712 ldm_case = 3; /* ldmda */
9713 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9714 ldm_case = 4; /* ldmdb */
9715 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9716 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9717 ldm_case = 5;
9718 else
9719 return 0;
9721 if (!multiple_operation_profitable_p (false, nops,
9722 ldm_case == 5
9723 ? unsorted_offsets[order[0]] : 0))
9724 return 0;
9726 return ldm_case;
9729 /* Used to determine in a peephole whether a sequence of store instructions can
9730 be changed into a store-multiple instruction.
9731 NOPS is the number of separate store instructions we are examining.
9732 NOPS_TOTAL is the total number of instructions recognized by the peephole
9733 pattern.
9734 The first NOPS entries in OPERANDS are the source registers, the next
9735 NOPS entries are memory operands. If this function is successful, *BASE is
9736 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9737 to the first memory location's offset from that base register. REGS is an
9738 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9739 likewise filled with the corresponding rtx's.
9740 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9741 numbers to an ascending order of stores.
9742 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9743 from ascending memory locations, and the function verifies that the register
9744 numbers are themselves ascending. If CHECK_REGS is false, the register
9745 numbers are stored in the order they are found in the operands. */
9746 static int
9747 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9748 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9749 HOST_WIDE_INT *load_offset, bool check_regs)
9751 int unsorted_regs[MAX_LDM_STM_OPS];
9752 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9753 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9754 int order[MAX_LDM_STM_OPS];
9755 int base_reg = -1;
9756 rtx base_reg_rtx = NULL;
9757 int i, stm_case;
9759 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9760 easily extended if required. */
9761 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9763 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9765 /* Loop over the operands and check that the memory references are
9766 suitable (i.e. immediate offsets from the same base register). At
9767 the same time, extract the target register, and the memory
9768 offsets. */
9769 for (i = 0; i < nops; i++)
9771 rtx reg;
9772 rtx offset;
9774 /* Convert a subreg of a mem into the mem itself. */
9775 if (GET_CODE (operands[nops + i]) == SUBREG)
9776 operands[nops + i] = alter_subreg (operands + (nops + i));
9778 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9780 /* Don't reorder volatile memory references; it doesn't seem worth
9781 looking for the case where the order is ok anyway. */
9782 if (MEM_VOLATILE_P (operands[nops + i]))
9783 return 0;
9785 offset = const0_rtx;
9787 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9788 || (GET_CODE (reg) == SUBREG
9789 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9790 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9791 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9792 == REG)
9793 || (GET_CODE (reg) == SUBREG
9794 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9795 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9796 == CONST_INT)))
9798 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9799 ? operands[i] : SUBREG_REG (operands[i]));
9800 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9802 if (i == 0)
9804 base_reg = REGNO (reg);
9805 base_reg_rtx = reg;
9806 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9807 return 0;
9809 else if (base_reg != (int) REGNO (reg))
9810 /* Not addressed from the same base register. */
9811 return 0;
9813 /* If it isn't an integer register, then we can't do this. */
9814 if (unsorted_regs[i] < 0
9815 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9816 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9817 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9818 || unsorted_regs[i] > 14)
9819 return 0;
9821 unsorted_offsets[i] = INTVAL (offset);
9822 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9823 order[0] = i;
9825 else
9826 /* Not a suitable memory address. */
9827 return 0;
9830 /* All the useful information has now been extracted from the
9831 operands into unsorted_regs and unsorted_offsets; additionally,
9832 order[0] has been set to the lowest offset in the list. Sort
9833 the offsets into order, verifying that they are adjacent, and
9834 check that the register numbers are ascending. */
9835 if (!compute_offset_order (nops, unsorted_offsets, order,
9836 check_regs ? unsorted_regs : NULL))
9837 return 0;
9839 if (saved_order)
9840 memcpy (saved_order, order, sizeof order);
9842 if (base)
9844 *base = base_reg;
9846 for (i = 0; i < nops; i++)
9848 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9849 if (reg_rtxs)
9850 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9853 *load_offset = unsorted_offsets[order[0]];
9856 if (TARGET_THUMB1
9857 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9858 return 0;
9860 if (unsorted_offsets[order[0]] == 0)
9861 stm_case = 1; /* stmia */
9862 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9863 stm_case = 2; /* stmib */
9864 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9865 stm_case = 3; /* stmda */
9866 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9867 stm_case = 4; /* stmdb */
9868 else
9869 return 0;
9871 if (!multiple_operation_profitable_p (false, nops, 0))
9872 return 0;
9874 return stm_case;
9877 /* Routines for use in generating RTL. */
9879 /* Generate a load-multiple instruction. COUNT is the number of loads in
9880 the instruction; REGS and MEMS are arrays containing the operands.
9881 BASEREG is the base register to be used in addressing the memory operands.
9882 WBACK_OFFSET is nonzero if the instruction should update the base
9883 register. */
9885 static rtx
9886 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9887 HOST_WIDE_INT wback_offset)
9889 int i = 0, j;
9890 rtx result;
9892 if (!multiple_operation_profitable_p (false, count, 0))
9894 rtx seq;
9896 start_sequence ();
9898 for (i = 0; i < count; i++)
9899 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9901 if (wback_offset != 0)
9902 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9904 seq = get_insns ();
9905 end_sequence ();
9907 return seq;
9910 result = gen_rtx_PARALLEL (VOIDmode,
9911 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9912 if (wback_offset != 0)
9914 XVECEXP (result, 0, 0)
9915 = gen_rtx_SET (VOIDmode, basereg,
9916 plus_constant (basereg, wback_offset));
9917 i = 1;
9918 count++;
9921 for (j = 0; i < count; i++, j++)
9922 XVECEXP (result, 0, i)
9923 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9925 return result;
9928 /* Generate a store-multiple instruction. COUNT is the number of stores in
9929 the instruction; REGS and MEMS are arrays containing the operands.
9930 BASEREG is the base register to be used in addressing the memory operands.
9931 WBACK_OFFSET is nonzero if the instruction should update the base
9932 register. */
9934 static rtx
9935 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9936 HOST_WIDE_INT wback_offset)
9938 int i = 0, j;
9939 rtx result;
9941 if (GET_CODE (basereg) == PLUS)
9942 basereg = XEXP (basereg, 0);
9944 if (!multiple_operation_profitable_p (false, count, 0))
9946 rtx seq;
9948 start_sequence ();
9950 for (i = 0; i < count; i++)
9951 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9953 if (wback_offset != 0)
9954 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9956 seq = get_insns ();
9957 end_sequence ();
9959 return seq;
9962 result = gen_rtx_PARALLEL (VOIDmode,
9963 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9964 if (wback_offset != 0)
9966 XVECEXP (result, 0, 0)
9967 = gen_rtx_SET (VOIDmode, basereg,
9968 plus_constant (basereg, wback_offset));
9969 i = 1;
9970 count++;
9973 for (j = 0; i < count; i++, j++)
9974 XVECEXP (result, 0, i)
9975 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9977 return result;
9980 /* Generate either a load-multiple or a store-multiple instruction. This
9981 function can be used in situations where we can start with a single MEM
9982 rtx and adjust its address upwards.
9983 COUNT is the number of operations in the instruction, not counting a
9984 possible update of the base register. REGS is an array containing the
9985 register operands.
9986 BASEREG is the base register to be used in addressing the memory operands,
9987 which are constructed from BASEMEM.
9988 WRITE_BACK specifies whether the generated instruction should include an
9989 update of the base register.
9990 OFFSETP is used to pass an offset to and from this function; this offset
9991 is not used when constructing the address (instead BASEMEM should have an
9992 appropriate offset in its address), it is used only for setting
9993 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
9995 static rtx
9996 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9997 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9999 rtx mems[MAX_LDM_STM_OPS];
10000 HOST_WIDE_INT offset = *offsetp;
10001 int i;
10003 gcc_assert (count <= MAX_LDM_STM_OPS);
10005 if (GET_CODE (basereg) == PLUS)
10006 basereg = XEXP (basereg, 0);
10008 for (i = 0; i < count; i++)
10010 rtx addr = plus_constant (basereg, i * 4);
10011 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10012 offset += 4;
10015 if (write_back)
10016 *offsetp = offset;
10018 if (is_load)
10019 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10020 write_back ? 4 * count : 0);
10021 else
10022 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10023 write_back ? 4 * count : 0);
10027 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10028 rtx basemem, HOST_WIDE_INT *offsetp)
10030 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10031 offsetp);
10035 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10036 rtx basemem, HOST_WIDE_INT *offsetp)
10038 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10039 offsetp);
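/* A usage sketch (added for illustration; not part of the original source):
   a call such as

     arm_gen_load_multiple (regs, 4, basereg, TRUE, basemem, &offset);

   builds four SImode memory operands at basereg + 0, + 4, + 8 and + 12
   (their MEM_OFFSETs taken relative to BASEMEM), requests a write-back of
   4 * 4 = 16 bytes to the base register, and advances *offsetp by 16.  */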
10042 /* Called from a peephole2 expander to turn a sequence of loads into an
10043 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10044 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10045 is true if we can reorder the registers because they are subsequently
10046 used commutatively.
10047 Returns true iff we could generate a new instruction. */
10049 bool
10050 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10052 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10053 rtx mems[MAX_LDM_STM_OPS];
10054 int i, j, base_reg;
10055 rtx base_reg_rtx;
10056 HOST_WIDE_INT offset;
10057 int write_back = FALSE;
10058 int ldm_case;
10059 rtx addr;
10061 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10062 &base_reg, &offset, !sort_regs);
10064 if (ldm_case == 0)
10065 return false;
10067 if (sort_regs)
10068 for (i = 0; i < nops - 1; i++)
10069 for (j = i + 1; j < nops; j++)
10070 if (regs[i] > regs[j])
10072 int t = regs[i];
10073 regs[i] = regs[j];
10074 regs[j] = t;
10076 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10078 if (TARGET_THUMB1)
10080 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10081 gcc_assert (ldm_case == 1 || ldm_case == 5);
10082 write_back = TRUE;
10085 if (ldm_case == 5)
10087 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10088 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10089 offset = 0;
10090 if (!TARGET_THUMB1)
10092 base_reg = regs[0];
10093 base_reg_rtx = newbase;
10097 for (i = 0; i < nops; i++)
10099 addr = plus_constant (base_reg_rtx, offset + i * 4);
10100 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10101 SImode, addr, 0);
10103 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10104 write_back ? offset + i * 4 : 0));
10105 return true;
10108 /* Called from a peephole2 expander to turn a sequence of stores into an
10109 STM instruction. OPERANDS are the operands found by the peephole matcher;
10110 NOPS indicates how many separate stores we are trying to combine.
10111 Returns true iff we could generate a new instruction. */
10113 bool
10114 gen_stm_seq (rtx *operands, int nops)
10116 int i;
10117 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10118 rtx mems[MAX_LDM_STM_OPS];
10119 int base_reg;
10120 rtx base_reg_rtx;
10121 HOST_WIDE_INT offset;
10122 int write_back = FALSE;
10123 int stm_case;
10124 rtx addr;
10125 bool base_reg_dies;
10127 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10128 mem_order, &base_reg, &offset, true);
10130 if (stm_case == 0)
10131 return false;
10133 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10135 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10136 if (TARGET_THUMB1)
10138 gcc_assert (base_reg_dies);
10139 write_back = TRUE;
10142 if (stm_case == 5)
10144 gcc_assert (base_reg_dies);
10145 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10146 offset = 0;
10149 addr = plus_constant (base_reg_rtx, offset);
10151 for (i = 0; i < nops; i++)
10153 addr = plus_constant (base_reg_rtx, offset + i * 4);
10154 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10155 SImode, addr, 0);
10157 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10158 write_back ? offset + i * 4 : 0));
10159 return true;
10162 /* Called from a peephole2 expander to turn a sequence of stores that are
10163 preceded by constant loads into an STM instruction. OPERANDS are the
10164 operands found by the peephole matcher; NOPS indicates how many
10165 separate stores we are trying to combine; there are 2 * NOPS
10166 instructions in the peephole.
10167 Returns true iff we could generate a new instruction. */
10169 bool
10170 gen_const_stm_seq (rtx *operands, int nops)
10172 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10173 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10174 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10175 rtx mems[MAX_LDM_STM_OPS];
10176 int base_reg;
10177 rtx base_reg_rtx;
10178 HOST_WIDE_INT offset;
10179 int write_back = FALSE;
10180 int stm_case;
10181 rtx addr;
10182 bool base_reg_dies;
10183 int i, j;
10184 HARD_REG_SET allocated;
10186 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10187 mem_order, &base_reg, &offset, false);
10189 if (stm_case == 0)
10190 return false;
10192 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10194 /* If the same register is used more than once, try to find a free
10195 register. */
10196 CLEAR_HARD_REG_SET (allocated);
10197 for (i = 0; i < nops; i++)
10199 for (j = i + 1; j < nops; j++)
10200 if (regs[i] == regs[j])
10202 rtx t = peep2_find_free_register (0, nops * 2,
10203 TARGET_THUMB1 ? "l" : "r",
10204 SImode, &allocated);
10205 if (t == NULL_RTX)
10206 return false;
10207 reg_rtxs[i] = t;
10208 regs[i] = REGNO (t);
10212 /* Compute an ordering that maps the register numbers to an ascending
10213 sequence. */
10214 reg_order[0] = 0;
10215 for (i = 0; i < nops; i++)
10216 if (regs[i] < regs[reg_order[0]])
10217 reg_order[0] = i;
10219 for (i = 1; i < nops; i++)
10221 int this_order = reg_order[i - 1];
10222 for (j = 0; j < nops; j++)
10223 if (regs[j] > regs[reg_order[i - 1]]
10224 && (this_order == reg_order[i - 1]
10225 || regs[j] < regs[this_order]))
10226 this_order = j;
10227 reg_order[i] = this_order;
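/* Worked example (added for illustration; not part of the original source):
   with nops == 3 and regs[] == {3, 1, 2}, the selection above produces
   reg_order[] == {1, 2, 0}; that is, reg_order[k] is the operand index that
   holds the k-th smallest register number (r1, r2, r3 in ascending order).  */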
10230 /* Ensure that registers that must be live after the instruction end
10231 up with the correct value. */
10232 for (i = 0; i < nops; i++)
10234 int this_order = reg_order[i];
10235 if ((this_order != mem_order[i]
10236 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10237 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10238 return false;
10241 /* Load the constants. */
10242 for (i = 0; i < nops; i++)
10244 rtx op = operands[2 * nops + mem_order[i]];
10245 sorted_regs[i] = regs[reg_order[i]];
10246 emit_move_insn (reg_rtxs[reg_order[i]], op);
10249 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10251 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10252 if (TARGET_THUMB1)
10254 gcc_assert (base_reg_dies);
10255 write_back = TRUE;
10258 if (stm_case == 5)
10260 gcc_assert (base_reg_dies);
10261 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10262 offset = 0;
10265 addr = plus_constant (base_reg_rtx, offset);
10267 for (i = 0; i < nops; i++)
10269 addr = plus_constant (base_reg_rtx, offset + i * 4);
10270 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10271 SImode, addr, 0);
10273 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10274 write_back ? offset + i * 4 : 0));
10275 return true;
10279 arm_gen_movmemqi (rtx *operands)
10281 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10282 HOST_WIDE_INT srcoffset, dstoffset;
10283 int i;
10284 rtx src, dst, srcbase, dstbase;
10285 rtx part_bytes_reg = NULL;
10286 rtx mem;
10288 if (GET_CODE (operands[2]) != CONST_INT
10289 || GET_CODE (operands[3]) != CONST_INT
10290 || INTVAL (operands[2]) > 64
10291 || INTVAL (operands[3]) & 3)
10292 return 0;
10294 dstbase = operands[0];
10295 srcbase = operands[1];
10297 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10298 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10300 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10301 out_words_to_go = INTVAL (operands[2]) / 4;
10302 last_bytes = INTVAL (operands[2]) & 3;
10303 dstoffset = srcoffset = 0;
10305 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10306 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10308 for (i = 0; in_words_to_go >= 2; i+=4)
10310 if (in_words_to_go > 4)
10311 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10312 TRUE, srcbase, &srcoffset));
10313 else
10314 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10315 src, FALSE, srcbase,
10316 &srcoffset));
10318 if (out_words_to_go)
10320 if (out_words_to_go > 4)
10321 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10322 TRUE, dstbase, &dstoffset));
10323 else if (out_words_to_go != 1)
10324 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10325 out_words_to_go, dst,
10326 (last_bytes == 0
10327 ? FALSE : TRUE),
10328 dstbase, &dstoffset));
10329 else
10331 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10332 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10333 if (last_bytes != 0)
10335 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10336 dstoffset += 4;
10341 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10342 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10345 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10346 if (out_words_to_go)
10348 rtx sreg;
10350 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10351 sreg = copy_to_reg (mem);
10353 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10354 emit_move_insn (mem, sreg);
10355 in_words_to_go--;
10357 gcc_assert (!in_words_to_go); /* Sanity check */
10360 if (in_words_to_go)
10362 gcc_assert (in_words_to_go > 0);
10364 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10365 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10368 gcc_assert (!last_bytes || part_bytes_reg);
10370 if (BYTES_BIG_ENDIAN && last_bytes)
10372 rtx tmp = gen_reg_rtx (SImode);
10374 /* The bytes we want are in the top end of the word. */
10375 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10376 GEN_INT (8 * (4 - last_bytes))));
10377 part_bytes_reg = tmp;
10379 while (last_bytes)
10381 mem = adjust_automodify_address (dstbase, QImode,
10382 plus_constant (dst, last_bytes - 1),
10383 dstoffset + last_bytes - 1);
10384 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10386 if (--last_bytes)
10388 tmp = gen_reg_rtx (SImode);
10389 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10390 part_bytes_reg = tmp;
10395 else
10397 if (last_bytes > 1)
10399 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10400 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10401 last_bytes -= 2;
10402 if (last_bytes)
10404 rtx tmp = gen_reg_rtx (SImode);
10405 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10406 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10407 part_bytes_reg = tmp;
10408 dstoffset += 2;
10412 if (last_bytes)
10414 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10415 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10419 return 1;
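/* A sketch of the control flow above (added for illustration; not part of
   the original source): for a copy of 13 bytes with word-aligned operands,
   in_words_to_go is 4, out_words_to_go is 3 and last_bytes is 1, so the
   main loop emits one four-word load-multiple and one three-word
   store-multiple, and the tail code then stores the remaining byte from the
   register holding the partially used word.  */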
10422 /* Select a dominance comparison mode if possible for a test of the general
10423 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10424 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10425 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10426 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10427 In all cases OP will be either EQ or NE, but we don't need to know which
10428 here. If we are unable to support a dominance comparison, we return
10429 CCmode. This will then fail to match for the RTL expressions that
10430 generate this call. */
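/* For illustration (an added example, not from the original source): the RTL
   (ne (ior (eq x (const_int 0)) (eq y (const_int 0))) (const_int 0)) reaches
   this function with COND_OR == DOM_CC_X_OR_Y; both sub-comparisons are EQ,
   so CC_DEQmode is returned.  Mixing, say, LT with GTU returns CCmode,
   since neither condition dominates the other.  */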
10431 enum machine_mode
10432 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10434 enum rtx_code cond1, cond2;
10435 int swapped = 0;
10437 /* Currently we will probably get the wrong result if the individual
10438 comparisons are not simple. This also ensures that it is safe to
10439 reverse a comparison if necessary. */
10440 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10441 != CCmode)
10442 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10443 != CCmode))
10444 return CCmode;
10446 /* The if_then_else variant of this tests the second condition if the
10447 first passes, but is true if the first fails. Reverse the first
10448 condition to get a true "inclusive-or" expression. */
10449 if (cond_or == DOM_CC_NX_OR_Y)
10450 cond1 = reverse_condition (cond1);
10452 /* If the comparisons are not equal, and one doesn't dominate the other,
10453 then we can't do this. */
10454 if (cond1 != cond2
10455 && !comparison_dominates_p (cond1, cond2)
10456 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10457 return CCmode;
10459 if (swapped)
10461 enum rtx_code temp = cond1;
10462 cond1 = cond2;
10463 cond2 = temp;
10466 switch (cond1)
10468 case EQ:
10469 if (cond_or == DOM_CC_X_AND_Y)
10470 return CC_DEQmode;
10472 switch (cond2)
10474 case EQ: return CC_DEQmode;
10475 case LE: return CC_DLEmode;
10476 case LEU: return CC_DLEUmode;
10477 case GE: return CC_DGEmode;
10478 case GEU: return CC_DGEUmode;
10479 default: gcc_unreachable ();
10482 case LT:
10483 if (cond_or == DOM_CC_X_AND_Y)
10484 return CC_DLTmode;
10486 switch (cond2)
10488 case LT:
10489 return CC_DLTmode;
10490 case LE:
10491 return CC_DLEmode;
10492 case NE:
10493 return CC_DNEmode;
10494 default:
10495 gcc_unreachable ();
10498 case GT:
10499 if (cond_or == DOM_CC_X_AND_Y)
10500 return CC_DGTmode;
10502 switch (cond2)
10504 case GT:
10505 return CC_DGTmode;
10506 case GE:
10507 return CC_DGEmode;
10508 case NE:
10509 return CC_DNEmode;
10510 default:
10511 gcc_unreachable ();
10514 case LTU:
10515 if (cond_or == DOM_CC_X_AND_Y)
10516 return CC_DLTUmode;
10518 switch (cond2)
10520 case LTU:
10521 return CC_DLTUmode;
10522 case LEU:
10523 return CC_DLEUmode;
10524 case NE:
10525 return CC_DNEmode;
10526 default:
10527 gcc_unreachable ();
10530 case GTU:
10531 if (cond_or == DOM_CC_X_AND_Y)
10532 return CC_DGTUmode;
10534 switch (cond2)
10536 case GTU:
10537 return CC_DGTUmode;
10538 case GEU:
10539 return CC_DGEUmode;
10540 case NE:
10541 return CC_DNEmode;
10542 default:
10543 gcc_unreachable ();
10546 /* The remaining cases only occur when both comparisons are the
10547 same. */
10548 case NE:
10549 gcc_assert (cond1 == cond2);
10550 return CC_DNEmode;
10552 case LE:
10553 gcc_assert (cond1 == cond2);
10554 return CC_DLEmode;
10556 case GE:
10557 gcc_assert (cond1 == cond2);
10558 return CC_DGEmode;
10560 case LEU:
10561 gcc_assert (cond1 == cond2);
10562 return CC_DLEUmode;
10564 case GEU:
10565 gcc_assert (cond1 == cond2);
10566 return CC_DGEUmode;
10568 default:
10569 gcc_unreachable ();
10573 enum machine_mode
10574 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10576 /* All floating point compares return CCFP if it is an equality
10577 comparison, and CCFPE otherwise. */
10578 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10580 switch (op)
10582 case EQ:
10583 case NE:
10584 case UNORDERED:
10585 case ORDERED:
10586 case UNLT:
10587 case UNLE:
10588 case UNGT:
10589 case UNGE:
10590 case UNEQ:
10591 case LTGT:
10592 return CCFPmode;
10594 case LT:
10595 case LE:
10596 case GT:
10597 case GE:
10598 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10599 return CCFPmode;
10600 return CCFPEmode;
10602 default:
10603 gcc_unreachable ();
10607 /* A compare with a shifted operand. Because of canonicalization, the
10608 comparison will have to be swapped when we emit the assembler. */
10609 if (GET_MODE (y) == SImode
10610 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10611 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10612 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10613 || GET_CODE (x) == ROTATERT))
10614 return CC_SWPmode;
10616 /* This operation is performed swapped, but since we only rely on the Z
10617 flag we don't need an additional mode. */
10618 if (GET_MODE (y) == SImode
10619 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10620 && GET_CODE (x) == NEG
10621 && (op == EQ || op == NE))
10622 return CC_Zmode;
10624 /* This is a special case that is used by combine to allow a
10625 comparison of a shifted byte load to be split into a zero-extend
10626 followed by a comparison of the shifted integer (only valid for
10627 equalities and unsigned inequalities). */
10628 if (GET_MODE (x) == SImode
10629 && GET_CODE (x) == ASHIFT
10630 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10631 && GET_CODE (XEXP (x, 0)) == SUBREG
10632 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10633 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10634 && (op == EQ || op == NE
10635 || op == GEU || op == GTU || op == LTU || op == LEU)
10636 && GET_CODE (y) == CONST_INT)
10637 return CC_Zmode;
10639 /* A construct for a conditional compare: if the false arm contains
10640 0, then both conditions must be true; otherwise either condition
10641 must be true. Not all conditions are possible, so CCmode is
10642 returned if it can't be done. */
10643 if (GET_CODE (x) == IF_THEN_ELSE
10644 && (XEXP (x, 2) == const0_rtx
10645 || XEXP (x, 2) == const1_rtx)
10646 && COMPARISON_P (XEXP (x, 0))
10647 && COMPARISON_P (XEXP (x, 1)))
10648 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10649 INTVAL (XEXP (x, 2)));
10651 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10652 if (GET_CODE (x) == AND
10653 && (op == EQ || op == NE)
10654 && COMPARISON_P (XEXP (x, 0))
10655 && COMPARISON_P (XEXP (x, 1)))
10656 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10657 DOM_CC_X_AND_Y);
10659 if (GET_CODE (x) == IOR
10660 && (op == EQ || op == NE)
10661 && COMPARISON_P (XEXP (x, 0))
10662 && COMPARISON_P (XEXP (x, 1)))
10663 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10664 DOM_CC_X_OR_Y);
10666 /* An operation (on Thumb) where we want to test for a single bit.
10667 This is done by shifting that bit up into the top bit of a
10668 scratch register; we can then branch on the sign bit. */
10669 if (TARGET_THUMB1
10670 && GET_MODE (x) == SImode
10671 && (op == EQ || op == NE)
10672 && GET_CODE (x) == ZERO_EXTRACT
10673 && XEXP (x, 1) == const1_rtx)
10674 return CC_Nmode;
10676 /* An operation that sets the condition codes as a side-effect; the
10677 V flag is not set correctly, so we can only use comparisons where
10678 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10679 instead.) */
10680 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10681 if (GET_MODE (x) == SImode
10682 && y == const0_rtx
10683 && (op == EQ || op == NE || op == LT || op == GE)
10684 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10685 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10686 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10687 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10688 || GET_CODE (x) == LSHIFTRT
10689 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10690 || GET_CODE (x) == ROTATERT
10691 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10692 return CC_NOOVmode;
10694 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10695 return CC_Zmode;
10697 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10698 && GET_CODE (x) == PLUS
10699 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10700 return CC_Cmode;
10702 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10704 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10705 available. */
10706 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10707 return CCmode;
10709 switch (op)
10711 case EQ:
10712 case NE:
10713 /* A DImode comparison against zero can be implemented by
10714 or'ing the two halves together. */
10715 if (y == const0_rtx)
10716 return CC_Zmode;
10718 /* We can do an equality test in three Thumb instructions. */
10719 if (!TARGET_ARM)
10720 return CC_Zmode;
10722 /* FALLTHROUGH */
10724 case LTU:
10725 case LEU:
10726 case GTU:
10727 case GEU:
10728 /* DImode unsigned comparisons can be implemented by cmp +
10729 cmpeq without a scratch register. Not worth doing in
10730 Thumb-2. */
10731 if (TARGET_ARM)
10732 return CC_CZmode;
10734 /* FALLTHROUGH */
10736 case LT:
10737 case LE:
10738 case GT:
10739 case GE:
10740 /* DImode signed and unsigned comparisons can be implemented
10741 by cmp + sbcs with a scratch register, but that does not
10742 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10743 gcc_assert (op != EQ && op != NE);
10744 return CC_NCVmode;
10746 default:
10747 gcc_unreachable ();
10751 return CCmode;
10754 /* X and Y are two things to compare using CODE. Emit the compare insn and
10755 return the rtx for the CC register in the proper mode. FP means this is a
10756 floating-point compare; I don't think that it is needed on the ARM. */
10758 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10760 enum machine_mode mode;
10761 rtx cc_reg;
10762 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10764 /* We might have X as a constant, Y as a register because of the predicates
10765 used for cmpdi. If so, force X to a register here. */
10766 if (dimode_comparison && !REG_P (x))
10767 x = force_reg (DImode, x);
10769 mode = SELECT_CC_MODE (code, x, y);
10770 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10772 if (dimode_comparison
10773 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10774 && mode != CC_CZmode)
10776 rtx clobber, set;
10778 /* To compare two non-zero values for equality, XOR them and
10779 then compare against zero. Not used for ARM mode; there
10780 CC_CZmode is cheaper. */
10781 if (mode == CC_Zmode && y != const0_rtx)
10783 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10784 y = const0_rtx;
10786 /* A scratch register is required. */
10787 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10788 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10789 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10791 else
10792 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10794 return cc_reg;
10797 /* Generate a sequence of insns that will generate the correct return
10798 address mask depending on the physical architecture that the program
10799 is running on. */
10801 arm_gen_return_addr_mask (void)
10803 rtx reg = gen_reg_rtx (Pmode);
10805 emit_insn (gen_return_addr_mask (reg));
10806 return reg;
10809 void
10810 arm_reload_in_hi (rtx *operands)
10812 rtx ref = operands[1];
10813 rtx base, scratch;
10814 HOST_WIDE_INT offset = 0;
10816 if (GET_CODE (ref) == SUBREG)
10818 offset = SUBREG_BYTE (ref);
10819 ref = SUBREG_REG (ref);
10822 if (GET_CODE (ref) == REG)
10824 /* We have a pseudo which has been spilt onto the stack; there
10825 are two cases here: the first where there is a simple
10826 stack-slot replacement and a second where the stack-slot is
10827 out of range, or is used as a subreg. */
10828 if (reg_equiv_mem[REGNO (ref)])
10830 ref = reg_equiv_mem[REGNO (ref)];
10831 base = find_replacement (&XEXP (ref, 0));
10833 else
10834 /* The slot is out of range, or was dressed up in a SUBREG. */
10835 base = reg_equiv_address[REGNO (ref)];
10837 else
10838 base = find_replacement (&XEXP (ref, 0));
10840 /* Handle the case where the address is too complex to be offset by 1. */
10841 if (GET_CODE (base) == MINUS
10842 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10844 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10846 emit_set_insn (base_plus, base);
10847 base = base_plus;
10849 else if (GET_CODE (base) == PLUS)
10851 /* The addend must be CONST_INT, or we would have dealt with it above. */
10852 HOST_WIDE_INT hi, lo;
10854 offset += INTVAL (XEXP (base, 1));
10855 base = XEXP (base, 0);
10857 /* Rework the address into a legal sequence of insns. */
10858 /* Valid range for lo is -4095 -> 4095 */
10859 lo = (offset >= 0
10860 ? (offset & 0xfff)
10861 : -((-offset) & 0xfff));
10863 /* Corner case: if lo is the max offset then we would be out of range
10864 once we have added the additional 1 below, so bump the msb into the
10865 pre-loading insn(s). */
10866 if (lo == 4095)
10867 lo &= 0x7ff;
10869 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10870 ^ (HOST_WIDE_INT) 0x80000000)
10871 - (HOST_WIDE_INT) 0x80000000);
10873 gcc_assert (hi + lo == offset);
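/* Worked example (added for illustration; not part of the original source):
   for offset == 0x1234 this gives lo == 0x234 and hi == 0x1000; the hi part
   is added to the base register first and the byte loads below use the
   small lo offset directly.  For offset == 0xfff the corner case above
   reduces lo to 0x7ff and hi becomes 0x800, so that lo + 1 still fits in
   the valid -4095..4095 range.  */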
10875 if (hi != 0)
10877 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10879 /* Get the base address; addsi3 knows how to handle constants
10880 that require more than one insn. */
10881 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10882 base = base_plus;
10883 offset = lo;
10887 /* Operands[2] may overlap operands[0] (though it won't overlap
10888 operands[1]), that's why we asked for a DImode reg -- so we can
10889 use the bit that does not overlap. */
10890 if (REGNO (operands[2]) == REGNO (operands[0]))
10891 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10892 else
10893 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10895 emit_insn (gen_zero_extendqisi2 (scratch,
10896 gen_rtx_MEM (QImode,
10897 plus_constant (base,
10898 offset))));
10899 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10900 gen_rtx_MEM (QImode,
10901 plus_constant (base,
10902 offset + 1))));
10903 if (!BYTES_BIG_ENDIAN)
10904 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10905 gen_rtx_IOR (SImode,
10906 gen_rtx_ASHIFT
10907 (SImode,
10908 gen_rtx_SUBREG (SImode, operands[0], 0),
10909 GEN_INT (8)),
10910 scratch));
10911 else
10912 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10913 gen_rtx_IOR (SImode,
10914 gen_rtx_ASHIFT (SImode, scratch,
10915 GEN_INT (8)),
10916 gen_rtx_SUBREG (SImode, operands[0], 0)));
10919 /* Handle storing a half-word to memory during reload by synthesizing it as two
10920 byte stores. Take care not to clobber the input values until after we
10921 have moved them somewhere safe. This code assumes that if the DImode
10922 scratch in operands[2] overlaps either the input value or output address
10923 in some way, then that value must die in this insn (we absolutely need
10924 two scratch registers for some corner cases). */
10925 void
10926 arm_reload_out_hi (rtx *operands)
10928 rtx ref = operands[0];
10929 rtx outval = operands[1];
10930 rtx base, scratch;
10931 HOST_WIDE_INT offset = 0;
10933 if (GET_CODE (ref) == SUBREG)
10935 offset = SUBREG_BYTE (ref);
10936 ref = SUBREG_REG (ref);
10939 if (GET_CODE (ref) == REG)
10941 /* We have a pseudo which has been spilt onto the stack; there
10942 are two cases here: the first where there is a simple
10943 stack-slot replacement and a second where the stack-slot is
10944 out of range, or is used as a subreg. */
10945 if (reg_equiv_mem[REGNO (ref)])
10947 ref = reg_equiv_mem[REGNO (ref)];
10948 base = find_replacement (&XEXP (ref, 0));
10950 else
10951 /* The slot is out of range, or was dressed up in a SUBREG. */
10952 base = reg_equiv_address[REGNO (ref)];
10954 else
10955 base = find_replacement (&XEXP (ref, 0));
10957 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10959 /* Handle the case where the address is too complex to be offset by 1. */
10960 if (GET_CODE (base) == MINUS
10961 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10963 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10965 /* Be careful not to destroy OUTVAL. */
10966 if (reg_overlap_mentioned_p (base_plus, outval))
10968 /* Updating base_plus might destroy outval, see if we can
10969 swap the scratch and base_plus. */
10970 if (!reg_overlap_mentioned_p (scratch, outval))
10972 rtx tmp = scratch;
10973 scratch = base_plus;
10974 base_plus = tmp;
10976 else
10978 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10980 /* Be conservative and copy OUTVAL into the scratch now;
10981 this should only be necessary if outval is a subreg
10982 of something larger than a word. */
10983 /* XXX Might this clobber base? I can't see how it can,
10984 since scratch is known to overlap with OUTVAL, and
10985 must be wider than a word. */
10986 emit_insn (gen_movhi (scratch_hi, outval));
10987 outval = scratch_hi;
10991 emit_set_insn (base_plus, base);
10992 base = base_plus;
10994 else if (GET_CODE (base) == PLUS)
10996 /* The addend must be CONST_INT, or we would have dealt with it above. */
10997 HOST_WIDE_INT hi, lo;
10999 offset += INTVAL (XEXP (base, 1));
11000 base = XEXP (base, 0);
11002 /* Rework the address into a legal sequence of insns. */
11003 /* Valid range for lo is -4095 -> 4095 */
11004 lo = (offset >= 0
11005 ? (offset & 0xfff)
11006 : -((-offset) & 0xfff));
11008 /* Corner case: if lo is the max offset then we would be out of range
11009 once we have added the additional 1 below, so bump the msb into the
11010 pre-loading insn(s). */
11011 if (lo == 4095)
11012 lo &= 0x7ff;
11014 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11015 ^ (HOST_WIDE_INT) 0x80000000)
11016 - (HOST_WIDE_INT) 0x80000000);
11018 gcc_assert (hi + lo == offset);
11020 if (hi != 0)
11022 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11024 /* Be careful not to destroy OUTVAL. */
11025 if (reg_overlap_mentioned_p (base_plus, outval))
11027 /* Updating base_plus might destroy outval, see if we
11028 can swap the scratch and base_plus. */
11029 if (!reg_overlap_mentioned_p (scratch, outval))
11031 rtx tmp = scratch;
11032 scratch = base_plus;
11033 base_plus = tmp;
11035 else
11037 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11039 /* Be conservative and copy outval into scratch now;
11040 this should only be necessary if outval is a
11041 subreg of something larger than a word. */
11042 /* XXX Might this clobber base? I can't see how it
11043 can, since scratch is known to overlap with
11044 outval. */
11045 emit_insn (gen_movhi (scratch_hi, outval));
11046 outval = scratch_hi;
11050 /* Get the base address; addsi3 knows how to handle constants
11051 that require more than one insn. */
11052 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11053 base = base_plus;
11054 offset = lo;
11058 if (BYTES_BIG_ENDIAN)
11060 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11061 plus_constant (base, offset + 1)),
11062 gen_lowpart (QImode, outval)));
11063 emit_insn (gen_lshrsi3 (scratch,
11064 gen_rtx_SUBREG (SImode, outval, 0),
11065 GEN_INT (8)));
11066 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11067 gen_lowpart (QImode, scratch)));
11069 else
11071 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11072 gen_lowpart (QImode, outval)));
11073 emit_insn (gen_lshrsi3 (scratch,
11074 gen_rtx_SUBREG (SImode, outval, 0),
11075 GEN_INT (8)));
11076 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11077 plus_constant (base, offset + 1)),
11078 gen_lowpart (QImode, scratch)));
11082 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11083 (padded to the size of a word) should be passed in a register. */
11085 static bool
11086 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11088 if (TARGET_AAPCS_BASED)
11089 return must_pass_in_stack_var_size (mode, type);
11090 else
11091 return must_pass_in_stack_var_size_or_pad (mode, type);
11095 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11096 Return true if an argument passed on the stack should be padded upwards,
11097 i.e. if the least-significant byte has useful data.
11098 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11099 aggregate types are placed in the lowest memory address. */
11101 bool
11102 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11104 if (!TARGET_AAPCS_BASED)
11105 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11107 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11108 return false;
11110 return true;
11114 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11115 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11116 byte of the register has useful data, and return the opposite if the
11117 most significant byte does.
11118 For AAPCS, small aggregates and small complex types are always padded
11119 upwards. */
11121 bool
11122 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11123 tree type, int first ATTRIBUTE_UNUSED)
11125 if (TARGET_AAPCS_BASED
11126 && BYTES_BIG_ENDIAN
11127 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11128 && int_size_in_bytes (type) <= 4)
11129 return true;
11131 /* Otherwise, use default padding. */
11132 return !BYTES_BIG_ENDIAN;
11136 /* Print a symbolic form of X to the debug file, F. */
11137 static void
11138 arm_print_value (FILE *f, rtx x)
11140 switch (GET_CODE (x))
11142 case CONST_INT:
11143 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11144 return;
11146 case CONST_DOUBLE:
11147 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11148 return;
11150 case CONST_VECTOR:
11152 int i;
11154 fprintf (f, "<");
11155 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11157 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11158 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11159 fputc (',', f);
11161 fprintf (f, ">");
11163 return;
11165 case CONST_STRING:
11166 fprintf (f, "\"%s\"", XSTR (x, 0));
11167 return;
11169 case SYMBOL_REF:
11170 fprintf (f, "`%s'", XSTR (x, 0));
11171 return;
11173 case LABEL_REF:
11174 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11175 return;
11177 case CONST:
11178 arm_print_value (f, XEXP (x, 0));
11179 return;
11181 case PLUS:
11182 arm_print_value (f, XEXP (x, 0));
11183 fprintf (f, "+");
11184 arm_print_value (f, XEXP (x, 1));
11185 return;
11187 case PC:
11188 fprintf (f, "pc");
11189 return;
11191 default:
11192 fprintf (f, "????");
11193 return;
11197 /* Routines for manipulation of the constant pool. */
11199 /* Arm instructions cannot load a large constant directly into a
11200 register; they have to come from a pc relative load. The constant
11201 must therefore be placed in the addressable range of the pc
11202 relative load. Depending on the precise pc relative load
11203 instruction the range is somewhere between 256 bytes and 4k. This
11204 means that we often have to dump a constant inside a function, and
11205 generate code to branch around it.
11207 It is important to minimize this, since the branches will slow
11208 things down and make the code larger.
11210 Normally we can hide the table after an existing unconditional
11211 branch so that there is no interruption of the flow, but in the
11212 worst case the code looks like this:
11214 ldr rn, L1
11216 b L2
11217 align
11218 L1: .long value
11222 ldr rn, L3
11224 b L4
11225 align
11226 L3: .long value
11230 We fix this by performing a scan after scheduling, which notices
11231 which instructions need to have their operands fetched from the
11232 constant table and builds the table.
11234 The algorithm starts by building a table of all the constants that
11235 need fixing up and all the natural barriers in the function (places
11236 where a constant table can be dropped without breaking the flow).
11237 For each fixup we note how far the pc-relative replacement will be
11238 able to reach and the offset of the instruction into the function.
11240 Having built the table we then group the fixes together to form
11241 tables that are as large as possible (subject to addressing
11242 constraints) and emit each table of constants after the last
11243 barrier that is within range of all the instructions in the group.
11244 If a group does not contain a barrier, then we forcibly create one
11245 by inserting a jump instruction into the flow. Once the table has
11246 been inserted, the insns are then modified to reference the
11247 relevant entry in the pool.
11249 Possible enhancements to the algorithm (not implemented) are:
11251 1) For some processors and object formats, there may be benefit in
11252 aligning the pools to the start of cache lines; this alignment
11253 would need to be taken into account when calculating addressability
11254 of a pool. */
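/* A small numeric illustration (added; the figures are assumptions, not
   taken from the original source): if an "ldr rn, L1" at function offset
   0x100 can reach forwards roughly 4k bytes, the constant it references
   must be placed in a pool starting no later than about offset 0x1100
   (less any alignment padding).  Grouping fixes therefore amounts to
   finding a barrier that lies within every such per-fix limit.  */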
11256 /* These typedefs are located at the start of this file, so that
11257 they can be used in the prototypes there. This comment is to
11258 remind readers of that fact so that the following structures
11259 can be understood more easily.
11261 typedef struct minipool_node Mnode;
11262 typedef struct minipool_fixup Mfix; */
11264 struct minipool_node
11266 /* Doubly linked chain of entries. */
11267 Mnode * next;
11268 Mnode * prev;
11269 /* The maximum offset into the code at which this entry can be placed. While
11270 pushing fixes for forward references, all entries are sorted in order
11271 of increasing max_address. */
11272 HOST_WIDE_INT max_address;
11273 /* Similarly for an entry inserted for a backwards ref. */
11274 HOST_WIDE_INT min_address;
11275 /* The number of fixes referencing this entry. This can become zero
11276 if we "unpush" an entry. In this case we ignore the entry when we
11277 come to emit the code. */
11278 int refcount;
11279 /* The offset from the start of the minipool. */
11280 HOST_WIDE_INT offset;
11281 /* The value in table. */
11282 rtx value;
11283 /* The mode of value. */
11284 enum machine_mode mode;
11285 /* The size of the value. With iWMMXt enabled,
11286 sizes > 4 also imply an alignment of 8 bytes. */
11287 int fix_size;
11290 struct minipool_fixup
11292 Mfix * next;
11293 rtx insn;
11294 HOST_WIDE_INT address;
11295 rtx * loc;
11296 enum machine_mode mode;
11297 int fix_size;
11298 rtx value;
11299 Mnode * minipool;
11300 HOST_WIDE_INT forwards;
11301 HOST_WIDE_INT backwards;
11304 /* Fixes less than a word need padding out to a word boundary. */
11305 #define MINIPOOL_FIX_SIZE(mode) \
11306 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
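/* For example (added for illustration): MINIPOOL_FIX_SIZE (QImode) and
   MINIPOOL_FIX_SIZE (HImode) both evaluate to 4, while
   MINIPOOL_FIX_SIZE (DImode) evaluates to 8.  */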
11308 static Mnode * minipool_vector_head;
11309 static Mnode * minipool_vector_tail;
11310 static rtx minipool_vector_label;
11311 static int minipool_pad;
11313 /* The linked list of all minipool fixes required for this function. */
11314 Mfix * minipool_fix_head;
11315 Mfix * minipool_fix_tail;
11316 /* The fix entry for the current minipool, once it has been placed. */
11317 Mfix * minipool_barrier;
11319 /* Determines if INSN is the start of a jump table. Returns the end
11320 of the TABLE or NULL_RTX. */
11321 static rtx
11322 is_jump_table (rtx insn)
11324 rtx table;
11326 if (GET_CODE (insn) == JUMP_INSN
11327 && JUMP_LABEL (insn) != NULL
11328 && ((table = next_real_insn (JUMP_LABEL (insn)))
11329 == next_real_insn (insn))
11330 && table != NULL
11331 && GET_CODE (table) == JUMP_INSN
11332 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11333 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11334 return table;
11336 return NULL_RTX;
11339 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11340 #define JUMP_TABLES_IN_TEXT_SECTION 0
11341 #endif
11343 static HOST_WIDE_INT
11344 get_jump_table_size (rtx insn)
11346 /* ADDR_VECs only take room if read-only data goes into the text
11347 section. */
11348 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11350 rtx body = PATTERN (insn);
11351 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11352 HOST_WIDE_INT size;
11353 HOST_WIDE_INT modesize;
11355 modesize = GET_MODE_SIZE (GET_MODE (body));
11356 size = modesize * XVECLEN (body, elt);
11357 switch (modesize)
11359 case 1:
11360 /* Round up size of TBB table to a halfword boundary. */
11361 size = (size + 1) & ~(HOST_WIDE_INT)1;
11362 break;
11363 case 2:
11364 /* No padding necessary for TBH. */
11365 break;
11366 case 4:
11367 /* Add two bytes for alignment on Thumb. */
11368 if (TARGET_THUMB)
11369 size += 2;
11370 break;
11371 default:
11372 gcc_unreachable ();
11374 return size;
11377 return 0;
11380 /* Move a minipool fix MP from its current location to before MAX_MP.
11381 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11382 constraints may need updating. */
11383 static Mnode *
11384 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11385 HOST_WIDE_INT max_address)
11387 /* The code below assumes these are different. */
11388 gcc_assert (mp != max_mp);
11390 if (max_mp == NULL)
11392 if (max_address < mp->max_address)
11393 mp->max_address = max_address;
11395 else
11397 if (max_address > max_mp->max_address - mp->fix_size)
11398 mp->max_address = max_mp->max_address - mp->fix_size;
11399 else
11400 mp->max_address = max_address;
11402 /* Unlink MP from its current position. Since max_mp is non-null,
11403 mp->prev must be non-null. */
11404 mp->prev->next = mp->next;
11405 if (mp->next != NULL)
11406 mp->next->prev = mp->prev;
11407 else
11408 minipool_vector_tail = mp->prev;
11410 /* Re-insert it before MAX_MP. */
11411 mp->next = max_mp;
11412 mp->prev = max_mp->prev;
11413 max_mp->prev = mp;
11415 if (mp->prev != NULL)
11416 mp->prev->next = mp;
11417 else
11418 minipool_vector_head = mp;
11421 /* Save the new entry. */
11422 max_mp = mp;
11424 /* Scan over the preceding entries and adjust their addresses as
11425 required. */
11426 while (mp->prev != NULL
11427 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11429 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11430 mp = mp->prev;
11433 return max_mp;
11436 /* Add a constant to the minipool for a forward reference. Returns the
11437 node added or NULL if the constant will not fit in this pool. */
11438 static Mnode *
11439 add_minipool_forward_ref (Mfix *fix)
11441 /* If set, max_mp is the first pool_entry that has a lower
11442 constraint than the one we are trying to add. */
11443 Mnode * max_mp = NULL;
11444 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11445 Mnode * mp;
11447 /* If the minipool starts before the end of FIX->INSN then this FIX
11448 can not be placed into the current pool. Furthermore, adding the
11449 new constant pool entry may cause the pool to start FIX_SIZE bytes
11450 earlier. */
11451 if (minipool_vector_head &&
11452 (fix->address + get_attr_length (fix->insn)
11453 >= minipool_vector_head->max_address - fix->fix_size))
11454 return NULL;
11456 /* Scan the pool to see if a constant with the same value has
11457 already been added. While we are doing this, also note the
11458 location where we must insert the constant if it doesn't already
11459 exist. */
11460 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11462 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11463 && fix->mode == mp->mode
11464 && (GET_CODE (fix->value) != CODE_LABEL
11465 || (CODE_LABEL_NUMBER (fix->value)
11466 == CODE_LABEL_NUMBER (mp->value)))
11467 && rtx_equal_p (fix->value, mp->value))
11469 /* More than one fix references this entry. */
11470 mp->refcount++;
11471 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11474 /* Note the insertion point if necessary. */
11475 if (max_mp == NULL
11476 && mp->max_address > max_address)
11477 max_mp = mp;
11479 /* If we are inserting an 8-byte aligned quantity and
11480 we have not already found an insertion point, then
11481 make sure that all such 8-byte aligned quantities are
11482 placed at the start of the pool. */
11483 if (ARM_DOUBLEWORD_ALIGN
11484 && max_mp == NULL
11485 && fix->fix_size >= 8
11486 && mp->fix_size < 8)
11488 max_mp = mp;
11489 max_address = mp->max_address;
11493 /* The value is not currently in the minipool, so we need to create
11494 a new entry for it. If MAX_MP is NULL, the entry will be put on
11495 the end of the list since the placement is less constrained than
11496 any existing entry. Otherwise, we insert the new fix before
11497 MAX_MP and, if necessary, adjust the constraints on the other
11498 entries. */
11499 mp = XNEW (Mnode);
11500 mp->fix_size = fix->fix_size;
11501 mp->mode = fix->mode;
11502 mp->value = fix->value;
11503 mp->refcount = 1;
11504 /* Not yet required for a backwards ref. */
11505 mp->min_address = -65536;
11507 if (max_mp == NULL)
11509 mp->max_address = max_address;
11510 mp->next = NULL;
11511 mp->prev = minipool_vector_tail;
11513 if (mp->prev == NULL)
11515 minipool_vector_head = mp;
11516 minipool_vector_label = gen_label_rtx ();
11518 else
11519 mp->prev->next = mp;
11521 minipool_vector_tail = mp;
11523 else
11525 if (max_address > max_mp->max_address - mp->fix_size)
11526 mp->max_address = max_mp->max_address - mp->fix_size;
11527 else
11528 mp->max_address = max_address;
11530 mp->next = max_mp;
11531 mp->prev = max_mp->prev;
11532 max_mp->prev = mp;
11533 if (mp->prev != NULL)
11534 mp->prev->next = mp;
11535 else
11536 minipool_vector_head = mp;
11539 /* Save the new entry. */
11540 max_mp = mp;
11542 /* Scan over the preceding entries and adjust their addresses as
11543 required. */
11544 while (mp->prev != NULL
11545 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11547 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11548 mp = mp->prev;
11551 return max_mp;
11554 static Mnode *
11555 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11556 HOST_WIDE_INT min_address)
11558 HOST_WIDE_INT offset;
11560 /* The code below assumes these are different. */
11561 gcc_assert (mp != min_mp);
11563 if (min_mp == NULL)
11565 if (min_address > mp->min_address)
11566 mp->min_address = min_address;
11568 else
11570 /* We will adjust this below if it is too loose. */
11571 mp->min_address = min_address;
11573 /* Unlink MP from its current position. Since min_mp is non-null,
11574 mp->next must be non-null. */
11575 mp->next->prev = mp->prev;
11576 if (mp->prev != NULL)
11577 mp->prev->next = mp->next;
11578 else
11579 minipool_vector_head = mp->next;
11581 /* Reinsert it after MIN_MP. */
11582 mp->prev = min_mp;
11583 mp->next = min_mp->next;
11584 min_mp->next = mp;
11585 if (mp->next != NULL)
11586 mp->next->prev = mp;
11587 else
11588 minipool_vector_tail = mp;
11591 min_mp = mp;
11593 offset = 0;
11594 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11596 mp->offset = offset;
11597 if (mp->refcount > 0)
11598 offset += mp->fix_size;
11600 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11601 mp->next->min_address = mp->min_address + mp->fix_size;
11604 return min_mp;
11607 /* Add a constant to the minipool for a backward reference. Returns the
11608 node added or NULL if the constant will not fit in this pool.
11610 Note that the code for insertion for a backwards reference can be
11611 somewhat confusing because the calculated offsets for each fix do
11612 not take into account the size of the pool (which is still under
11613 construction). */
11614 static Mnode *
11615 add_minipool_backward_ref (Mfix *fix)
11617 /* If set, min_mp is the last pool_entry that has a lower constraint
11618 than the one we are trying to add. */
11619 Mnode *min_mp = NULL;
11620 /* This can be negative, since it is only a constraint. */
11621 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11622 Mnode *mp;
11624 /* If we can't reach the current pool from this insn, or if we can't
11625 insert this entry at the end of the pool without pushing other
11626 fixes out of range, then we don't try. This ensures that we
11627 can't fail later on. */
11628 if (min_address >= minipool_barrier->address
11629 || (minipool_vector_tail->min_address + fix->fix_size
11630 >= minipool_barrier->address))
11631 return NULL;
11633 /* Scan the pool to see if a constant with the same value has
11634 already been added. While we are doing this, also note the
11635 location where we must insert the constant if it doesn't already
11636 exist. */
11637 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11639 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11640 && fix->mode == mp->mode
11641 && (GET_CODE (fix->value) != CODE_LABEL
11642 || (CODE_LABEL_NUMBER (fix->value)
11643 == CODE_LABEL_NUMBER (mp->value)))
11644 && rtx_equal_p (fix->value, mp->value)
11645 /* Check that there is enough slack to move this entry to the
11646 end of the table (this is conservative). */
11647 && (mp->max_address
11648 > (minipool_barrier->address
11649 + minipool_vector_tail->offset
11650 + minipool_vector_tail->fix_size)))
11652 mp->refcount++;
11653 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11656 if (min_mp != NULL)
11657 mp->min_address += fix->fix_size;
11658 else
11660 /* Note the insertion point if necessary. */
11661 if (mp->min_address < min_address)
11663 /* For now, we do not allow the insertion of 8-byte alignment
11664 requiring nodes anywhere but at the start of the pool. */
11665 if (ARM_DOUBLEWORD_ALIGN
11666 && fix->fix_size >= 8 && mp->fix_size < 8)
11667 return NULL;
11668 else
11669 min_mp = mp;
11671 else if (mp->max_address
11672 < minipool_barrier->address + mp->offset + fix->fix_size)
11674 /* Inserting before this entry would push the fix beyond
11675 its maximum address (which can happen if we have
11676 re-located a forwards fix); force the new fix to come
11677 after it. */
11678 if (ARM_DOUBLEWORD_ALIGN
11679 && fix->fix_size >= 8 && mp->fix_size < 8)
11680 return NULL;
11681 else
11683 min_mp = mp;
11684 min_address = mp->min_address + fix->fix_size;
11687 /* Do not insert a non-8-byte aligned quantity before 8-byte
11688 aligned quantities. */
11689 else if (ARM_DOUBLEWORD_ALIGN
11690 && fix->fix_size < 8
11691 && mp->fix_size >= 8)
11693 min_mp = mp;
11694 min_address = mp->min_address + fix->fix_size;
11699 /* We need to create a new entry. */
11700 mp = XNEW (Mnode);
11701 mp->fix_size = fix->fix_size;
11702 mp->mode = fix->mode;
11703 mp->value = fix->value;
11704 mp->refcount = 1;
11705 mp->max_address = minipool_barrier->address + 65536;
11707 mp->min_address = min_address;
11709 if (min_mp == NULL)
11711 mp->prev = NULL;
11712 mp->next = minipool_vector_head;
11714 if (mp->next == NULL)
11716 minipool_vector_tail = mp;
11717 minipool_vector_label = gen_label_rtx ();
11719 else
11720 mp->next->prev = mp;
11722 minipool_vector_head = mp;
11724 else
11726 mp->next = min_mp->next;
11727 mp->prev = min_mp;
11728 min_mp->next = mp;
11730 if (mp->next != NULL)
11731 mp->next->prev = mp;
11732 else
11733 minipool_vector_tail = mp;
11736 /* Save the new entry. */
11737 min_mp = mp;
11739 if (mp->prev)
11740 mp = mp->prev;
11741 else
11742 mp->offset = 0;
11744 /* Scan over the following entries and adjust their offsets. */
11745 while (mp->next != NULL)
11747 if (mp->next->min_address < mp->min_address + mp->fix_size)
11748 mp->next->min_address = mp->min_address + mp->fix_size;
11750 if (mp->refcount)
11751 mp->next->offset = mp->offset + mp->fix_size;
11752 else
11753 mp->next->offset = mp->offset;
11755 mp = mp->next;
11758 return min_mp;
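/* Record BARRIER as the point at which the current minipool will be
   emitted and assign each entry that is still referenced its byte
   offset from the start of the pool.  */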
11761 static void
11762 assign_minipool_offsets (Mfix *barrier)
11764 HOST_WIDE_INT offset = 0;
11765 Mnode *mp;
11767 minipool_barrier = barrier;
11769 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11771 mp->offset = offset;
11773 if (mp->refcount > 0)
11774 offset += mp->fix_size;
11778 /* Output the literal table */
11779 static void
11780 dump_minipool (rtx scan)
11782 Mnode * mp;
11783 Mnode * nmp;
11784 int align64 = 0;
11786 if (ARM_DOUBLEWORD_ALIGN)
11787 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11788 if (mp->refcount > 0 && mp->fix_size >= 8)
11790 align64 = 1;
11791 break;
11794 if (dump_file)
11795 fprintf (dump_file,
11796 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11797 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11799 scan = emit_label_after (gen_label_rtx (), scan);
11800 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11801 scan = emit_label_after (minipool_vector_label, scan);
11803 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11805 if (mp->refcount > 0)
11807 if (dump_file)
11809 fprintf (dump_file,
11810 ";; Offset %u, min %ld, max %ld ",
11811 (unsigned) mp->offset, (unsigned long) mp->min_address,
11812 (unsigned long) mp->max_address);
11813 arm_print_value (dump_file, mp->value);
11814 fputc ('\n', dump_file);
11817 switch (mp->fix_size)
11819 #ifdef HAVE_consttable_1
11820 case 1:
11821 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11822 break;
11824 #endif
11825 #ifdef HAVE_consttable_2
11826 case 2:
11827 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11828 break;
11830 #endif
11831 #ifdef HAVE_consttable_4
11832 case 4:
11833 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11834 break;
11836 #endif
11837 #ifdef HAVE_consttable_8
11838 case 8:
11839 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11840 break;
11842 #endif
11843 #ifdef HAVE_consttable_16
11844 case 16:
11845 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11846 break;
11848 #endif
11849 default:
11850 gcc_unreachable ();
11854 nmp = mp->next;
11855 free (mp);
11858 minipool_vector_head = minipool_vector_tail = NULL;
11859 scan = emit_insn_after (gen_consttable_end (), scan);
11860 scan = emit_barrier_after (scan);
11863 /* Return the cost of forcibly inserting a barrier after INSN. */
11864 static int
11865 arm_barrier_cost (rtx insn)
11867 /* Basing the location of the pool on the loop depth is preferable,
11868 but at the moment, the basic block information seems to be
11869 corrupt by this stage of the compilation. */
11870 int base_cost = 50;
11871 rtx next = next_nonnote_insn (insn);
11873 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11874 base_cost -= 20;
11876 switch (GET_CODE (insn))
11878 case CODE_LABEL:
11879 /* It will always be better to place the table before the label, rather
11880 than after it. */
11881 return 50;
11883 case INSN:
11884 case CALL_INSN:
11885 return base_cost;
11887 case JUMP_INSN:
11888 return base_cost - 10;
11890 default:
11891 return base_cost + 10;
11895 /* Find the best place in the insn stream in the range
11896 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11897 Create the barrier by inserting a jump and add a new fix entry for
11898 it. */
11899 static Mfix *
11900 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11902 HOST_WIDE_INT count = 0;
11903 rtx barrier;
11904 rtx from = fix->insn;
11905 /* The instruction after which we will insert the jump. */
11906 rtx selected = NULL;
11907 int selected_cost;
11908 /* The address at which the jump instruction will be placed. */
11909 HOST_WIDE_INT selected_address;
11910 Mfix * new_fix;
11911 HOST_WIDE_INT max_count = max_address - fix->address;
11912 rtx label = gen_label_rtx ();
11914 selected_cost = arm_barrier_cost (from);
11915 selected_address = fix->address;
11917 while (from && count < max_count)
11919 rtx tmp;
11920 int new_cost;
11922 /* This code shouldn't have been called if there was a natural barrier
11923 within range. */
11924 gcc_assert (GET_CODE (from) != BARRIER);
11926 /* Count the length of this insn. */
11927 count += get_attr_length (from);
11929 /* If there is a jump table, add its length. */
11930 tmp = is_jump_table (from);
11931 if (tmp != NULL)
11933 count += get_jump_table_size (tmp);
11935 /* Jump tables aren't in a basic block, so base the cost on
11936 the dispatch insn. If we select this location, we will
11937 still put the pool after the table. */
11938 new_cost = arm_barrier_cost (from);
11940 if (count < max_count
11941 && (!selected || new_cost <= selected_cost))
11943 selected = tmp;
11944 selected_cost = new_cost;
11945 selected_address = fix->address + count;
11948 /* Continue after the dispatch table. */
11949 from = NEXT_INSN (tmp);
11950 continue;
11953 new_cost = arm_barrier_cost (from);
11955 if (count < max_count
11956 && (!selected || new_cost <= selected_cost))
11958 selected = from;
11959 selected_cost = new_cost;
11960 selected_address = fix->address + count;
11963 from = NEXT_INSN (from);
11966 /* Make sure that we found a place to insert the jump. */
11967 gcc_assert (selected);
11969 /* Create a new JUMP_INSN that branches around a barrier. */
11970 from = emit_jump_insn_after (gen_jump (label), selected);
11971 JUMP_LABEL (from) = label;
11972 barrier = emit_barrier_after (from);
11973 emit_label_after (label, barrier);
11975 /* Create a minipool barrier entry for the new barrier. */
11976 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11977 new_fix->insn = barrier;
11978 new_fix->address = selected_address;
11979 new_fix->next = fix->next;
11980 fix->next = new_fix;
11982 return new_fix;
11985 /* Record that there is a natural barrier in the insn stream at
11986 ADDRESS. */
11987 static void
11988 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11990 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11992 fix->insn = insn;
11993 fix->address = address;
11995 fix->next = NULL;
11996 if (minipool_fix_head != NULL)
11997 minipool_fix_tail->next = fix;
11998 else
11999 minipool_fix_head = fix;
12001 minipool_fix_tail = fix;
12004 /* Record INSN, which will need fixing up to load a value from the
12005 minipool. ADDRESS is the offset of the insn since the start of the
12006 function; LOC is a pointer to the part of the insn which requires
12007 fixing; VALUE is the constant that must be loaded, which is of type
12008 MODE. */
12009 static void
12010 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
12011 enum machine_mode mode, rtx value)
12013 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12015 fix->insn = insn;
12016 fix->address = address;
12017 fix->loc = loc;
12018 fix->mode = mode;
12019 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12020 fix->value = value;
12021 fix->forwards = get_attr_pool_range (insn);
12022 fix->backwards = get_attr_neg_pool_range (insn);
12023 fix->minipool = NULL;
12025 /* If an insn doesn't have a range defined for it, then it isn't
12026 expecting to be reworked by this code. Better to stop now than
12027 to generate duff assembly code. */
12028 gcc_assert (fix->forwards || fix->backwards);
12030 /* If an entry requires 8-byte alignment then assume all constant pools
12031 require 4 bytes of padding. Trying to do this later on a per-pool
12032 basis is awkward because existing pool entries have to be modified. */
12033 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12034 minipool_pad = 4;
12036 if (dump_file)
12038 fprintf (dump_file,
12039 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12040 GET_MODE_NAME (mode),
12041 INSN_UID (insn), (unsigned long) address,
12042 -1 * (long)fix->backwards, (long)fix->forwards);
12043 arm_print_value (dump_file, fix->value);
12044 fprintf (dump_file, "\n");
12047 /* Add it to the chain of fixes. */
12048 fix->next = NULL;
12050 if (minipool_fix_head != NULL)
12051 minipool_fix_tail->next = fix;
12052 else
12053 minipool_fix_head = fix;
12055 minipool_fix_tail = fix;
12058 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12059 Returns the number of insns needed, or 99 if we don't know how to
12060 do it. */
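/* For example, the 64-bit constant 0x0000000100000001 costs 2, since
   each 32-bit half is a valid immediate for a single data-processing
   insn.  */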
12062 arm_const_double_inline_cost (rtx val)
12064 rtx lowpart, highpart;
12065 enum machine_mode mode;
12067 mode = GET_MODE (val);
12069 if (mode == VOIDmode)
12070 mode = DImode;
12072 gcc_assert (GET_MODE_SIZE (mode) == 8);
12074 lowpart = gen_lowpart (SImode, val);
12075 highpart = gen_highpart_mode (SImode, mode, val);
12077 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12078 gcc_assert (GET_CODE (highpart) == CONST_INT);
12080 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12081 NULL_RTX, NULL_RTX, 0, 0)
12082 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12083 NULL_RTX, NULL_RTX, 0, 0));
12086 /* Return true if it is worthwhile to split a 64-bit constant into two
12087 32-bit operations. This is the case if optimizing for size, or
12088 if we have load delay slots, or if one 32-bit part can be done with
12089 a single data operation. */
12090 bool
12091 arm_const_double_by_parts (rtx val)
12093 enum machine_mode mode = GET_MODE (val);
12094 rtx part;
12096 if (optimize_size || arm_ld_sched)
12097 return true;
12099 if (mode == VOIDmode)
12100 mode = DImode;
12102 part = gen_highpart_mode (SImode, mode, val);
12104 gcc_assert (GET_CODE (part) == CONST_INT);
12106 if (const_ok_for_arm (INTVAL (part))
12107 || const_ok_for_arm (~INTVAL (part)))
12108 return true;
12110 part = gen_lowpart (SImode, val);
12112 gcc_assert (GET_CODE (part) == CONST_INT);
12114 if (const_ok_for_arm (INTVAL (part))
12115 || const_ok_for_arm (~INTVAL (part)))
12116 return true;
12118 return false;
12121 /* Return true if it is possible to inline both the high and low parts
12122 of a 64-bit constant into 32-bit data processing instructions. */
12123 bool
12124 arm_const_double_by_immediates (rtx val)
12126 enum machine_mode mode = GET_MODE (val);
12127 rtx part;
12129 if (mode == VOIDmode)
12130 mode = DImode;
12132 part = gen_highpart_mode (SImode, mode, val);
12134 gcc_assert (GET_CODE (part) == CONST_INT);
12136 if (!const_ok_for_arm (INTVAL (part)))
12137 return false;
12139 part = gen_lowpart (SImode, val);
12141 gcc_assert (GET_CODE (part) == CONST_INT);
12143 if (!const_ok_for_arm (INTVAL (part)))
12144 return false;
12146 return true;
12149 /* Scan INSN and note any of its operands that need fixing.
12150 If DO_PUSHES is false we do not actually push any of the fixups
12151 needed. The function returns TRUE if any fixups were needed/pushed.
12152 This is used by arm_memory_load_p() which needs to know about loads
12153 of constants that will be converted into minipool loads. */
12154 static bool
12155 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12157 bool result = false;
12158 int opno;
12160 extract_insn (insn);
12162 if (!constrain_operands (1))
12163 fatal_insn_not_found (insn);
12165 if (recog_data.n_alternatives == 0)
12166 return false;
12168 /* Fill in recog_op_alt with information about the constraints of
12169 this insn. */
12170 preprocess_constraints ();
12172 for (opno = 0; opno < recog_data.n_operands; opno++)
12174 /* Things we need to fix can only occur in inputs. */
12175 if (recog_data.operand_type[opno] != OP_IN)
12176 continue;
12178 /* If this alternative is a memory reference, then any mention
12179 of constants in this alternative is really to fool reload
12180 into allowing us to accept one there. We need to fix them up
12181 now so that we output the right code. */
12182 if (recog_op_alt[opno][which_alternative].memory_ok)
12184 rtx op = recog_data.operand[opno];
12186 if (CONSTANT_P (op))
12188 if (do_pushes)
12189 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12190 recog_data.operand_mode[opno], op);
12191 result = true;
12193 else if (GET_CODE (op) == MEM
12194 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12195 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12197 if (do_pushes)
12199 rtx cop = avoid_constant_pool_reference (op);
12201 /* Casting the address of something to a mode narrower
12202 than a word can cause avoid_constant_pool_reference()
12203 to return the pool reference itself. That's no good to
12204 us here. Let's just hope that we can use the
12205 constant pool value directly. */
12206 if (op == cop)
12207 cop = get_pool_constant (XEXP (op, 0));
12209 push_minipool_fix (insn, address,
12210 recog_data.operand_loc[opno],
12211 recog_data.operand_mode[opno], cop);
12214 result = true;
12219 return result;
12222 /* Convert instructions to their cc-clobbering variant if possible, since
12223 that allows us to use smaller encodings. */
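/* For example, rewriting (set (reg r0) (plus (reg r0) (reg r1))) as a
   PARALLEL that also clobbers the condition codes allows the 16-bit
   encoding "adds r0, r0, r1" to be selected instead of the 32-bit
   "add.w r0, r0, r1".  */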
12225 static void
12226 thumb2_reorg (void)
12228 basic_block bb;
12229 regset_head live;
12231 INIT_REG_SET (&live);
12233 /* We are freeing block_for_insn in the toplev to keep compatibility
12234 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12235 compute_bb_for_insn ();
12236 df_analyze ();
12238 FOR_EACH_BB (bb)
12240 rtx insn;
12242 COPY_REG_SET (&live, DF_LR_OUT (bb));
12243 df_simulate_initialize_backwards (bb, &live);
12244 FOR_BB_INSNS_REVERSE (bb, insn)
12246 if (NONJUMP_INSN_P (insn)
12247 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12249 rtx pat = PATTERN (insn);
12250 if (GET_CODE (pat) == SET
12251 && low_register_operand (XEXP (pat, 0), SImode)
12252 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12253 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12254 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12256 rtx dst = XEXP (pat, 0);
12257 rtx src = XEXP (pat, 1);
12258 rtx op0 = XEXP (src, 0);
12259 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12260 ? XEXP (src, 1) : NULL);
12262 if (rtx_equal_p (dst, op0)
12263 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12265 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12266 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12267 rtvec vec = gen_rtvec (2, pat, clobber);
12269 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12270 INSN_CODE (insn) = -1;
12272 /* We can also handle a commutative operation where the
12273 second operand matches the destination. */
12274 else if (op1 && rtx_equal_p (dst, op1))
12276 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12277 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12278 rtvec vec;
12280 src = copy_rtx (src);
12281 XEXP (src, 0) = op1;
12282 XEXP (src, 1) = op0;
12283 pat = gen_rtx_SET (VOIDmode, dst, src);
12284 vec = gen_rtvec (2, pat, clobber);
12285 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12286 INSN_CODE (insn) = -1;
12291 if (NONDEBUG_INSN_P (insn))
12292 df_simulate_one_insn_backwards (bb, insn, &live);
12296 CLEAR_REG_SET (&live);
12299 /* Gcc puts the pool in the wrong place for ARM, since we can only
12300 load addresses a limited distance around the pc. We do some
12301 special munging to move the constant pool values to the correct
12302 point in the code. */
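/* Outline of the pass: record every constant-load fixup and every
   existing BARRIER, then repeatedly grow a pool forwards until some fix
   can no longer reach it, emit the pool at the best barrier in range
   (creating one by branching around the pool if necessary), and rewrite
   each fix to load from its pool entry.  */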
12303 static void
12304 arm_reorg (void)
12306 rtx insn;
12307 HOST_WIDE_INT address = 0;
12308 Mfix * fix;
12310 if (TARGET_THUMB2)
12311 thumb2_reorg ();
12313 minipool_fix_head = minipool_fix_tail = NULL;
12315 /* The first insn must always be a note, or the code below won't
12316 scan it properly. */
12317 insn = get_insns ();
12318 gcc_assert (GET_CODE (insn) == NOTE);
12319 minipool_pad = 0;
12321 /* Scan all the insns and record the operands that will need fixing. */
12322 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12324 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12325 && (arm_cirrus_insn_p (insn)
12326 || GET_CODE (insn) == JUMP_INSN
12327 || arm_memory_load_p (insn)))
12328 cirrus_reorg (insn);
12330 if (GET_CODE (insn) == BARRIER)
12331 push_minipool_barrier (insn, address);
12332 else if (INSN_P (insn))
12334 rtx table;
12336 note_invalid_constants (insn, address, true);
12337 address += get_attr_length (insn);
12339 /* If the insn is a vector jump, add the size of the table
12340 and skip the table. */
12341 if ((table = is_jump_table (insn)) != NULL)
12343 address += get_jump_table_size (table);
12344 insn = table;
12349 fix = minipool_fix_head;
12351 /* Now scan the fixups and perform the required changes. */
12352 while (fix)
12354 Mfix * ftmp;
12355 Mfix * fdel;
12356 Mfix * last_added_fix;
12357 Mfix * last_barrier = NULL;
12358 Mfix * this_fix;
12360 /* Skip any further barriers before the next fix. */
12361 while (fix && GET_CODE (fix->insn) == BARRIER)
12362 fix = fix->next;
12364 /* No more fixes. */
12365 if (fix == NULL)
12366 break;
12368 last_added_fix = NULL;
12370 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12372 if (GET_CODE (ftmp->insn) == BARRIER)
12374 if (ftmp->address >= minipool_vector_head->max_address)
12375 break;
12377 last_barrier = ftmp;
12379 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12380 break;
12382 last_added_fix = ftmp; /* Keep track of the last fix added. */
12385 /* If we found a barrier, drop back to that; any fixes that we
12386 could have reached but come after the barrier will now go in
12387 the next mini-pool. */
12388 if (last_barrier != NULL)
12390 /* Reduce the refcount for those fixes that won't go into this
12391 pool after all. */
12392 for (fdel = last_barrier->next;
12393 fdel && fdel != ftmp;
12394 fdel = fdel->next)
12396 fdel->minipool->refcount--;
12397 fdel->minipool = NULL;
12400 ftmp = last_barrier;
12402 else
12404 /* ftmp is first fix that we can't fit into this pool and
12405 there are no natural barriers that we could use. Insert a
12406 new barrier in the code somewhere between the previous
12407 fix and this one, and arrange to jump around it. */
12408 HOST_WIDE_INT max_address;
12410 /* The last item on the list of fixes must be a barrier, so
12411 we can never run off the end of the list of fixes without
12412 last_barrier being set. */
12413 gcc_assert (ftmp);
12415 max_address = minipool_vector_head->max_address;
12416 /* Check that there isn't another fix that is in range that
12417 we couldn't fit into this pool because the pool was
12418 already too large: we need to put the pool before such an
12419 instruction. The pool itself may come just after the
12420 fix because create_fix_barrier also allows space for a
12421 jump instruction. */
12422 if (ftmp->address < max_address)
12423 max_address = ftmp->address + 1;
12425 last_barrier = create_fix_barrier (last_added_fix, max_address);
12428 assign_minipool_offsets (last_barrier);
12430 while (ftmp)
12432 if (GET_CODE (ftmp->insn) != BARRIER
12433 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12434 == NULL))
12435 break;
12437 ftmp = ftmp->next;
12440 /* Scan over the fixes we have identified for this pool, fixing them
12441 up and adding the constants to the pool itself. */
12442 for (this_fix = fix; this_fix && ftmp != this_fix;
12443 this_fix = this_fix->next)
12444 if (GET_CODE (this_fix->insn) != BARRIER)
12446 rtx addr
12447 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12448 minipool_vector_label),
12449 this_fix->minipool->offset);
12450 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12453 dump_minipool (last_barrier->insn);
12454 fix = ftmp;
12457 /* From now on we must synthesize any constants that we can't handle
12458 directly. This can happen if the RTL gets split during final
12459 instruction generation. */
12460 after_arm_reorg = 1;
12462 /* Free the minipool memory. */
12463 obstack_free (&minipool_obstack, minipool_startobj);
12466 /* Routines to output assembly language. */
12468 /* If the rtx is a valid FPA immediate constant, return its string representation.
12469 In this way we can ensure that valid double constants are generated even
12470 when cross compiling. */
12471 const char *
12472 fp_immediate_constant (rtx x)
12474 REAL_VALUE_TYPE r;
12475 int i;
12477 if (!fp_consts_inited)
12478 init_fp_table ();
12480 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12481 for (i = 0; i < 8; i++)
12482 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12483 return strings_fp[i];
12485 gcc_unreachable ();
12488 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12489 static const char *
12490 fp_const_from_val (REAL_VALUE_TYPE *r)
12492 int i;
12494 if (!fp_consts_inited)
12495 init_fp_table ();
12497 for (i = 0; i < 8; i++)
12498 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12499 return strings_fp[i];
12501 gcc_unreachable ();
12504 /* Output the operands of a LDM/STM instruction to STREAM.
12505 MASK is the ARM register set mask of which only bits 0-15 are important.
12506 REG is the base register, either the frame pointer or the stack pointer,
12507 INSTR is the possibly suffixed load or store instruction.
12508 RFE is nonzero if the instruction should also copy spsr to cpsr. */
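/* For instance, with INSTR "ldmfd\t%r, ", REG = SP_REGNUM and a MASK
   selecting r0, r1 and lr, this prints "ldmfd sp, {r0, r1, lr}".  */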
12510 static void
12511 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12512 unsigned long mask, int rfe)
12514 unsigned i;
12515 bool not_first = FALSE;
12517 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12518 fputc ('\t', stream);
12519 asm_fprintf (stream, instr, reg);
12520 fputc ('{', stream);
12522 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12523 if (mask & (1 << i))
12525 if (not_first)
12526 fprintf (stream, ", ");
12528 asm_fprintf (stream, "%r", i);
12529 not_first = TRUE;
12532 if (rfe)
12533 fprintf (stream, "}^\n");
12534 else
12535 fprintf (stream, "}\n");
12539 /* Output a FLDMD instruction to STREAM.
12540 BASE is the register containing the address.
12541 REG and COUNT specify the register range.
12542 Extra registers may be added to avoid hardware bugs.
12544 We output FLDMD even for ARMv5 VFP implementations. Although
12545 FLDMD is technically not supported until ARMv6, it is believed
12546 that all VFP implementations support its use in this context. */
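/* For instance, vfp_output_fldmd (stream, SP_REGNUM, 8, 3) emits
   "fldmfdd sp!, {d8, d9, d10}".  */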
12548 static void
12549 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12551 int i;
12553 /* Workaround ARM10 VFPr1 bug. */
12554 if (count == 2 && !arm_arch6)
12556 if (reg == 15)
12557 reg--;
12558 count++;
12561 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12562 load into multiple parts if we have to handle more than 16 registers. */
12563 if (count > 16)
12565 vfp_output_fldmd (stream, base, reg, 16);
12566 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12567 return;
12570 fputc ('\t', stream);
12571 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12573 for (i = reg; i < reg + count; i++)
12575 if (i > reg)
12576 fputs (", ", stream);
12577 asm_fprintf (stream, "d%d", i);
12579 fputs ("}\n", stream);
12584 /* Output the assembly for a store multiple. */
12586 const char *
12587 vfp_output_fstmd (rtx * operands)
12589 char pattern[100];
12590 int p;
12591 int base;
12592 int i;
12594 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12595 p = strlen (pattern);
12597 gcc_assert (GET_CODE (operands[1]) == REG);
12599 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12600 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12602 p += sprintf (&pattern[p], ", d%d", base + i);
12604 strcpy (&pattern[p], "}");
12606 output_asm_insn (pattern, operands);
12607 return "";
12611 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12612 number of bytes pushed. */
12614 static int
12615 vfp_emit_fstmd (int base_reg, int count)
12617 rtx par;
12618 rtx dwarf;
12619 rtx tmp, reg;
12620 int i;
12622 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12623 register pairs are stored by a store multiple insn. We avoid this
12624 by pushing an extra pair. */
12625 if (count == 2 && !arm_arch6)
12627 if (base_reg == LAST_VFP_REGNUM - 3)
12628 base_reg -= 2;
12629 count++;
12632 /* FSTMD may not store more than 16 doubleword registers at once. Split
12633 larger stores into multiple parts (up to a maximum of two, in
12634 practice). */
12635 if (count > 16)
12637 int saved;
12638 /* NOTE: base_reg is an internal register number, so each D register
12639 counts as 2. */
12640 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12641 saved += vfp_emit_fstmd (base_reg, 16);
12642 return saved;
12645 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12646 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12648 reg = gen_rtx_REG (DFmode, base_reg);
12649 base_reg += 2;
12651 XVECEXP (par, 0, 0)
12652 = gen_rtx_SET (VOIDmode,
12653 gen_frame_mem
12654 (BLKmode,
12655 gen_rtx_PRE_MODIFY (Pmode,
12656 stack_pointer_rtx,
12657 plus_constant
12658 (stack_pointer_rtx,
12659 - (count * 8)))
12661 gen_rtx_UNSPEC (BLKmode,
12662 gen_rtvec (1, reg),
12663 UNSPEC_PUSH_MULT));
12665 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12666 plus_constant (stack_pointer_rtx, -(count * 8)));
12667 RTX_FRAME_RELATED_P (tmp) = 1;
12668 XVECEXP (dwarf, 0, 0) = tmp;
12670 tmp = gen_rtx_SET (VOIDmode,
12671 gen_frame_mem (DFmode, stack_pointer_rtx),
12672 reg);
12673 RTX_FRAME_RELATED_P (tmp) = 1;
12674 XVECEXP (dwarf, 0, 1) = tmp;
12676 for (i = 1; i < count; i++)
12678 reg = gen_rtx_REG (DFmode, base_reg);
12679 base_reg += 2;
12680 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12682 tmp = gen_rtx_SET (VOIDmode,
12683 gen_frame_mem (DFmode,
12684 plus_constant (stack_pointer_rtx,
12685 i * 8)),
12686 reg);
12687 RTX_FRAME_RELATED_P (tmp) = 1;
12688 XVECEXP (dwarf, 0, i + 1) = tmp;
12691 par = emit_insn (par);
12692 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12693 RTX_FRAME_RELATED_P (par) = 1;
12695 return count * 8;
12698 /* Emit a call instruction with pattern PAT. ADDR is the address of
12699 the call target. */
12701 void
12702 arm_emit_call_insn (rtx pat, rtx addr)
12704 rtx insn;
12706 insn = emit_call_insn (pat);
12708 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12709 If the call might use such an entry, add a use of the PIC register
12710 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12711 if (TARGET_VXWORKS_RTP
12712 && flag_pic
12713 && GET_CODE (addr) == SYMBOL_REF
12714 && (SYMBOL_REF_DECL (addr)
12715 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12716 : !SYMBOL_REF_LOCAL_P (addr)))
12718 require_pic_register ();
12719 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12723 /* Output a 'call' insn. */
12724 const char *
12725 output_call (rtx *operands)
12727 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12729 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12730 if (REGNO (operands[0]) == LR_REGNUM)
12732 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12733 output_asm_insn ("mov%?\t%0, %|lr", operands);
12736 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12738 if (TARGET_INTERWORK || arm_arch4t)
12739 output_asm_insn ("bx%?\t%0", operands);
12740 else
12741 output_asm_insn ("mov%?\t%|pc, %0", operands);
12743 return "";
12746 /* Output a 'call' insn that is a reference in memory. This is
12747 disabled for ARMv5 and we prefer a blx instead because otherwise
12748 there's a significant performance overhead. */
12749 const char *
12750 output_call_mem (rtx *operands)
12752 gcc_assert (!arm_arch5);
12753 if (TARGET_INTERWORK)
12755 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12756 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12757 output_asm_insn ("bx%?\t%|ip", operands);
12759 else if (regno_use_in (LR_REGNUM, operands[0]))
12761 /* LR is used in the memory address. We load the address in the
12762 first instruction. It's safe to use IP as the target of the
12763 load since the call will kill it anyway. */
12764 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12765 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12766 if (arm_arch4t)
12767 output_asm_insn ("bx%?\t%|ip", operands);
12768 else
12769 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12771 else
12773 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12774 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12777 return "";
12781 /* Output a move from arm registers to an fpa register.
12782 OPERANDS[0] is an fpa register.
12783 OPERANDS[1] is the first register of an arm register pair. */
12784 const char *
12785 output_mov_long_double_fpa_from_arm (rtx *operands)
12787 int arm_reg0 = REGNO (operands[1]);
12788 rtx ops[3];
12790 gcc_assert (arm_reg0 != IP_REGNUM);
12792 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12793 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12794 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12796 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12797 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12799 return "";
12802 /* Output a move from an fpa register to arm registers.
12803 OPERANDS[0] is the first register of an arm register pair.
12804 OPERANDS[1] is an fpa register. */
12805 const char *
12806 output_mov_long_double_arm_from_fpa (rtx *operands)
12808 int arm_reg0 = REGNO (operands[0]);
12809 rtx ops[3];
12811 gcc_assert (arm_reg0 != IP_REGNUM);
12813 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12814 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12815 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12817 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12818 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12819 return "";
12822 /* Output a long double move from one set of arm registers to another.
12823 OPERANDS[0] is the destination.
12824 OPERANDS[1] is the source. */
12825 const char *
12826 output_mov_long_double_arm_from_arm (rtx *operands)
12828 /* We have to be careful here because the two might overlap. */
12829 int dest_start = REGNO (operands[0]);
12830 int src_start = REGNO (operands[1]);
12831 rtx ops[2];
12832 int i;
12834 if (dest_start < src_start)
12836 for (i = 0; i < 3; i++)
12838 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12839 ops[1] = gen_rtx_REG (SImode, src_start + i);
12840 output_asm_insn ("mov%?\t%0, %1", ops);
12843 else
12845 for (i = 2; i >= 0; i--)
12847 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12848 ops[1] = gen_rtx_REG (SImode, src_start + i);
12849 output_asm_insn ("mov%?\t%0, %1", ops);
12853 return "";
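/* Load SRC into DEST using a pair of instructions: for a constant, set
   the low 16 bits and then, if the high 16 bits are nonzero, insert
   them as well (movw/movt); otherwise emit a HIGH/LO_SUM pair.  */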
12856 void
12857 arm_emit_movpair (rtx dest, rtx src)
12859 /* If the src is an immediate, simplify it. */
12860 if (CONST_INT_P (src))
12862 HOST_WIDE_INT val = INTVAL (src);
12863 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12864 if ((val >> 16) & 0x0000ffff)
12865 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12866 GEN_INT (16)),
12867 GEN_INT ((val >> 16) & 0x0000ffff));
12868 return;
12870 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12871 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
12874 /* Output a move from arm registers to an fpa register.
12875 OPERANDS[0] is an fpa register.
12876 OPERANDS[1] is the first register of an arm register pair. */
12877 const char *
12878 output_mov_double_fpa_from_arm (rtx *operands)
12880 int arm_reg0 = REGNO (operands[1]);
12881 rtx ops[2];
12883 gcc_assert (arm_reg0 != IP_REGNUM);
12885 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12886 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12887 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12888 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12889 return "";
12892 /* Output a move from an fpa register to arm registers.
12894 OPERANDS[0] is the first register of an arm register pair.
12894 OPERANDS[1] is an fpa register. */
12895 const char *
12896 output_mov_double_arm_from_fpa (rtx *operands)
12898 int arm_reg0 = REGNO (operands[0]);
12899 rtx ops[2];
12901 gcc_assert (arm_reg0 != IP_REGNUM);
12903 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12904 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12905 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12906 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12907 return "";
12910 /* Output a move between double words. It must be REG<-MEM
12911 or MEM<-REG. */
12912 const char *
12913 output_move_double (rtx *operands)
12915 enum rtx_code code0 = GET_CODE (operands[0]);
12916 enum rtx_code code1 = GET_CODE (operands[1]);
12917 rtx otherops[3];
12919 if (code0 == REG)
12921 unsigned int reg0 = REGNO (operands[0]);
12923 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12925 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12927 switch (GET_CODE (XEXP (operands[1], 0)))
12929 case REG:
12930 if (TARGET_LDRD
12931 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12932 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12933 else
12934 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12935 break;
12937 case PRE_INC:
12938 gcc_assert (TARGET_LDRD);
12939 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12940 break;
12942 case PRE_DEC:
12943 if (TARGET_LDRD)
12944 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12945 else
12946 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12947 break;
12949 case POST_INC:
12950 if (TARGET_LDRD)
12951 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12952 else
12953 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12954 break;
12956 case POST_DEC:
12957 gcc_assert (TARGET_LDRD);
12958 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12959 break;
12961 case PRE_MODIFY:
12962 case POST_MODIFY:
12963 /* Autoincrement addressing modes should never have overlapping
12964 base and destination registers, and overlapping index registers
12965 are already prohibited, so this doesn't need to worry about
12966 fix_cm3_ldrd. */
12967 otherops[0] = operands[0];
12968 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12969 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12971 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12973 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12975 /* Registers overlap so split out the increment. */
12976 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12977 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12979 else
12981 /* Use a single insn if we can.
12982 FIXME: IWMMXT allows offsets larger than ldrd can
12983 handle, fix these up with a pair of ldr. */
12984 if (TARGET_THUMB2
12985 || GET_CODE (otherops[2]) != CONST_INT
12986 || (INTVAL (otherops[2]) > -256
12987 && INTVAL (otherops[2]) < 256))
12988 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12989 else
12991 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12992 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12996 else
12998 /* Use a single insn if we can.
12999 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13000 fix these up with a pair of ldr. */
13001 if (TARGET_THUMB2
13002 || GET_CODE (otherops[2]) != CONST_INT
13003 || (INTVAL (otherops[2]) > -256
13004 && INTVAL (otherops[2]) < 256))
13005 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
13006 else
13008 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13009 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
13012 break;
13014 case LABEL_REF:
13015 case CONST:
13016 /* We might be able to use ldrd %0, %1 here. However the range is
13017 different to ldr/adr, and it is broken on some ARMv7-M
13018 implementations. */
13019 /* Use the second register of the pair to avoid problematic
13020 overlap. */
13021 otherops[1] = operands[1];
13022 output_asm_insn ("adr%?\t%0, %1", otherops);
13023 operands[1] = otherops[0];
13024 if (TARGET_LDRD)
13025 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13026 else
13027 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
13028 break;
13030 /* ??? This needs checking for thumb2. */
13031 default:
13032 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
13033 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
13035 otherops[0] = operands[0];
13036 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
13037 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
13039 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
13041 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13043 switch ((int) INTVAL (otherops[2]))
13045 case -8:
13046 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13047 return "";
13048 case -4:
13049 if (TARGET_THUMB2)
13050 break;
13051 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13052 return "";
13053 case 4:
13054 if (TARGET_THUMB2)
13055 break;
13056 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13057 return "";
13060 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
13061 operands[1] = otherops[0];
13062 if (TARGET_LDRD
13063 && (GET_CODE (otherops[2]) == REG
13064 || TARGET_THUMB2
13065 || (GET_CODE (otherops[2]) == CONST_INT
13066 && INTVAL (otherops[2]) > -256
13067 && INTVAL (otherops[2]) < 256)))
13069 if (reg_overlap_mentioned_p (operands[0],
13070 otherops[2]))
13072 rtx tmp;
13073 /* Swap base and index registers over to
13074 avoid a conflict. */
13075 tmp = otherops[1];
13076 otherops[1] = otherops[2];
13077 otherops[2] = tmp;
13079 /* If both registers conflict, it will usually
13080 have been fixed by a splitter. */
13081 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13082 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13084 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13085 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13087 else
13089 otherops[0] = operands[0];
13090 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13092 return "";
13095 if (GET_CODE (otherops[2]) == CONST_INT)
13097 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13098 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13099 else
13100 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13102 else
13103 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13105 else
13106 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13108 if (TARGET_LDRD)
13109 return "ldr%(d%)\t%0, [%1]";
13111 return "ldm%(ia%)\t%1, %M0";
13113 else
13115 otherops[1] = adjust_address (operands[1], SImode, 4);
13116 /* Take care of overlapping base/data reg. */
13117 if (reg_mentioned_p (operands[0], operands[1]))
13119 output_asm_insn ("ldr%?\t%0, %1", otherops);
13120 output_asm_insn ("ldr%?\t%0, %1", operands);
13122 else
13124 output_asm_insn ("ldr%?\t%0, %1", operands);
13125 output_asm_insn ("ldr%?\t%0, %1", otherops);
13130 else
13132 /* Constraints should ensure this. */
13133 gcc_assert (code0 == MEM && code1 == REG);
13134 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13136 switch (GET_CODE (XEXP (operands[0], 0)))
13138 case REG:
13139 if (TARGET_LDRD)
13140 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13141 else
13142 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13143 break;
13145 case PRE_INC:
13146 gcc_assert (TARGET_LDRD);
13147 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13148 break;
13150 case PRE_DEC:
13151 if (TARGET_LDRD)
13152 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13153 else
13154 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13155 break;
13157 case POST_INC:
13158 if (TARGET_LDRD)
13159 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13160 else
13161 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13162 break;
13164 case POST_DEC:
13165 gcc_assert (TARGET_LDRD);
13166 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13167 break;
13169 case PRE_MODIFY:
13170 case POST_MODIFY:
13171 otherops[0] = operands[1];
13172 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13173 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13175 /* IWMMXT allows offsets larger than ldrd can handle,
13176 fix these up with a pair of ldr. */
13177 if (!TARGET_THUMB2
13178 && GET_CODE (otherops[2]) == CONST_INT
13179 && (INTVAL(otherops[2]) <= -256
13180 || INTVAL(otherops[2]) >= 256))
13182 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13184 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13185 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13187 else
13189 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13190 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13193 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13194 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13195 else
13196 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13197 break;
13199 case PLUS:
13200 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13201 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13203 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13205 case -8:
13206 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13207 return "";
13209 case -4:
13210 if (TARGET_THUMB2)
13211 break;
13212 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13213 return "";
13215 case 4:
13216 if (TARGET_THUMB2)
13217 break;
13218 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13219 return "";
13222 if (TARGET_LDRD
13223 && (GET_CODE (otherops[2]) == REG
13224 || TARGET_THUMB2
13225 || (GET_CODE (otherops[2]) == CONST_INT
13226 && INTVAL (otherops[2]) > -256
13227 && INTVAL (otherops[2]) < 256)))
13229 otherops[0] = operands[1];
13230 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13231 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13232 return "";
13234 /* Fall through */
13236 default:
13237 otherops[0] = adjust_address (operands[0], SImode, 4);
13238 otherops[1] = operands[1];
13239 output_asm_insn ("str%?\t%1, %0", operands);
13240 output_asm_insn ("str%?\t%H1, %0", otherops);
13244 return "";
13247 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13248 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13250 const char *
13251 output_move_quad (rtx *operands)
13253 if (REG_P (operands[0]))
13255 /* Load, or reg->reg move. */
13257 if (MEM_P (operands[1]))
13259 switch (GET_CODE (XEXP (operands[1], 0)))
13261 case REG:
13262 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13263 break;
13265 case LABEL_REF:
13266 case CONST:
13267 output_asm_insn ("adr%?\t%0, %1", operands);
13268 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13269 break;
13271 default:
13272 gcc_unreachable ();
13275 else
13277 rtx ops[2];
13278 int dest, src, i;
13280 gcc_assert (REG_P (operands[1]));
13282 dest = REGNO (operands[0]);
13283 src = REGNO (operands[1]);
13285 /* This seems pretty dumb, but hopefully GCC won't try to do it
13286 very often. */
13287 if (dest < src)
13288 for (i = 0; i < 4; i++)
13290 ops[0] = gen_rtx_REG (SImode, dest + i);
13291 ops[1] = gen_rtx_REG (SImode, src + i);
13292 output_asm_insn ("mov%?\t%0, %1", ops);
13294 else
13295 for (i = 3; i >= 0; i--)
13297 ops[0] = gen_rtx_REG (SImode, dest + i);
13298 ops[1] = gen_rtx_REG (SImode, src + i);
13299 output_asm_insn ("mov%?\t%0, %1", ops);
13303 else
13305 gcc_assert (MEM_P (operands[0]));
13306 gcc_assert (REG_P (operands[1]));
13307 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13309 switch (GET_CODE (XEXP (operands[0], 0)))
13311 case REG:
13312 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13313 break;
13315 default:
13316 gcc_unreachable ();
13320 return "";
13323 /* Output a VFP load or store instruction. */
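/* For example, a double-precision load from a simple address uses the
   default template and expands to "fldd%?\t%P0, %1", while a store with
   pre-decrement writeback expands to "fstmdbd%?\t%0!, {%P1}".  */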
13325 const char *
13326 output_move_vfp (rtx *operands)
13328 rtx reg, mem, addr, ops[2];
13329 int load = REG_P (operands[0]);
13330 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13331 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13332 const char *templ;
13333 char buff[50];
13334 enum machine_mode mode;
13336 reg = operands[!load];
13337 mem = operands[load];
13339 mode = GET_MODE (reg);
13341 gcc_assert (REG_P (reg));
13342 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13343 gcc_assert (mode == SFmode
13344 || mode == DFmode
13345 || mode == SImode
13346 || mode == DImode
13347 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13348 gcc_assert (MEM_P (mem));
13350 addr = XEXP (mem, 0);
13352 switch (GET_CODE (addr))
13354 case PRE_DEC:
13355 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13356 ops[0] = XEXP (addr, 0);
13357 ops[1] = reg;
13358 break;
13360 case POST_INC:
13361 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13362 ops[0] = XEXP (addr, 0);
13363 ops[1] = reg;
13364 break;
13366 default:
13367 templ = "f%s%c%%?\t%%%s0, %%1%s";
13368 ops[0] = reg;
13369 ops[1] = mem;
13370 break;
13373 sprintf (buff, templ,
13374 load ? "ld" : "st",
13375 dp ? 'd' : 's',
13376 dp ? "P" : "",
13377 integer_p ? "\t%@ int" : "");
13378 output_asm_insn (buff, ops);
13380 return "";
13383 /* Output a Neon quad-word load or store, or a load or store for
13384 larger structure modes.
13386 WARNING: The ordering of elements is weird in big-endian mode,
13387 because we use VSTM, as required by the EABI. GCC RTL defines
13388 element ordering based on in-memory order. This can differ
13389 from the architectural ordering of elements within a NEON register.
13390 The intrinsics defined in arm_neon.h use the NEON register element
13391 ordering, not the GCC RTL element ordering.
13393 For example, the in-memory ordering of a big-endian quadword
13394 vector with 16-bit elements when stored from register pair {d0,d1}
13395 will be (lowest address first, d0[N] is NEON register element N):
13397 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13399 When necessary, quadword registers (dN, dN+1) are moved to ARM
13400 registers from rN in the order:
13402 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13404 So that STM/LDM can be used on vectors in ARM registers, and the
13405 same memory layout will result as if VSTM/VLDM were used. */
13407 const char *
13408 output_move_neon (rtx *operands)
13410 rtx reg, mem, addr, ops[2];
13411 int regno, load = REG_P (operands[0]);
13412 const char *templ;
13413 char buff[50];
13414 enum machine_mode mode;
13416 reg = operands[!load];
13417 mem = operands[load];
13419 mode = GET_MODE (reg);
13421 gcc_assert (REG_P (reg));
13422 regno = REGNO (reg);
13423 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13424 || NEON_REGNO_OK_FOR_QUAD (regno));
13425 gcc_assert (VALID_NEON_DREG_MODE (mode)
13426 || VALID_NEON_QREG_MODE (mode)
13427 || VALID_NEON_STRUCT_MODE (mode));
13428 gcc_assert (MEM_P (mem));
13430 addr = XEXP (mem, 0);
13432 /* Strip off const from addresses like (const (plus (...))). */
13433 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13434 addr = XEXP (addr, 0);
13436 switch (GET_CODE (addr))
13438 case POST_INC:
13439 templ = "v%smia%%?\t%%0!, %%h1";
13440 ops[0] = XEXP (addr, 0);
13441 ops[1] = reg;
13442 break;
13444 case PRE_DEC:
13445 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13446 templ = "v%smdb%%?\t%%0!, %%h1";
13447 ops[0] = XEXP (addr, 0);
13448 ops[1] = reg;
13449 break;
13451 case POST_MODIFY:
13452 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13453 gcc_unreachable ();
13455 case LABEL_REF:
13456 case PLUS:
13458 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13459 int i;
13460 int overlap = -1;
13461 for (i = 0; i < nregs; i++)
13463 /* We're only using DImode here because it's a convenient size. */
13464 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13465 ops[1] = adjust_address (mem, DImode, 8 * i);
13466 if (reg_overlap_mentioned_p (ops[0], mem))
13468 gcc_assert (overlap == -1);
13469 overlap = i;
13471 else
13473 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13474 output_asm_insn (buff, ops);
13477 if (overlap != -1)
13479 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13480 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13481 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13482 output_asm_insn (buff, ops);
13485 return "";
13488 default:
13489 templ = "v%smia%%?\t%%m0, %%h1";
13490 ops[0] = mem;
13491 ops[1] = reg;
13494 sprintf (buff, templ, load ? "ld" : "st");
13495 output_asm_insn (buff, ops);
13497 return "";
13500 /* Compute and return the length of neon_mov<mode>, where <mode> is
13501 one of VSTRUCT modes: EI, OI, CI or XI. */
13503 arm_attr_length_move_neon (rtx insn)
13505 rtx reg, mem, addr;
13506 int load;
13507 enum machine_mode mode;
13509 extract_insn_cached (insn);
13511 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13513 mode = GET_MODE (recog_data.operand[0]);
13514 switch (mode)
13516 case EImode:
13517 case OImode:
13518 return 8;
13519 case CImode:
13520 return 12;
13521 case XImode:
13522 return 16;
13523 default:
13524 gcc_unreachable ();
13528 load = REG_P (recog_data.operand[0]);
13529 reg = recog_data.operand[!load];
13530 mem = recog_data.operand[load];
13532 gcc_assert (MEM_P (mem));
13534 mode = GET_MODE (reg);
13535 addr = XEXP (mem, 0);
13537 /* Strip off const from addresses like (const (plus (...))). */
13538 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13539 addr = XEXP (addr, 0);
13541 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13543 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13544 return insns * 4;
13546 else
13547 return 4;
13550 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13551 return zero. */
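/* Loads always return 0 here; for a store this returns 1 only when the
   address is a plain register or register plus constant, e.g. [r3] or
   [r3, #8], and 0 for a register offset such as [r3, r2].  */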
13554 arm_address_offset_is_imm (rtx insn)
13556 rtx mem, addr;
13558 extract_insn_cached (insn);
13560 if (REG_P (recog_data.operand[0]))
13561 return 0;
13563 mem = recog_data.operand[0];
13565 gcc_assert (MEM_P (mem));
13567 addr = XEXP (mem, 0);
13569 if (GET_CODE (addr) == REG
13570 || (GET_CODE (addr) == PLUS
13571 && GET_CODE (XEXP (addr, 0)) == REG
13572 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13573 return 1;
13574 else
13575 return 0;
13578 /* Output an ADD r, s, #n where n may be too big for one instruction.
13579 If adding zero to one register, output nothing. */
13580 const char *
13581 output_add_immediate (rtx *operands)
13583 HOST_WIDE_INT n = INTVAL (operands[2]);
13585 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13587 if (n < 0)
13588 output_multi_immediate (operands,
13589 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13590 -n);
13591 else
13592 output_multi_immediate (operands,
13593 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13597 return "";
13600 /* Output a multiple immediate operation.
13601 OPERANDS is the vector of operands referred to in the output patterns.
13602 INSTR1 is the output pattern to use for the first constant.
13603 INSTR2 is the output pattern to use for subsequent constants.
13604 IMMED_OP is the index of the constant slot in OPERANDS.
13605 N is the constant value. */
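/* For example, N = 0x1f001 with an add pattern is emitted as two
   instructions using the immediate chunks #0x1 and #0x1f000.  */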
13606 static const char *
13607 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13608 int immed_op, HOST_WIDE_INT n)
13610 #if HOST_BITS_PER_WIDE_INT > 32
13611 n &= 0xffffffff;
13612 #endif
13614 if (n == 0)
13616 /* Quick and easy output. */
13617 operands[immed_op] = const0_rtx;
13618 output_asm_insn (instr1, operands);
13620 else
13622 int i;
13623 const char * instr = instr1;
13625 /* Note that n is never zero here (which would give no output). */
13626 for (i = 0; i < 32; i += 2)
13628 if (n & (3 << i))
13630 operands[immed_op] = GEN_INT (n & (255 << i));
13631 output_asm_insn (instr, operands);
13632 instr = instr2;
13633 i += 6;
13638 return "";
13641 /* Return the name of a shifter operation. */
13642 static const char *
13643 arm_shift_nmem(enum rtx_code code)
13645 switch (code)
13647 case ASHIFT:
13648 return ARM_LSL_NAME;
13650 case ASHIFTRT:
13651 return "asr";
13653 case LSHIFTRT:
13654 return "lsr";
13656 case ROTATERT:
13657 return "ror";
13659 default:
13660 abort();
13664 /* Return the appropriate ARM instruction for the operation code.
13665 The returned result should not be overwritten. OP is the rtx of the
13666 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13667 was shifted. */
13668 const char *
13669 arithmetic_instr (rtx op, int shift_first_arg)
13671 switch (GET_CODE (op))
13673 case PLUS:
13674 return "add";
13676 case MINUS:
13677 return shift_first_arg ? "rsb" : "sub";
13679 case IOR:
13680 return "orr";
13682 case XOR:
13683 return "eor";
13685 case AND:
13686 return "and";
13688 case ASHIFT:
13689 case ASHIFTRT:
13690 case LSHIFTRT:
13691 case ROTATERT:
13692 return arm_shift_nmem(GET_CODE(op));
13694 default:
13695 gcc_unreachable ();
13699 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13700 for the operation code. The returned result should not be overwritten.
13701 OP is the rtx code of the shift.
13702 On exit, *AMOUNTP will be -1 if the shift is by a register; otherwise
13703 it will hold the constant shift amount. */
13704 static const char *
13705 shift_op (rtx op, HOST_WIDE_INT *amountp)
13707 const char * mnem;
13708 enum rtx_code code = GET_CODE (op);
13710 switch (GET_CODE (XEXP (op, 1)))
13712 case REG:
13713 case SUBREG:
13714 *amountp = -1;
13715 break;
13717 case CONST_INT:
13718 *amountp = INTVAL (XEXP (op, 1));
13719 break;
13721 default:
13722 gcc_unreachable ();
13725 switch (code)
13727 case ROTATE:
13728 gcc_assert (*amountp != -1);
13729 *amountp = 32 - *amountp;
13730 code = ROTATERT;
13732 /* Fall through. */
13734 case ASHIFT:
13735 case ASHIFTRT:
13736 case LSHIFTRT:
13737 case ROTATERT:
13738 mnem = arm_shift_nmem(code);
13739 break;
13741 case MULT:
13742 /* We never have to worry about the amount being other than a
13743 power of 2, since this case can never be reloaded from a reg. */
13744 gcc_assert (*amountp != -1);
13745 *amountp = int_log2 (*amountp);
13746 return ARM_LSL_NAME;
13748 default:
13749 gcc_unreachable ();
13752 if (*amountp != -1)
13754 /* This is not 100% correct, but follows from the desire to merge
13755 multiplication by a power of 2 with the recognizer for a
13756 shift. >=32 is not a valid shift for "lsl", so we must try and
13757 output a shift that produces the correct arithmetical result.
13758 Using lsr #32 is identical except for the fact that the carry bit
13759 is not set correctly if we set the flags; but we never use the
13760 carry bit from such an operation, so we can ignore that. */
13761 if (code == ROTATERT)
13762 /* Rotate is just modulo 32. */
13763 *amountp &= 31;
13764 else if (*amountp != (*amountp & 31))
13766 if (code == ASHIFT)
13767 mnem = "lsr";
13768 *amountp = 32;
13771 /* Shifts of 0 are no-ops. */
13772 if (*amountp == 0)
13773 return NULL;
13776 return mnem;
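/* Editor's note: a minimal standalone sketch (assumed names, not a GCC
   interface) of the constant-amount normalisation done at the end of
   shift_op above -- rotates are reduced modulo 32, and an out-of-range
   "lsl" is rewritten as "lsr #32" so the all-zero result is preserved.  */

#include <string.h>

static void
normalise_shift (const char **mnem, long *amount)
{
  if (strcmp (*mnem, "ror") == 0)
    *amount &= 31;              /* rotate is just modulo 32 */
  else if (*amount != (*amount & 31))
    {
      if (strcmp (*mnem, "lsl") == 0)
        *mnem = "lsr";          /* lsl #>=32 gives the same result as lsr #32 */
      *amount = 32;
    }
}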
13779 /* Obtain the shift from the POWER of two. */
13781 static HOST_WIDE_INT
13782 int_log2 (HOST_WIDE_INT power)
13784 HOST_WIDE_INT shift = 0;
13786 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13788 gcc_assert (shift <= 31);
13789 shift++;
13792 return shift;
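/* Editor's illustration (assumed example): shift_op above turns a multiply
   by a power of two into ARM_LSL_NAME, using int_log2 for the shift count,
   so a multiply by 8 is printed as a left shift by 3.  */

static int
lowest_set_bit (long power)     /* the same loop as int_log2 above */
{
  int shift = 0;

  while (((1L << shift) & power) == 0)
    shift++;
  return shift;
}

/* lowest_set_bit (8) == 3.  */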
13795 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13796 because /bin/as is horribly restrictive. The judgement about
13797 whether each character is 'printable' (and can be output as
13798 is) or not (and must be printed with an octal escape) must be made
13799 with reference to the *host* character set -- the situation is
13800 similar to that discussed in the comments above pp_c_char in
13801 c-pretty-print.c. */
13803 #define MAX_ASCII_LEN 51
13805 void
13806 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13808 int i;
13809 int len_so_far = 0;
13811 fputs ("\t.ascii\t\"", stream);
13813 for (i = 0; i < len; i++)
13815 int c = p[i];
13817 if (len_so_far >= MAX_ASCII_LEN)
13819 fputs ("\"\n\t.ascii\t\"", stream);
13820 len_so_far = 0;
13823 if (ISPRINT (c))
13825 if (c == '\\' || c == '\"')
13827 putc ('\\', stream);
13828 len_so_far++;
13830 putc (c, stream);
13831 len_so_far++;
13833 else
13835 fprintf (stream, "\\%03o", c);
13836 len_so_far += 4;
13840 fputs ("\"\n", stream);
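/* Editor's sketch of the per-character rule used above (illustrative only;
   the host isprint stands in for GCC's ISPRINT): printable characters pass
   straight through, backslash and double quote gain a backslash prefix,
   and every other byte becomes a three-digit octal escape.  */

#include <ctype.h>
#include <stdio.h>

static void
emit_ascii_char (FILE *stream, int c)
{
  if (isprint (c))
    {
      if (c == '\\' || c == '\"')
        putc ('\\', stream);
      putc (c, stream);
    }
  else
    fprintf (stream, "\\%03o", c);
}

/* The bytes 'a', '"' and 0x07 come out as:  a\"\007  */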
13843 /* Compute the register save mask for registers 0 through 12
13844 inclusive. This code is used by arm_compute_save_reg_mask. */
13846 static unsigned long
13847 arm_compute_save_reg0_reg12_mask (void)
13849 unsigned long func_type = arm_current_func_type ();
13850 unsigned long save_reg_mask = 0;
13851 unsigned int reg;
13853 if (IS_INTERRUPT (func_type))
13855 unsigned int max_reg;
13856 /* Interrupt functions must not corrupt any registers,
13857 even call clobbered ones. If this is a leaf function
13858 we can just examine the registers used by the RTL, but
13859 otherwise we have to assume that whatever function is
13860 called might clobber anything, and so we have to save
13861 all the call-clobbered registers as well. */
13862 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13863 /* FIQ handlers have registers r8 - r12 banked, so
13864 we only need to check r0 - r7. Normal ISRs only
13865 bank r14 and r15, so we must check up to r12.
13866 r13 is the stack pointer which is always preserved,
13867 so we do not need to consider it here. */
13868 max_reg = 7;
13869 else
13870 max_reg = 12;
13872 for (reg = 0; reg <= max_reg; reg++)
13873 if (df_regs_ever_live_p (reg)
13874 || (! current_function_is_leaf && call_used_regs[reg]))
13875 save_reg_mask |= (1 << reg);
13877 /* Also save the pic base register if necessary. */
13878 if (flag_pic
13879 && !TARGET_SINGLE_PIC_BASE
13880 && arm_pic_register != INVALID_REGNUM
13881 && crtl->uses_pic_offset_table)
13882 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13884 else if (IS_VOLATILE(func_type))
13886 /* For noreturn functions we historically omitted register saves
13887 altogether. However this really messes up debugging. As a
13888 compromise save just the frame pointers. Combined with the link
13889 register saved elsewhere this should be sufficient to get
13890 a backtrace. */
13891 if (frame_pointer_needed)
13892 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13893 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13894 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13895 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13896 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13898 else
13900 /* In the normal case we only need to save those registers
13901 which are call saved and which are used by this function. */
13902 for (reg = 0; reg <= 11; reg++)
13903 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13904 save_reg_mask |= (1 << reg);
13906 /* Handle the frame pointer as a special case. */
13907 if (frame_pointer_needed)
13908 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13910 /* If we aren't loading the PIC register,
13911 don't stack it even though it may be live. */
13912 if (flag_pic
13913 && !TARGET_SINGLE_PIC_BASE
13914 && arm_pic_register != INVALID_REGNUM
13915 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13916 || crtl->uses_pic_offset_table))
13917 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13919 /* The prologue will copy SP into R0, so save it. */
13920 if (IS_STACKALIGN (func_type))
13921 save_reg_mask |= 1;
13924 /* Save registers so the exception handler can modify them. */
13925 if (crtl->calls_eh_return)
13927 unsigned int i;
13929 for (i = 0; ; i++)
13931 reg = EH_RETURN_DATA_REGNO (i);
13932 if (reg == INVALID_REGNUM)
13933 break;
13934 save_reg_mask |= 1 << reg;
13938 return save_reg_mask;
13942 /* Compute the number of bytes used to store the static chain register on the
13943 stack, above the stack frame. We need to know this accurately to get the
13944 alignment of the rest of the stack frame correct. */
13946 static int arm_compute_static_chain_stack_bytes (void)
13948 unsigned long func_type = arm_current_func_type ();
13949 int static_chain_stack_bytes = 0;
13951 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
13952 IS_NESTED (func_type) &&
13953 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13954 static_chain_stack_bytes = 4;
13956 return static_chain_stack_bytes;
13960 /* Compute a bit mask of which registers need to be
13961 saved on the stack for the current function.
13962 This is used by arm_get_frame_offsets, which may add extra registers. */
13964 static unsigned long
13965 arm_compute_save_reg_mask (void)
13967 unsigned int save_reg_mask = 0;
13968 unsigned long func_type = arm_current_func_type ();
13969 unsigned int reg;
13971 if (IS_NAKED (func_type))
13972 /* This should never really happen. */
13973 return 0;
13975 /* If we are creating a stack frame, then we must save the frame pointer,
13976 IP (which will hold the old stack pointer), LR and the PC. */
13977 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13978 save_reg_mask |=
13979 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13980 | (1 << IP_REGNUM)
13981 | (1 << LR_REGNUM)
13982 | (1 << PC_REGNUM);
13984 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13986 /* Decide if we need to save the link register.
13987 Interrupt routines have their own banked link register,
13988 so they never need to save it.
13989 Otherwise if we do not use the link register we do not need to save
13990 it. If we are pushing other registers onto the stack however, we
13991 can save an instruction in the epilogue by pushing the link register
13992 now and then popping it back into the PC. This incurs extra memory
13993 accesses though, so we only do it when optimizing for size, and only
13994 if we know that we will not need a fancy return sequence. */
13995 if (df_regs_ever_live_p (LR_REGNUM)
13996 || (save_reg_mask
13997 && optimize_size
13998 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13999 && !crtl->calls_eh_return))
14000 save_reg_mask |= 1 << LR_REGNUM;
14002 if (cfun->machine->lr_save_eliminated)
14003 save_reg_mask &= ~ (1 << LR_REGNUM);
14005 if (TARGET_REALLY_IWMMXT
14006 && ((bit_count (save_reg_mask)
14007 + ARM_NUM_INTS (crtl->args.pretend_args_size +
14008 arm_compute_static_chain_stack_bytes())
14009 ) % 2) != 0)
14011 /* The total number of registers that are going to be pushed
14012 onto the stack is odd. We need to ensure that the stack
14013 is 64-bit aligned before we start to save iWMMXt registers,
14014 and also before we start to create locals. (A local variable
14015 might be a double or long long which we will load/store using
14016 an iWMMXt instruction). Therefore we need to push another
14017 ARM register, so that the stack will be 64-bit aligned. We
14018 try to avoid using the arg registers (r0 - r3) as they might be
14019 used to pass values in a tail call. */
14020 for (reg = 4; reg <= 12; reg++)
14021 if ((save_reg_mask & (1 << reg)) == 0)
14022 break;
14024 if (reg <= 12)
14025 save_reg_mask |= (1 << reg);
14026 else
14028 cfun->machine->sibcall_blocked = 1;
14029 save_reg_mask |= (1 << 3);
14033 /* We may need to push an additional register for use initializing the
14034 PIC base register. */
14035 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
14036 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
14038 reg = thumb_find_work_register (1 << 4);
14039 if (!call_used_regs[reg])
14040 save_reg_mask |= (1 << reg);
14043 return save_reg_mask;
14047 /* Compute a bit mask of which registers need to be
14048 saved on the stack for the current function. */
14049 static unsigned long
14050 thumb1_compute_save_reg_mask (void)
14052 unsigned long mask;
14053 unsigned reg;
14055 mask = 0;
14056 for (reg = 0; reg < 12; reg ++)
14057 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14058 mask |= 1 << reg;
14060 if (flag_pic
14061 && !TARGET_SINGLE_PIC_BASE
14062 && arm_pic_register != INVALID_REGNUM
14063 && crtl->uses_pic_offset_table)
14064 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14066 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14067 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14068 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14070 /* LR will also be pushed if any lo regs are pushed. */
14071 if (mask & 0xff || thumb_force_lr_save ())
14072 mask |= (1 << LR_REGNUM);
14074 /* Make sure we have a low work register if we need one.
14075 We will need one if we are going to push a high register,
14076 but we are not currently intending to push a low register. */
14077 if ((mask & 0xff) == 0
14078 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14080 /* Use thumb_find_work_register to choose which register
14081 we will use. If the register is live then we will
14082 have to push it. Use LAST_LO_REGNUM as our fallback
14083 choice for the register to select. */
14084 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14085 /* Make sure the register returned by thumb_find_work_register is
14086 not part of the return value. */
14087 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14088 reg = LAST_LO_REGNUM;
14090 if (! call_used_regs[reg])
14091 mask |= 1 << reg;
14094 /* The 504 below is 8 bytes less than 512 because there are two possible
14095 alignment words. We can't tell here if they will be present or not so we
14096 have to play it safe and assume that they are. */
14097 if ((CALLER_INTERWORKING_SLOT_SIZE +
14098 ROUND_UP_WORD (get_frame_size ()) +
14099 crtl->outgoing_args_size) >= 504)
14101 /* This is the same as the code in thumb1_expand_prologue() which
14102 determines which register to use for stack decrement. */
14103 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14104 if (mask & (1 << reg))
14105 break;
14107 if (reg > LAST_LO_REGNUM)
14109 /* Make sure we have a register available for stack decrement. */
14110 mask |= 1 << LAST_LO_REGNUM;
14114 return mask;
14118 /* Return the number of bytes required to save VFP registers. */
14119 static int
14120 arm_get_vfp_saved_size (void)
14122 unsigned int regno;
14123 int count;
14124 int saved;
14126 saved = 0;
14127 /* Space for saved VFP registers. */
14128 if (TARGET_HARD_FLOAT && TARGET_VFP)
14130 count = 0;
14131 for (regno = FIRST_VFP_REGNUM;
14132 regno < LAST_VFP_REGNUM;
14133 regno += 2)
14135 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14136 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14138 if (count > 0)
14140 /* Workaround ARM10 VFPr1 bug. */
14141 if (count == 2 && !arm_arch6)
14142 count++;
14143 saved += count * 8;
14145 count = 0;
14147 else
14148 count++;
14150 if (count > 0)
14152 if (count == 2 && !arm_arch6)
14153 count++;
14154 saved += count * 8;
14157 return saved;
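/* Editor's note, illustrative only: the bytes contributed by one contiguous
   run of COUNT D registers (pairs of S registers), including the ARM10
   VFPr1 workaround applied above on pre-ARMv6 cores, where a run of
   exactly two registers is padded to three.  */

static int
vfp_run_bytes (int count, int have_arm_arch6)
{
  if (count == 2 && !have_arm_arch6)
    count++;
  return count * 8;
}

/* vfp_run_bytes (2, 0) == 24, whereas vfp_run_bytes (2, 1) == 16.  */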
14161 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14162 everything bar the final return instruction. */
14163 const char *
14164 output_return_instruction (rtx operand, int really_return, int reverse)
14166 char conditional[10];
14167 char instr[100];
14168 unsigned reg;
14169 unsigned long live_regs_mask;
14170 unsigned long func_type;
14171 arm_stack_offsets *offsets;
14173 func_type = arm_current_func_type ();
14175 if (IS_NAKED (func_type))
14176 return "";
14178 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14180 /* If this function was declared non-returning, and we have
14181 found a tail call, then we have to trust that the called
14182 function won't return. */
14183 if (really_return)
14185 rtx ops[2];
14187 /* Otherwise, trap an attempted return by aborting. */
14188 ops[0] = operand;
14189 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14190 : "abort");
14191 assemble_external_libcall (ops[1]);
14192 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14195 return "";
14198 gcc_assert (!cfun->calls_alloca || really_return);
14200 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14202 cfun->machine->return_used_this_function = 1;
14204 offsets = arm_get_frame_offsets ();
14205 live_regs_mask = offsets->saved_regs_mask;
14207 if (live_regs_mask)
14209 const char * return_reg;
14211 /* If we do not have any special requirements for function exit
14212 (e.g. interworking) then we can load the return address
14213 directly into the PC. Otherwise we must load it into LR. */
14214 if (really_return
14215 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14216 return_reg = reg_names[PC_REGNUM];
14217 else
14218 return_reg = reg_names[LR_REGNUM];
14220 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14222 /* There are three possible reasons for the IP register
14223 being saved. 1) a stack frame was created, in which case
14224 IP contains the old stack pointer, or 2) an ISR routine
14225 corrupted it, or 3) it was saved to align the stack on
14226 iWMMXt. In case 1, restore IP into SP, otherwise just
14227 restore IP. */
14228 if (frame_pointer_needed)
14230 live_regs_mask &= ~ (1 << IP_REGNUM);
14231 live_regs_mask |= (1 << SP_REGNUM);
14233 else
14234 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14237 /* On some ARM architectures it is faster to use LDR rather than
14238 LDM to load a single register. On other architectures, the
14239 cost is the same. In 26 bit mode, or for exception handlers,
14240 we have to use LDM to load the PC so that the CPSR is also
14241 restored. */
14242 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14243 if (live_regs_mask == (1U << reg))
14244 break;
14246 if (reg <= LAST_ARM_REGNUM
14247 && (reg != LR_REGNUM
14248 || ! really_return
14249 || ! IS_INTERRUPT (func_type)))
14251 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14252 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14254 else
14256 char *p;
14257 int first = 1;
14259 /* Generate the load multiple instruction to restore the
14260 registers. Note we can get here, even if
14261 frame_pointer_needed is true, but only if sp already
14262 points to the base of the saved core registers. */
14263 if (live_regs_mask & (1 << SP_REGNUM))
14265 unsigned HOST_WIDE_INT stack_adjust;
14267 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14268 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14270 if (stack_adjust && arm_arch5 && TARGET_ARM)
14271 if (TARGET_UNIFIED_ASM)
14272 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14273 else
14274 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14275 else
14277 /* If we can't use ldmib (SA110 bug),
14278 then try to pop r3 instead. */
14279 if (stack_adjust)
14280 live_regs_mask |= 1 << 3;
14282 if (TARGET_UNIFIED_ASM)
14283 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14284 else
14285 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14288 else
14289 if (TARGET_UNIFIED_ASM)
14290 sprintf (instr, "pop%s\t{", conditional);
14291 else
14292 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14294 p = instr + strlen (instr);
14296 for (reg = 0; reg <= SP_REGNUM; reg++)
14297 if (live_regs_mask & (1 << reg))
14299 int l = strlen (reg_names[reg]);
14301 if (first)
14302 first = 0;
14303 else
14305 memcpy (p, ", ", 2);
14306 p += 2;
14309 memcpy (p, "%|", 2);
14310 memcpy (p + 2, reg_names[reg], l);
14311 p += l + 2;
14314 if (live_regs_mask & (1 << LR_REGNUM))
14316 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14317 /* If returning from an interrupt, restore the CPSR. */
14318 if (IS_INTERRUPT (func_type))
14319 strcat (p, "^");
14321 else
14322 strcpy (p, "}");
14325 output_asm_insn (instr, & operand);
14327 /* See if we need to generate an extra instruction to
14328 perform the actual function return. */
14329 if (really_return
14330 && func_type != ARM_FT_INTERWORKED
14331 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14333 /* The return has already been handled
14334 by loading the LR into the PC. */
14335 really_return = 0;
14339 if (really_return)
14341 switch ((int) ARM_FUNC_TYPE (func_type))
14343 case ARM_FT_ISR:
14344 case ARM_FT_FIQ:
14345 /* ??? This is wrong for unified assembly syntax. */
14346 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14347 break;
14349 case ARM_FT_INTERWORKED:
14350 sprintf (instr, "bx%s\t%%|lr", conditional);
14351 break;
14353 case ARM_FT_EXCEPTION:
14354 /* ??? This is wrong for unified assembly syntax. */
14355 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14356 break;
14358 default:
14359 /* Use bx if it's available. */
14360 if (arm_arch5 || arm_arch4t)
14361 sprintf (instr, "bx%s\t%%|lr", conditional);
14362 else
14363 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14364 break;
14367 output_asm_insn (instr, & operand);
14370 return "";
14373 /* Write the function name into the code section, directly preceding
14374 the function prologue.
14376 Code will be output similar to this:
14378 .ascii "arm_poke_function_name", 0
14379 .align
14381 .word 0xff000000 + (t1 - t0)
14382 arm_poke_function_name
14383 mov ip, sp
14384 stmfd sp!, {fp, ip, lr, pc}
14385 sub fp, ip, #4
14387 When performing a stack backtrace, code can inspect the value
14388 of 'pc' stored at 'fp' + 0. If the trace function then looks
14389 at location pc - 12 and the top 8 bits are set, then we know
14390 that there is a function name embedded immediately preceding this
14391 location, and that its length is ((pc[-3]) & 0x00ffffff).
14393 We assume that pc is declared as a pointer to an unsigned long.
14395 It is of no benefit to output the function name if we are assembling
14396 a leaf function. These function types will not contain a stack
14397 backtrace structure, therefore it is not possible to determine the
14398 function name. */
14399 void
14400 arm_poke_function_name (FILE *stream, const char *name)
14402 unsigned long alignlength;
14403 unsigned long length;
14404 rtx x;
14406 length = strlen (name) + 1;
14407 alignlength = ROUND_UP_WORD (length);
14409 ASM_OUTPUT_ASCII (stream, name, length);
14410 ASM_OUTPUT_ALIGN (stream, 2);
14411 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14412 assemble_aligned_integer (UNITS_PER_WORD, x);
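/* Hypothetical backtracer fragment -- an editor's assumption, not part of
   GCC or any runtime: given the address of the marker word emitted above,
   check the 0xff000000 tag and step back over the padded name length held
   in the low 24 bits to reach the NUL-terminated name.  */

static const char *
recover_poked_name (const unsigned long *marker)
{
  if ((*marker & 0xff000000) != 0xff000000)
    return 0;                           /* no name was poked here */
  return (const char *) marker - (*marker & 0x00ffffff);
}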
14415 /* Place some comments into the assembler stream
14416 describing the current function. */
14417 static void
14418 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14420 unsigned long func_type;
14422 if (TARGET_THUMB1)
14424 thumb1_output_function_prologue (f, frame_size);
14425 return;
14428 /* Sanity check. */
14429 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14431 func_type = arm_current_func_type ();
14433 switch ((int) ARM_FUNC_TYPE (func_type))
14435 default:
14436 case ARM_FT_NORMAL:
14437 break;
14438 case ARM_FT_INTERWORKED:
14439 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14440 break;
14441 case ARM_FT_ISR:
14442 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14443 break;
14444 case ARM_FT_FIQ:
14445 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14446 break;
14447 case ARM_FT_EXCEPTION:
14448 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14449 break;
14452 if (IS_NAKED (func_type))
14453 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14455 if (IS_VOLATILE (func_type))
14456 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14458 if (IS_NESTED (func_type))
14459 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14460 if (IS_STACKALIGN (func_type))
14461 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14463 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14464 crtl->args.size,
14465 crtl->args.pretend_args_size, frame_size);
14467 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14468 frame_pointer_needed,
14469 cfun->machine->uses_anonymous_args);
14471 if (cfun->machine->lr_save_eliminated)
14472 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14474 if (crtl->calls_eh_return)
14475 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14479 const char *
14480 arm_output_epilogue (rtx sibling)
14482 int reg;
14483 unsigned long saved_regs_mask;
14484 unsigned long func_type;
14485 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14486 frame that is $fp + 4 for a non-variadic function. */
14487 int floats_offset = 0;
14488 rtx operands[3];
14489 FILE * f = asm_out_file;
14490 unsigned int lrm_count = 0;
14491 int really_return = (sibling == NULL);
14492 int start_reg;
14493 arm_stack_offsets *offsets;
14495 /* If we have already generated the return instruction
14496 then it is futile to generate anything else. */
14497 if (use_return_insn (FALSE, sibling) &&
14498 (cfun->machine->return_used_this_function != 0))
14499 return "";
14501 func_type = arm_current_func_type ();
14503 if (IS_NAKED (func_type))
14504 /* Naked functions don't have epilogues. */
14505 return "";
14507 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14509 rtx op;
14511 /* A volatile function should never return. Call abort. */
14512 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14513 assemble_external_libcall (op);
14514 output_asm_insn ("bl\t%a0", &op);
14516 return "";
14519 /* If we are throwing an exception, then we really must be doing a
14520 return, so we can't tail-call. */
14521 gcc_assert (!crtl->calls_eh_return || really_return);
14523 offsets = arm_get_frame_offsets ();
14524 saved_regs_mask = offsets->saved_regs_mask;
14526 if (TARGET_IWMMXT)
14527 lrm_count = bit_count (saved_regs_mask);
14529 floats_offset = offsets->saved_args;
14530 /* Compute how far away the floats will be. */
14531 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14532 if (saved_regs_mask & (1 << reg))
14533 floats_offset += 4;
14535 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14537 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14538 int vfp_offset = offsets->frame;
14540 if (TARGET_FPA_EMU2)
14542 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14543 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14545 floats_offset += 12;
14546 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14547 reg, FP_REGNUM, floats_offset - vfp_offset);
14550 else
14552 start_reg = LAST_FPA_REGNUM;
14554 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14556 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14558 floats_offset += 12;
14560 /* We can't unstack more than four registers at once. */
14561 if (start_reg - reg == 3)
14563 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14564 reg, FP_REGNUM, floats_offset - vfp_offset);
14565 start_reg = reg - 1;
14568 else
14570 if (reg != start_reg)
14571 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14572 reg + 1, start_reg - reg,
14573 FP_REGNUM, floats_offset - vfp_offset);
14574 start_reg = reg - 1;
14578 /* Just in case the last register checked also needs unstacking. */
14579 if (reg != start_reg)
14580 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14581 reg + 1, start_reg - reg,
14582 FP_REGNUM, floats_offset - vfp_offset);
14585 if (TARGET_HARD_FLOAT && TARGET_VFP)
14587 int saved_size;
14589 /* The fldmd insns do not have base+offset addressing
14590 modes, so we use IP to hold the address. */
14591 saved_size = arm_get_vfp_saved_size ();
14593 if (saved_size > 0)
14595 floats_offset += saved_size;
14596 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14597 FP_REGNUM, floats_offset - vfp_offset);
14599 start_reg = FIRST_VFP_REGNUM;
14600 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14602 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14603 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14605 if (start_reg != reg)
14606 vfp_output_fldmd (f, IP_REGNUM,
14607 (start_reg - FIRST_VFP_REGNUM) / 2,
14608 (reg - start_reg) / 2);
14609 start_reg = reg + 2;
14612 if (start_reg != reg)
14613 vfp_output_fldmd (f, IP_REGNUM,
14614 (start_reg - FIRST_VFP_REGNUM) / 2,
14615 (reg - start_reg) / 2);
14618 if (TARGET_IWMMXT)
14620 /* The frame pointer is guaranteed to be non-double-word aligned.
14621 This is because it is set to (old_stack_pointer - 4) and the
14622 old_stack_pointer was double word aligned. Thus the offset to
14623 the iWMMXt registers to be loaded must also be non-double-word
14624 sized, so that the resultant address *is* double-word aligned.
14625 We can ignore floats_offset since that was already included in
14626 the live_regs_mask. */
14627 lrm_count += (lrm_count % 2 ? 2 : 1);
14629 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14630 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14632 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14633 reg, FP_REGNUM, lrm_count * 4);
14634 lrm_count += 2;
14638 /* saved_regs_mask should contain the IP, which at the time of stack
14639 frame generation actually contains the old stack pointer. So a
14640 quick way to unwind the stack is just to pop the IP register directly
14641 into the stack pointer. */
14642 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14643 saved_regs_mask &= ~ (1 << IP_REGNUM);
14644 saved_regs_mask |= (1 << SP_REGNUM);
14646 /* There are two registers left in saved_regs_mask - LR and PC. We
14647 only need to restore the LR register (the return address), but to
14648 save time we can load it directly into the PC, unless we need a
14649 special function exit sequence, or we are not really returning. */
14650 if (really_return
14651 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14652 && !crtl->calls_eh_return)
14653 /* Delete the LR from the register mask, so that the LR on
14654 the stack is loaded into the PC in the register mask. */
14655 saved_regs_mask &= ~ (1 << LR_REGNUM);
14656 else
14657 saved_regs_mask &= ~ (1 << PC_REGNUM);
14659 /* We must use SP as the base register, because SP is one of the
14660 registers being restored. If an interrupt or page fault
14661 happens in the ldm instruction, the SP might or might not
14662 have been restored. That would be bad, as then SP will no
14663 longer indicate the safe area of stack, and we can get stack
14664 corruption. Using SP as the base register means that it will
14665 be reset correctly to the original value, should an interrupt
14666 occur. If the stack pointer already points at the right
14667 place, then omit the subtraction. */
14668 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14669 || cfun->calls_alloca)
14670 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14671 4 * bit_count (saved_regs_mask));
14672 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14674 if (IS_INTERRUPT (func_type))
14675 /* Interrupt handlers will have pushed the
14676 IP onto the stack, so restore it now. */
14677 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14679 else
14681 /* This branch is executed for ARM mode (non-apcs frames) and
14682 Thumb-2 mode. Frame layout is essentially the same for those
14683 cases, except that in ARM mode frame pointer points to the
14684 first saved register, while in Thumb-2 mode the frame pointer points
14685 to the last saved register.
14687 It is possible to make frame pointer point to last saved
14688 register in both cases, and remove some conditionals below.
14689 That means that fp setup in prologue would be just "mov fp, sp"
14690 and sp restore in epilogue would be just "mov sp, fp", whereas
14691 now we have to use add/sub in those cases. However, the value
14692 of that would be marginal, as both mov and add/sub are 32-bit
14693 in ARM mode, and it would require extra conditionals
14694 in arm_expand_prologue to distinguish the ARM-apcs-frame case
14695 (where the frame pointer is required to point at the first register)
14696 from the ARM-non-apcs-frame case. Therefore, such a change is postponed
14697 until a real need arises. */
14698 unsigned HOST_WIDE_INT amount;
14699 int rfe;
14700 /* Restore stack pointer if necessary. */
14701 if (TARGET_ARM && frame_pointer_needed)
14703 operands[0] = stack_pointer_rtx;
14704 operands[1] = hard_frame_pointer_rtx;
14706 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14707 output_add_immediate (operands);
14709 else
14711 if (frame_pointer_needed)
14713 /* For Thumb-2 restore sp from the frame pointer.
14714 Operand restrictions mean we have to increment FP, then copy
14715 to SP. */
14716 amount = offsets->locals_base - offsets->saved_regs;
14717 operands[0] = hard_frame_pointer_rtx;
14719 else
14721 unsigned long count;
14722 operands[0] = stack_pointer_rtx;
14723 amount = offsets->outgoing_args - offsets->saved_regs;
14724 /* pop call clobbered registers if it avoids a
14725 separate stack adjustment. */
14726 count = offsets->saved_regs - offsets->saved_args;
14727 if (optimize_size
14728 && count != 0
14729 && !crtl->calls_eh_return
14730 && bit_count(saved_regs_mask) * 4 == count
14731 && !IS_INTERRUPT (func_type)
14732 && !crtl->tail_call_emit)
14734 unsigned long mask;
14735 /* Preserve return values, of any size. */
14736 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
14737 mask ^= 0xf;
14738 mask &= ~saved_regs_mask;
14739 reg = 0;
14740 while (bit_count (mask) * 4 > amount)
14742 while ((mask & (1 << reg)) == 0)
14743 reg++;
14744 mask &= ~(1 << reg);
14746 if (bit_count (mask) * 4 == amount) {
14747 amount = 0;
14748 saved_regs_mask |= mask;
14753 if (amount)
14755 operands[1] = operands[0];
14756 operands[2] = GEN_INT (amount);
14757 output_add_immediate (operands);
14759 if (frame_pointer_needed)
14760 asm_fprintf (f, "\tmov\t%r, %r\n",
14761 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14764 if (TARGET_FPA_EMU2)
14766 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14767 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14768 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14769 reg, SP_REGNUM);
14771 else
14773 start_reg = FIRST_FPA_REGNUM;
14775 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14777 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14779 if (reg - start_reg == 3)
14781 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14782 start_reg, SP_REGNUM);
14783 start_reg = reg + 1;
14786 else
14788 if (reg != start_reg)
14789 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14790 start_reg, reg - start_reg,
14791 SP_REGNUM);
14793 start_reg = reg + 1;
14797 /* Just in case the last register checked also needs unstacking. */
14798 if (reg != start_reg)
14799 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14800 start_reg, reg - start_reg, SP_REGNUM);
14803 if (TARGET_HARD_FLOAT && TARGET_VFP)
14805 int end_reg = LAST_VFP_REGNUM + 1;
14807 /* Scan the registers in reverse order. We need to match
14808 any groupings made in the prologue and generate matching
14809 pop operations. */
14810 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14812 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14813 && (!df_regs_ever_live_p (reg + 1)
14814 || call_used_regs[reg + 1]))
14816 if (end_reg > reg + 2)
14817 vfp_output_fldmd (f, SP_REGNUM,
14818 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14819 (end_reg - (reg + 2)) / 2);
14820 end_reg = reg;
14823 if (end_reg > reg + 2)
14824 vfp_output_fldmd (f, SP_REGNUM, 0,
14825 (end_reg - (reg + 2)) / 2);
14828 if (TARGET_IWMMXT)
14829 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14830 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14831 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14833 /* If we can, restore the LR into the PC. */
14834 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14835 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14836 && !IS_STACKALIGN (func_type)
14837 && really_return
14838 && crtl->args.pretend_args_size == 0
14839 && saved_regs_mask & (1 << LR_REGNUM)
14840 && !crtl->calls_eh_return)
14842 saved_regs_mask &= ~ (1 << LR_REGNUM);
14843 saved_regs_mask |= (1 << PC_REGNUM);
14844 rfe = IS_INTERRUPT (func_type);
14846 else
14847 rfe = 0;
14849 /* Load the registers off the stack. If we only have one register
14850 to load use the LDR instruction - it is faster. For Thumb-2
14851 always use pop and the assembler will pick the best instruction. */
14852 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14853 && !IS_INTERRUPT(func_type))
14855 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14857 else if (saved_regs_mask)
14859 if (saved_regs_mask & (1 << SP_REGNUM))
14860 /* Note - write back to the stack register is not enabled
14861 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14862 in the list of registers and if we add writeback the
14863 instruction becomes UNPREDICTABLE. */
14864 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14865 rfe);
14866 else if (TARGET_ARM)
14867 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14868 rfe);
14869 else
14870 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14873 if (crtl->args.pretend_args_size)
14875 /* Unwind the pre-pushed regs. */
14876 operands[0] = operands[1] = stack_pointer_rtx;
14877 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14878 output_add_immediate (operands);
14882 /* We may have already restored PC directly from the stack. */
14883 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14884 return "";
14886 /* Stack adjustment for exception handler. */
14887 if (crtl->calls_eh_return)
14888 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14889 ARM_EH_STACKADJ_REGNUM);
14891 /* Generate the return instruction. */
14892 switch ((int) ARM_FUNC_TYPE (func_type))
14894 case ARM_FT_ISR:
14895 case ARM_FT_FIQ:
14896 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14897 break;
14899 case ARM_FT_EXCEPTION:
14900 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14901 break;
14903 case ARM_FT_INTERWORKED:
14904 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14905 break;
14907 default:
14908 if (IS_STACKALIGN (func_type))
14910 /* See comment in arm_expand_prologue. */
14911 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14913 if (arm_arch5 || arm_arch4t)
14914 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14915 else
14916 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14917 break;
14920 return "";
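/* Editor's illustration (standalone sketch): which of r0-r3 the epilogue
   above may pop as scratch when folding the final stack adjustment into
   the register pop -- every argument register not holding part of the
   return value.  */

static unsigned
free_arg_regs_mask (unsigned return_bytes)
{
  unsigned live = (1u << ((return_bytes + 3) / 4)) - 1;  /* regs carrying the value */

  return live ^ 0xf;                                     /* the remaining arg regs  */
}

/* A 4-byte return leaves r1-r3 free (mask 0xe); an 8-byte return leaves
   only r2 and r3 (mask 0xc).  */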
14923 static void
14924 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14925 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14927 arm_stack_offsets *offsets;
14929 if (TARGET_THUMB1)
14931 int regno;
14933 /* Emit any call-via-reg trampolines that are needed for v4t support
14934 of call_reg and call_value_reg type insns. */
14935 for (regno = 0; regno < LR_REGNUM; regno++)
14937 rtx label = cfun->machine->call_via[regno];
14939 if (label != NULL)
14941 switch_to_section (function_section (current_function_decl));
14942 targetm.asm_out.internal_label (asm_out_file, "L",
14943 CODE_LABEL_NUMBER (label));
14944 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14948 /* ??? Probably not safe to set this here, since it assumes that a
14949 function will be emitted as assembly immediately after we generate
14950 RTL for it. This does not happen for inline functions. */
14951 cfun->machine->return_used_this_function = 0;
14953 else /* TARGET_32BIT */
14955 /* We need to take into account any stack-frame rounding. */
14956 offsets = arm_get_frame_offsets ();
14958 gcc_assert (!use_return_insn (FALSE, NULL)
14959 || (cfun->machine->return_used_this_function != 0)
14960 || offsets->saved_regs == offsets->outgoing_args
14961 || frame_pointer_needed);
14963 /* Reset the ARM-specific per-function variables. */
14964 after_arm_reorg = 0;
14968 /* Generate and emit an insn that we will recognize as a push_multi.
14969 Unfortunately, since this insn does not reflect very well the actual
14970 semantics of the operation, we need to annotate the insn for the benefit
14971 of DWARF2 frame unwind information. */
14972 static rtx
14973 emit_multi_reg_push (unsigned long mask)
14975 int num_regs = 0;
14976 int num_dwarf_regs;
14977 int i, j;
14978 rtx par;
14979 rtx dwarf;
14980 int dwarf_par_index;
14981 rtx tmp, reg;
14983 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14984 if (mask & (1 << i))
14985 num_regs++;
14987 gcc_assert (num_regs && num_regs <= 16);
14989 /* We don't record the PC in the dwarf frame information. */
14990 num_dwarf_regs = num_regs;
14991 if (mask & (1 << PC_REGNUM))
14992 num_dwarf_regs--;
14994 /* For the body of the insn we are going to generate an UNSPEC in
14995 parallel with several USEs. This allows the insn to be recognized
14996 by the push_multi pattern in the arm.md file.
14998 The body of the insn looks something like this:
15000 (parallel [
15001 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15002 (const_int:SI <num>)))
15003 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15004 (use (reg:SI XX))
15005 (use (reg:SI YY))
15009 For the frame note however, we try to be more explicit and actually
15010 show each register being stored into the stack frame, plus a (single)
15011 decrement of the stack pointer. We do it this way in order to be
15012 friendly to the stack unwinding code, which only wants to see a single
15013 stack decrement per instruction. The RTL we generate for the note looks
15014 something like this:
15016 (sequence [
15017 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15018 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15019 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15020 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15024 FIXME: In an ideal world the PRE_MODIFY would not exist and
15025 instead we'd have a parallel expression detailing all
15026 the stores to the various memory addresses so that debug
15027 information is more up-to-date. Remember however while writing
15028 this to take care of the constraints with the push instruction.
15030 Note also that this has to be taken care of for the VFP registers.
15032 For more see PR43399. */
15034 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15035 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15036 dwarf_par_index = 1;
15038 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15040 if (mask & (1 << i))
15042 reg = gen_rtx_REG (SImode, i);
15044 XVECEXP (par, 0, 0)
15045 = gen_rtx_SET (VOIDmode,
15046 gen_frame_mem
15047 (BLKmode,
15048 gen_rtx_PRE_MODIFY (Pmode,
15049 stack_pointer_rtx,
15050 plus_constant
15051 (stack_pointer_rtx,
15052 -4 * num_regs))
15054 gen_rtx_UNSPEC (BLKmode,
15055 gen_rtvec (1, reg),
15056 UNSPEC_PUSH_MULT));
15058 if (i != PC_REGNUM)
15060 tmp = gen_rtx_SET (VOIDmode,
15061 gen_frame_mem (SImode, stack_pointer_rtx),
15062 reg);
15063 RTX_FRAME_RELATED_P (tmp) = 1;
15064 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15065 dwarf_par_index++;
15068 break;
15072 for (j = 1, i++; j < num_regs; i++)
15074 if (mask & (1 << i))
15076 reg = gen_rtx_REG (SImode, i);
15078 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15080 if (i != PC_REGNUM)
15083 = gen_rtx_SET (VOIDmode,
15084 gen_frame_mem
15085 (SImode,
15086 plus_constant (stack_pointer_rtx,
15087 4 * j)),
15088 reg);
15089 RTX_FRAME_RELATED_P (tmp) = 1;
15090 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15093 j++;
15097 par = emit_insn (par);
15099 tmp = gen_rtx_SET (VOIDmode,
15100 stack_pointer_rtx,
15101 plus_constant (stack_pointer_rtx, -4 * num_regs));
15102 RTX_FRAME_RELATED_P (tmp) = 1;
15103 XVECEXP (dwarf, 0, 0) = tmp;
15105 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15107 return par;
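/* Editor's illustration (standalone, assumed names): the shape of the frame
   note built above for a given push mask -- a single SP decrement of
   4 * nregs, then each pushed register stored at an ascending word offset.
   A pushed PC still occupies its slot but gets no store in the note.  */

#include <stdio.h>

static void
show_push_note (unsigned long mask)
{
  int i, slot = 0, nregs = 0;

  for (i = 0; i < 16; i++)
    if (mask & (1UL << i))
      nregs++;

  printf ("(set sp (plus sp %d))\n", -4 * nregs);
  for (i = 0; i < 16; i++)
    if (mask & (1UL << i))
      {
        if (i != 15)            /* the PC is not recorded in the dwarf note */
          printf ("(set (mem (plus sp %d)) r%d)\n", 4 * slot, i);
        slot++;
      }
}

/* show_push_note ((1 << 4) | (1 << 5) | (1 << 14)) prints a decrement of 12
   and stores of r4, r5 and r14 at offsets 0, 4 and 8.  */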
15110 /* Calculate the size of the return value that is passed in registers. */
15111 static unsigned
15112 arm_size_return_regs (void)
15114 enum machine_mode mode;
15116 if (crtl->return_rtx != 0)
15117 mode = GET_MODE (crtl->return_rtx);
15118 else
15119 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15121 return GET_MODE_SIZE (mode);
15124 static rtx
15125 emit_sfm (int base_reg, int count)
15127 rtx par;
15128 rtx dwarf;
15129 rtx tmp, reg;
15130 int i;
15132 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15133 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15135 reg = gen_rtx_REG (XFmode, base_reg++);
15137 XVECEXP (par, 0, 0)
15138 = gen_rtx_SET (VOIDmode,
15139 gen_frame_mem
15140 (BLKmode,
15141 gen_rtx_PRE_MODIFY (Pmode,
15142 stack_pointer_rtx,
15143 plus_constant
15144 (stack_pointer_rtx,
15145 -12 * count))
15147 gen_rtx_UNSPEC (BLKmode,
15148 gen_rtvec (1, reg),
15149 UNSPEC_PUSH_MULT));
15150 tmp = gen_rtx_SET (VOIDmode,
15151 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15152 RTX_FRAME_RELATED_P (tmp) = 1;
15153 XVECEXP (dwarf, 0, 1) = tmp;
15155 for (i = 1; i < count; i++)
15157 reg = gen_rtx_REG (XFmode, base_reg++);
15158 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15160 tmp = gen_rtx_SET (VOIDmode,
15161 gen_frame_mem (XFmode,
15162 plus_constant (stack_pointer_rtx,
15163 i * 12)),
15164 reg);
15165 RTX_FRAME_RELATED_P (tmp) = 1;
15166 XVECEXP (dwarf, 0, i + 1) = tmp;
15169 tmp = gen_rtx_SET (VOIDmode,
15170 stack_pointer_rtx,
15171 plus_constant (stack_pointer_rtx, -12 * count));
15173 RTX_FRAME_RELATED_P (tmp) = 1;
15174 XVECEXP (dwarf, 0, 0) = tmp;
15176 par = emit_insn (par);
15177 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15179 return par;
15183 /* Return true if the current function needs to save/restore LR. */
15185 static bool
15186 thumb_force_lr_save (void)
15188 return !cfun->machine->lr_save_eliminated
15189 && (!leaf_function_p ()
15190 || thumb_far_jump_used_p ()
15191 || df_regs_ever_live_p (LR_REGNUM));
15195 /* Return true if r3 is used by any of the tail call insns in the
15196 current function. */
15198 static bool
15199 any_sibcall_uses_r3 (void)
15201 edge_iterator ei;
15202 edge e;
15204 if (!crtl->tail_call_emit)
15205 return false;
15206 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15207 if (e->flags & EDGE_SIBCALL)
15209 rtx call = BB_END (e->src);
15210 if (!CALL_P (call))
15211 call = prev_nonnote_nondebug_insn (call);
15212 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15213 if (find_regno_fusage (call, USE, 3))
15214 return true;
15216 return false;
15220 /* Compute the distance from register FROM to register TO.
15221 These can be the arg pointer (26), the soft frame pointer (25),
15222 the stack pointer (13) or the hard frame pointer (11).
15223 In thumb mode r7 is used as the hard frame pointer, if needed.
15224 Typical stack layout looks like this:
15226 old stack pointer -> | |
15227 ----
15228 | | \
15229 | | saved arguments for
15230 | | vararg functions
15231 | | /
15233 hard FP & arg pointer -> | | \
15234 | | stack
15235 | | frame
15236 | | /
15238 | | \
15239 | | call saved
15240 | | registers
15241 soft frame pointer -> | | /
15243 | | \
15244 | | local
15245 | | variables
15246 locals base pointer -> | | /
15248 | | \
15249 | | outgoing
15250 | | arguments
15251 current stack pointer -> | | /
15254 For a given function some or all of these stack components
15255 may not be needed, giving rise to the possibility of
15256 eliminating some of the registers.
15258 The values returned by this function must reflect the behavior
15259 of arm_expand_prologue() and arm_compute_save_reg_mask().
15261 The sign of the number returned reflects the direction of stack
15262 growth, so the values are positive for all eliminations except
15263 from the soft frame pointer to the hard frame pointer.
15265 SFP may point just inside the local variables block to ensure correct
15266 alignment. */
15269 /* Calculate stack offsets. These are used to calculate register elimination
15270 offsets and in prologue/epilogue code. Also calculates which registers
15271 should be saved. */
15273 static arm_stack_offsets *
15274 arm_get_frame_offsets (void)
15276 struct arm_stack_offsets *offsets;
15277 unsigned long func_type;
15278 int leaf;
15279 int saved;
15280 int core_saved;
15281 HOST_WIDE_INT frame_size;
15282 int i;
15284 offsets = &cfun->machine->stack_offsets;
15286 /* We need to know if we are a leaf function. Unfortunately, it
15287 is possible to be called after start_sequence has been called,
15288 which causes get_insns to return the insns for the sequence,
15289 not the function, which will cause leaf_function_p to return
15290 the incorrect result.
15292 We only need to know about leaf functions once reload has completed, and the
15293 frame size cannot be changed after that time, so we can safely
15294 use the cached value. */
15296 if (reload_completed)
15297 return offsets;
15299 /* Initially this is the size of the local variables. It will be translated
15300 into an offset once we have determined the size of preceding data. */
15301 frame_size = ROUND_UP_WORD (get_frame_size ());
15303 leaf = leaf_function_p ();
15305 /* Space for variadic functions. */
15306 offsets->saved_args = crtl->args.pretend_args_size;
15308 /* In Thumb mode this is incorrect, but never used. */
15309 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15310 arm_compute_static_chain_stack_bytes();
15312 if (TARGET_32BIT)
15314 unsigned int regno;
15316 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15317 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15318 saved = core_saved;
15320 /* We know that SP will be doubleword aligned on entry, and we must
15321 preserve that condition at any subroutine call. We also require the
15322 soft frame pointer to be doubleword aligned. */
15324 if (TARGET_REALLY_IWMMXT)
15326 /* Check for the call-saved iWMMXt registers. */
15327 for (regno = FIRST_IWMMXT_REGNUM;
15328 regno <= LAST_IWMMXT_REGNUM;
15329 regno++)
15330 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15331 saved += 8;
15334 func_type = arm_current_func_type ();
15335 if (! IS_VOLATILE (func_type))
15337 /* Space for saved FPA registers. */
15338 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15339 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15340 saved += 12;
15342 /* Space for saved VFP registers. */
15343 if (TARGET_HARD_FLOAT && TARGET_VFP)
15344 saved += arm_get_vfp_saved_size ();
15347 else /* TARGET_THUMB1 */
15349 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15350 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15351 saved = core_saved;
15352 if (TARGET_BACKTRACE)
15353 saved += 16;
15356 /* Saved registers include the stack frame. */
15357 offsets->saved_regs = offsets->saved_args + saved +
15358 arm_compute_static_chain_stack_bytes();
15359 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15360 /* A leaf function does not need any stack alignment if it has nothing
15361 on the stack. */
15362 if (leaf && frame_size == 0)
15364 offsets->outgoing_args = offsets->soft_frame;
15365 offsets->locals_base = offsets->soft_frame;
15366 return offsets;
15369 /* Ensure SFP has the correct alignment. */
15370 if (ARM_DOUBLEWORD_ALIGN
15371 && (offsets->soft_frame & 7))
15373 offsets->soft_frame += 4;
15374 /* Try to align stack by pushing an extra reg. Don't bother doing this
15375 when there is a stack frame as the alignment will be rolled into
15376 the normal stack adjustment. */
15377 if (frame_size + crtl->outgoing_args_size == 0)
15379 int reg = -1;
15381 /* If it is safe to use r3, then do so. This sometimes
15382 generates better code on Thumb-2 by avoiding the need to
15383 use 32-bit push/pop instructions. */
15384 if (! any_sibcall_uses_r3 ()
15385 && arm_size_return_regs () <= 12
15386 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15388 reg = 3;
15390 else
15391 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15393 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15395 reg = i;
15396 break;
15400 if (reg != -1)
15402 offsets->saved_regs += 4;
15403 offsets->saved_regs_mask |= (1 << reg);
15408 offsets->locals_base = offsets->soft_frame + frame_size;
15409 offsets->outgoing_args = (offsets->locals_base
15410 + crtl->outgoing_args_size);
15412 if (ARM_DOUBLEWORD_ALIGN)
15414 /* Ensure SP remains doubleword aligned. */
15415 if (offsets->outgoing_args & 7)
15416 offsets->outgoing_args += 4;
15417 gcc_assert (!(offsets->outgoing_args & 7));
15420 return offsets;
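/* Editor's hedged sketch of the offset arithmetic above, heavily simplified
   (no IWMMXt, FPA or VFP saves, no interworking slot, no APCS frame and no
   extra alignment register): every field is a distance in the direction of
   stack growth, mirroring arm_stack_offsets.  */

struct frame_sketch
{
  int saved_args;     /* pretend (vararg) argument bytes              */
  int saved_regs;     /* ... plus the pushed core registers           */
  int soft_frame;     /* bottom of the save area, doubleword aligned  */
  int locals_base;    /* ... plus the word-rounded local variables    */
  int outgoing_args;  /* ... plus outgoing args, SP kept 8-aligned    */
};

static struct frame_sketch
sketch_offsets (int pretend_bytes, int n_core_saved, int local_bytes,
                int outgoing_bytes)
{
  struct frame_sketch o;

  o.saved_args = pretend_bytes;
  o.saved_regs = o.saved_args + 4 * n_core_saved;
  o.soft_frame = o.saved_regs;
  if (o.soft_frame & 7)
    o.soft_frame += 4;
  o.locals_base = o.soft_frame + ((local_bytes + 3) & ~3);
  o.outgoing_args = o.locals_base + outgoing_bytes;
  if (o.outgoing_args & 7)
    o.outgoing_args += 4;
  return o;
}

/* With these values, the ARG_POINTER to STACK_POINTER elimination offset
   computed below would be outgoing_args - (saved_args + 4).  */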
15424 /* Calculate the relative offsets for the different stack pointers. Positive
15425 offsets are in the direction of stack growth. */
15427 HOST_WIDE_INT
15428 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15430 arm_stack_offsets *offsets;
15432 offsets = arm_get_frame_offsets ();
15434 /* OK, now we have enough information to compute the distances.
15435 There must be an entry in these switch tables for each pair
15436 of registers in ELIMINABLE_REGS, even if some of the entries
15437 seem to be redundant or useless. */
15438 switch (from)
15440 case ARG_POINTER_REGNUM:
15441 switch (to)
15443 case THUMB_HARD_FRAME_POINTER_REGNUM:
15444 return 0;
15446 case FRAME_POINTER_REGNUM:
15447 /* This is the reverse of the soft frame pointer
15448 to hard frame pointer elimination below. */
15449 return offsets->soft_frame - offsets->saved_args;
15451 case ARM_HARD_FRAME_POINTER_REGNUM:
15452 /* This is only non-zero in the case where the static chain register
15453 is stored above the frame. */
15454 return offsets->frame - offsets->saved_args - 4;
15456 case STACK_POINTER_REGNUM:
15457 /* If nothing has been pushed on the stack at all
15458 then this will return -4. This *is* correct! */
15459 return offsets->outgoing_args - (offsets->saved_args + 4);
15461 default:
15462 gcc_unreachable ();
15464 gcc_unreachable ();
15466 case FRAME_POINTER_REGNUM:
15467 switch (to)
15469 case THUMB_HARD_FRAME_POINTER_REGNUM:
15470 return 0;
15472 case ARM_HARD_FRAME_POINTER_REGNUM:
15473 /* The hard frame pointer points to the top entry in the
15474 stack frame. The soft frame pointer to the bottom entry
15475 in the stack frame. If there is no stack frame at all,
15476 then they are identical. */
15478 return offsets->frame - offsets->soft_frame;
15480 case STACK_POINTER_REGNUM:
15481 return offsets->outgoing_args - offsets->soft_frame;
15483 default:
15484 gcc_unreachable ();
15486 gcc_unreachable ();
15488 default:
15489 /* You cannot eliminate from the stack pointer.
15490 In theory you could eliminate from the hard frame
15491 pointer to the stack pointer, but this will never
15492 happen, since if a stack frame is not needed the
15493 hard frame pointer will never be used. */
15494 gcc_unreachable ();
15498 /* Given FROM and TO register numbers, say whether this elimination is
15499 allowed. Frame pointer elimination is automatically handled.
15501 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15502 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15503 pointer, we must eliminate FRAME_POINTER_REGNUM into
15504 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15505 ARG_POINTER_REGNUM. */
15507 bool
15508 arm_can_eliminate (const int from, const int to)
15510 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15511 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15512 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15513 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15514 true);
15517 /* Emit RTL to save coprocessor registers on function entry. Returns the
15518 number of bytes pushed. */
15520 static int
15521 arm_save_coproc_regs(void)
15523 int saved_size = 0;
15524 unsigned reg;
15525 unsigned start_reg;
15526 rtx insn;
15528 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15529 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15531 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15532 insn = gen_rtx_MEM (V2SImode, insn);
15533 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15534 RTX_FRAME_RELATED_P (insn) = 1;
15535 saved_size += 8;
15538 /* Save any floating point call-saved registers used by this
15539 function. */
15540 if (TARGET_FPA_EMU2)
15542 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15543 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15545 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15546 insn = gen_rtx_MEM (XFmode, insn);
15547 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15548 RTX_FRAME_RELATED_P (insn) = 1;
15549 saved_size += 12;
15552 else
15554 start_reg = LAST_FPA_REGNUM;
15556 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15558 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15560 if (start_reg - reg == 3)
15562 insn = emit_sfm (reg, 4);
15563 RTX_FRAME_RELATED_P (insn) = 1;
15564 saved_size += 48;
15565 start_reg = reg - 1;
15568 else
15570 if (start_reg != reg)
15572 insn = emit_sfm (reg + 1, start_reg - reg);
15573 RTX_FRAME_RELATED_P (insn) = 1;
15574 saved_size += (start_reg - reg) * 12;
15576 start_reg = reg - 1;
15580 if (start_reg != reg)
15582 insn = emit_sfm (reg + 1, start_reg - reg);
15583 saved_size += (start_reg - reg) * 12;
15584 RTX_FRAME_RELATED_P (insn) = 1;
15587 if (TARGET_HARD_FLOAT && TARGET_VFP)
15589 start_reg = FIRST_VFP_REGNUM;
15591 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15593 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15594 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15596 if (start_reg != reg)
15597 saved_size += vfp_emit_fstmd (start_reg,
15598 (reg - start_reg) / 2);
15599 start_reg = reg + 2;
15602 if (start_reg != reg)
15603 saved_size += vfp_emit_fstmd (start_reg,
15604 (reg - start_reg) / 2);
15606 return saved_size;
15610 /* Set the Thumb frame pointer from the stack pointer. */
15612 static void
15613 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15615 HOST_WIDE_INT amount;
15616 rtx insn, dwarf;
15618 amount = offsets->outgoing_args - offsets->locals_base;
15619 if (amount < 1024)
15620 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15621 stack_pointer_rtx, GEN_INT (amount)));
15622 else
15624 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15625 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15626 expects the first two operands to be the same. */
15627 if (TARGET_THUMB2)
15629 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15630 stack_pointer_rtx,
15631 hard_frame_pointer_rtx));
15633 else
15635 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15636 hard_frame_pointer_rtx,
15637 stack_pointer_rtx));
15639 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15640 plus_constant (stack_pointer_rtx, amount));
15641 RTX_FRAME_RELATED_P (dwarf) = 1;
15642 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15645 RTX_FRAME_RELATED_P (insn) = 1;
15648 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15649 function. */
15650 void
15651 arm_expand_prologue (void)
15653 rtx amount;
15654 rtx insn;
15655 rtx ip_rtx;
15656 unsigned long live_regs_mask;
15657 unsigned long func_type;
15658 int fp_offset = 0;
15659 int saved_pretend_args = 0;
15660 int saved_regs = 0;
15661 unsigned HOST_WIDE_INT args_to_push;
15662 arm_stack_offsets *offsets;
15664 func_type = arm_current_func_type ();
15666 /* Naked functions don't have prologues. */
15667 if (IS_NAKED (func_type))
15668 return;
15670 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15671 args_to_push = crtl->args.pretend_args_size;
15673 /* Compute which register we will have to save onto the stack. */
15674 offsets = arm_get_frame_offsets ();
15675 live_regs_mask = offsets->saved_regs_mask;
15677 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15679 if (IS_STACKALIGN (func_type))
15681 rtx dwarf;
15682 rtx r0;
15683 rtx r1;
15684 /* Handle a word-aligned stack pointer. We generate the following:
15686 mov r0, sp
15687 bic r1, r0, #7
15688 mov sp, r1
15689 <save and restore r0 in normal prologue/epilogue>
15690 mov sp, r0
15691 bx lr
15693 The unwinder doesn't need to know about the stack realignment.
15694 Just tell it we saved SP in r0. */
15695 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15697 r0 = gen_rtx_REG (SImode, 0);
15698 r1 = gen_rtx_REG (SImode, 1);
15699 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15700 compiler won't choke. */
15701 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15702 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15703 insn = gen_movsi (r0, stack_pointer_rtx);
15704 RTX_FRAME_RELATED_P (insn) = 1;
15705 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15706 emit_insn (insn);
15707 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15708 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15711 /* For APCS frames, if the IP register is clobbered when
15712 creating the frame, save that register in a special
15713 way. */
15714 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15716 if (IS_INTERRUPT (func_type))
15718 /* Interrupt functions must not corrupt any registers.
15719 Creating a frame pointer, however, corrupts the IP
15720 register, so we must push it first. */
15721 insn = emit_multi_reg_push (1 << IP_REGNUM);
15723 /* Do not set RTX_FRAME_RELATED_P on this insn.
15724 The dwarf stack unwinding code only wants to see one
15725 stack decrement per function, and this is not it. If
15726 this instruction is labeled as being part of the frame
15727 creation sequence then dwarf2out_frame_debug_expr will
15728 die when it encounters the assignment of IP to FP
15729 later on, since the use of SP here establishes SP as
15730 the CFA register and not IP.
15732 Anyway this instruction is not really part of the stack
15733 frame creation although it is part of the prologue. */
15735 else if (IS_NESTED (func_type))
15737 /* The Static chain register is the same as the IP register
15738 used as a scratch register during stack frame creation.
15739 To get around this, we need to find somewhere to store IP
15740 whilst the frame is being created. We try the following
15741 places in order:
15743 1. The last argument register.
15744 2. A slot on the stack above the frame. (This only
15745 works if the function is not a varargs function).
15746 3. Register r3, after pushing the argument registers
15747 onto the stack.
15749 Note - we only need to tell the dwarf2 backend about the SP
15750 adjustment in the second variant; the static chain register
15751 doesn't need to be unwound, as it doesn't contain a value
15752 inherited from the caller. */
15754 if (df_regs_ever_live_p (3) == false)
15755 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15756 else if (args_to_push == 0)
15758 rtx dwarf;
15760 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15761 saved_regs += 4;
15763 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15764 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15765 fp_offset = 4;
15767 /* Just tell the dwarf backend that we adjusted SP. */
15768 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15769 plus_constant (stack_pointer_rtx,
15770 -fp_offset));
15771 RTX_FRAME_RELATED_P (insn) = 1;
15772 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15774 else
15776 /* Store the args on the stack. */
15777 if (cfun->machine->uses_anonymous_args)
15778 insn = emit_multi_reg_push
15779 ((0xf0 >> (args_to_push / 4)) & 0xf);
15780 else
15781 insn = emit_insn
15782 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15783 GEN_INT (- args_to_push)));
15785 RTX_FRAME_RELATED_P (insn) = 1;
15787 saved_pretend_args = 1;
15788 fp_offset = args_to_push;
15789 args_to_push = 0;
15791 /* Now reuse r3 to preserve IP. */
15792 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15796 insn = emit_set_insn (ip_rtx,
15797 plus_constant (stack_pointer_rtx, fp_offset));
15798 RTX_FRAME_RELATED_P (insn) = 1;
15801 if (args_to_push)
15803 /* Push the argument registers, or reserve space for them. */
15804 if (cfun->machine->uses_anonymous_args)
15805 insn = emit_multi_reg_push
15806 ((0xf0 >> (args_to_push / 4)) & 0xf);
15807 else
15808 insn = emit_insn
15809 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15810 GEN_INT (- args_to_push)));
15811 RTX_FRAME_RELATED_P (insn) = 1;
15814 /* If this is an interrupt service routine, and the link register
15815 is going to be pushed, and we're not generating an extra
15816 push of IP (needed when a frame is needed and the frame layout is APCS),
15817 then subtracting four from LR now will mean that the function return
15818 can be done with a single instruction. */
15819 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15820 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15821 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15822 && TARGET_ARM)
15824 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15826 emit_set_insn (lr, plus_constant (lr, -4));
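/* With LR pre-adjusted, the epilogue can return with one instruction,
   for example by popping the saved registers directly into the PC,
   rather than needing a separate SUBS PC, LR, #4.  */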
15829 if (live_regs_mask)
15831 saved_regs += bit_count (live_regs_mask) * 4;
15832 if (optimize_size && !frame_pointer_needed
15833 && saved_regs == offsets->saved_regs - offsets->saved_args)
15835 /* If no coprocessor registers are being pushed and we don't have
15836 to worry about a frame pointer then push extra registers to
15837 create the stack frame. This is done in a way that does not
15838 alter the frame layout, so is independent of the epilogue. */
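/* For example, if r0-r2 are dead and the local frame needs 12 bytes,
   pushing r0-r2 along with the live registers creates the frame without
   a separate stack adjustment instruction.  */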
15839 int n;
15840 int frame;
15841 n = 0;
15842 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15843 n++;
15844 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15845 if (frame && n * 4 >= frame)
15847 n = frame / 4;
15848 live_regs_mask |= (1 << n) - 1;
15849 saved_regs += frame;
15852 insn = emit_multi_reg_push (live_regs_mask);
15853 RTX_FRAME_RELATED_P (insn) = 1;
15856 if (! IS_VOLATILE (func_type))
15857 saved_regs += arm_save_coproc_regs ();
15859 if (frame_pointer_needed && TARGET_ARM)
15861 /* Create the new frame pointer. */
15862 if (TARGET_APCS_FRAME)
15864 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15865 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15866 RTX_FRAME_RELATED_P (insn) = 1;
15868 if (IS_NESTED (func_type))
15870 /* Recover the static chain register. */
15871 if (!df_regs_ever_live_p (3)
15872 || saved_pretend_args)
15873 insn = gen_rtx_REG (SImode, 3);
15874 else /* if (crtl->args.pretend_args_size == 0) */
15876 insn = plus_constant (hard_frame_pointer_rtx, 4);
15877 insn = gen_frame_mem (SImode, insn);
15879 emit_set_insn (ip_rtx, insn);
15880 /* Add a USE to stop propagate_one_insn() from barfing. */
15881 emit_insn (gen_prologue_use (ip_rtx));
15884 else
15886 insn = GEN_INT (saved_regs - 4);
15887 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15888 stack_pointer_rtx, insn));
15889 RTX_FRAME_RELATED_P (insn) = 1;
15893 if (flag_stack_usage)
15894 current_function_static_stack_size
15895 = offsets->outgoing_args - offsets->saved_args;
15897 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15899 /* This add can produce multiple insns for a large constant, so we
15900 need to get tricky. */
15901 rtx last = get_last_insn ();
15903 amount = GEN_INT (offsets->saved_args + saved_regs
15904 - offsets->outgoing_args);
15906 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15907 amount));
15908 do
15910 last = last ? NEXT_INSN (last) : get_insns ();
15911 RTX_FRAME_RELATED_P (last) = 1;
15913 while (last != insn);
15915 /* If the frame pointer is needed, emit a special barrier that
15916 will prevent the scheduler from moving stores to the frame
15917 before the stack adjustment. */
15918 if (frame_pointer_needed)
15919 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15920 hard_frame_pointer_rtx));
15924 if (frame_pointer_needed && TARGET_THUMB2)
15925 thumb_set_frame_pointer (offsets);
15927 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15929 unsigned long mask;
15931 mask = live_regs_mask;
15932 mask &= THUMB2_WORK_REGS;
15933 if (!IS_NESTED (func_type))
15934 mask |= (1 << IP_REGNUM);
15935 arm_load_pic_register (mask);
15938 /* If we are profiling, make sure no instructions are scheduled before
15939 the call to mcount. Similarly if the user has requested no
15940 scheduling in the prolog. Similarly if we want non-call exceptions
15941 using the EABI unwinder, to prevent faulting instructions from being
15942 swapped with a stack adjustment. */
15943 if (crtl->profile || !TARGET_SCHED_PROLOG
15944 || (arm_except_unwind_info (&global_options) == UI_TARGET
15945 && cfun->can_throw_non_call_exceptions))
15946 emit_insn (gen_blockage ());
15948 /* If the link register is being kept alive, with the return address in it,
15949 then make sure that it does not get reused by the ce2 pass. */
15950 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15951 cfun->machine->lr_save_eliminated = 1;
15954 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15955 static void
15956 arm_print_condition (FILE *stream)
15958 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15960 /* Branch conversion is not implemented for Thumb-2. */
15961 if (TARGET_THUMB)
15963 output_operand_lossage ("predicated Thumb instruction");
15964 return;
15966 if (current_insn_predicate != NULL)
15968 output_operand_lossage
15969 ("predicated instruction in conditional sequence");
15970 return;
15973 fputs (arm_condition_codes[arm_current_cc], stream);
15975 else if (current_insn_predicate)
15977 enum arm_cond_code code;
15979 if (TARGET_THUMB1)
15981 output_operand_lossage ("predicated Thumb instruction");
15982 return;
15985 code = get_arm_condition_code (current_insn_predicate);
15986 fputs (arm_condition_codes[code], stream);
15991 /* If CODE is 'd', then X is a condition operand and the instruction
15992 should only be executed if the condition is true.
15993 If CODE is 'D', then X is a condition operand and the instruction
15994 should only be executed if the condition is false: however, if the mode
15995 of the comparison is CCFPEmode, then always execute the instruction -- we
15996 do this because in these circumstances !GE does not necessarily imply LT;
15997 in these cases the instruction pattern will take care to make sure that
15998 an instruction containing %d will follow, thereby undoing the effects of
15999 doing this instruction unconditionally.
16000 If CODE is 'N' then X is a floating point operand that must be negated
16001 before output.
16002 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16003 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
16004 static void
16005 arm_print_operand (FILE *stream, rtx x, int code)
16007 switch (code)
16009 case '@':
16010 fputs (ASM_COMMENT_START, stream);
16011 return;
16013 case '_':
16014 fputs (user_label_prefix, stream);
16015 return;
16017 case '|':
16018 fputs (REGISTER_PREFIX, stream);
16019 return;
16021 case '?':
16022 arm_print_condition (stream);
16023 return;
16025 case '(':
16026 /* Nothing in unified syntax, otherwise the current condition code. */
16027 if (!TARGET_UNIFIED_ASM)
16028 arm_print_condition (stream);
16029 break;
16031 case ')':
16032 /* The current condition code in unified syntax, otherwise nothing. */
16033 if (TARGET_UNIFIED_ASM)
16034 arm_print_condition (stream);
16035 break;
16037 case '.':
16038 /* The current condition code for a condition code setting instruction.
16039 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16040 if (TARGET_UNIFIED_ASM)
16042 fputc('s', stream);
16043 arm_print_condition (stream);
16045 else
16047 arm_print_condition (stream);
16048 fputc('s', stream);
16050 return;
16052 case '!':
16053 /* If the instruction is conditionally executed then print
16054 the current condition code, otherwise print 's'. */
16055 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16056 if (current_insn_predicate)
16057 arm_print_condition (stream);
16058 else
16059 fputc('s', stream);
16060 break;
16062 /* %# is a "break" sequence. It doesn't output anything, but is used to
16063 separate e.g. operand numbers from following text, if that text consists
16064 of further digits which we don't want to be part of the operand
16065 number. */
16066 case '#':
16067 return;
16069 case 'N':
16071 REAL_VALUE_TYPE r;
16072 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16073 r = real_value_negate (&r);
16074 fprintf (stream, "%s", fp_const_from_val (&r));
16076 return;
16078 /* An integer or symbol address without a preceding # sign. */
16079 case 'c':
16080 switch (GET_CODE (x))
16082 case CONST_INT:
16083 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16084 break;
16086 case SYMBOL_REF:
16087 output_addr_const (stream, x);
16088 break;
16090 default:
16091 gcc_unreachable ();
16093 return;
16095 case 'B':
16096 if (GET_CODE (x) == CONST_INT)
16098 HOST_WIDE_INT val;
16099 val = ARM_SIGN_EXTEND (~INTVAL (x));
16100 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16102 else
16104 putc ('~', stream);
16105 output_addr_const (stream, x);
16107 return;
16109 case 'L':
16110 /* The low 16 bits of an immediate constant. */
16111 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16112 return;
16114 case 'i':
16115 fprintf (stream, "%s", arithmetic_instr (x, 1));
16116 return;
16118 /* Truncate Cirrus shift counts. */
16119 case 's':
16120 if (GET_CODE (x) == CONST_INT)
16122 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16123 return;
16125 arm_print_operand (stream, x, 0);
16126 return;
16128 case 'I':
16129 fprintf (stream, "%s", arithmetic_instr (x, 0));
16130 return;
16132 case 'S':
16134 HOST_WIDE_INT val;
16135 const char *shift;
16137 if (!shift_operator (x, SImode))
16139 output_operand_lossage ("invalid shift operand");
16140 break;
16143 shift = shift_op (x, &val);
16145 if (shift)
16147 fprintf (stream, ", %s ", shift);
16148 if (val == -1)
16149 arm_print_operand (stream, XEXP (x, 1), 0);
16150 else
16151 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16154 return;
16156 /* An explanation of the 'Q', 'R' and 'H' register operands:
16158 In a pair of registers containing a DI or DF value the 'Q'
16159 operand returns the register number of the register containing
16160 the least significant part of the value. The 'R' operand returns
16161 the register number of the register containing the most
16162 significant part of the value.
16164 The 'H' operand returns the higher of the two register numbers.
16165 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16166 same as the 'Q' operand, since the most significant part of the
16167 value is held in the lower number register. The reverse is true
16168 on systems where WORDS_BIG_ENDIAN is false.
16170 The purpose of these operands is to distinguish between cases
16171 where the endian-ness of the values is important (for example
16172 when they are added together), and cases where the endian-ness
16173 is irrelevant, but the order of register operations is important.
16174 For example when loading a value from memory into a register
16175 pair, the endian-ness does not matter. Provided that the value
16176 from the lower memory address is put into the lower numbered
16177 register, and the value from the higher address is put into the
16178 higher numbered register, the load will work regardless of whether
16179 the value being loaded is big-wordian or little-wordian. The
16180 order of the two register loads can matter however, if the address
16181 of the memory location is actually held in one of the registers
16182 being overwritten by the load.
16184 The 'Q' and 'R' constraints are also available for 64-bit
16185 constants. */
16186 case 'Q':
16187 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16189 rtx part = gen_lowpart (SImode, x);
16190 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16191 return;
16194 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16196 output_operand_lossage ("invalid operand for code '%c'", code);
16197 return;
16200 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16201 return;
16203 case 'R':
16204 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16206 enum machine_mode mode = GET_MODE (x);
16207 rtx part;
16209 if (mode == VOIDmode)
16210 mode = DImode;
16211 part = gen_highpart_mode (SImode, mode, x);
16212 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16213 return;
16216 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16218 output_operand_lossage ("invalid operand for code '%c'", code);
16219 return;
16222 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16223 return;
16225 case 'H':
16226 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16228 output_operand_lossage ("invalid operand for code '%c'", code);
16229 return;
16232 asm_fprintf (stream, "%r", REGNO (x) + 1);
16233 return;
16235 case 'J':
16236 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16238 output_operand_lossage ("invalid operand for code '%c'", code);
16239 return;
16242 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16243 return;
16245 case 'K':
16246 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16248 output_operand_lossage ("invalid operand for code '%c'", code);
16249 return;
16252 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16253 return;
16255 case 'm':
16256 asm_fprintf (stream, "%r",
16257 GET_CODE (XEXP (x, 0)) == REG
16258 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16259 return;
16261 case 'M':
16262 asm_fprintf (stream, "{%r-%r}",
16263 REGNO (x),
16264 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16265 return;
16267 /* Like 'M', but writing doubleword vector registers, for use by Neon
16268 insns. */
16269 case 'h':
16271 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16272 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16273 if (numregs == 1)
16274 asm_fprintf (stream, "{d%d}", regno);
16275 else
16276 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16278 return;
16280 case 'd':
16281 /* CONST_TRUE_RTX means always -- that's the default. */
16282 if (x == const_true_rtx)
16283 return;
16285 if (!COMPARISON_P (x))
16287 output_operand_lossage ("invalid operand for code '%c'", code);
16288 return;
16291 fputs (arm_condition_codes[get_arm_condition_code (x)],
16292 stream);
16293 return;
16295 case 'D':
16296 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16297 want to do that. */
16298 if (x == const_true_rtx)
16300 output_operand_lossage ("instruction never executed");
16301 return;
16303 if (!COMPARISON_P (x))
16305 output_operand_lossage ("invalid operand for code '%c'", code);
16306 return;
16309 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16310 (get_arm_condition_code (x))],
16311 stream);
16312 return;
16314 /* Cirrus registers can be accessed in a variety of ways:
16315 single floating point (f)
16316 double floating point (d)
16317 32-bit integer (fx)
16318 64-bit integer (dx). */
16319 case 'W': /* Cirrus register in F mode. */
16320 case 'X': /* Cirrus register in D mode. */
16321 case 'Y': /* Cirrus register in FX mode. */
16322 case 'Z': /* Cirrus register in DX mode. */
16323 gcc_assert (GET_CODE (x) == REG
16324 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16326 fprintf (stream, "mv%s%s",
16327 code == 'W' ? "f"
16328 : code == 'X' ? "d"
16329 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16331 return;
16333 /* Print a Cirrus register, in the form selected by the register's mode. */
16334 case 'V':
16336 int mode = GET_MODE (x);
16338 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16340 output_operand_lossage ("invalid operand for code '%c'", code);
16341 return;
16344 fprintf (stream, "mv%s%s",
16345 mode == DFmode ? "d"
16346 : mode == SImode ? "fx"
16347 : mode == DImode ? "dx"
16348 : "f", reg_names[REGNO (x)] + 2);
16350 return;
16353 case 'U':
16354 if (GET_CODE (x) != REG
16355 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16356 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16357 /* Bad value for wCG register number. */
16359 output_operand_lossage ("invalid operand for code '%c'", code);
16360 return;
16363 else
16364 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16365 return;
16367 /* Print an iWMMXt control register name. */
16368 case 'w':
16369 if (GET_CODE (x) != CONST_INT
16370 || INTVAL (x) < 0
16371 || INTVAL (x) >= 16)
16372 /* Bad value for wC register number. */
16374 output_operand_lossage ("invalid operand for code '%c'", code);
16375 return;
16378 else
16380 static const char * wc_reg_names [16] =
16382 "wCID", "wCon", "wCSSF", "wCASF",
16383 "wC4", "wC5", "wC6", "wC7",
16384 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16385 "wC12", "wC13", "wC14", "wC15"
16388 fprintf (stream, wc_reg_names [INTVAL (x)]);
16390 return;
16392 /* Print the high single-precision register of a VFP double-precision
16393 register. */
16394 case 'p':
16396 int mode = GET_MODE (x);
16397 int regno;
16399 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16401 output_operand_lossage ("invalid operand for code '%c'", code);
16402 return;
16405 regno = REGNO (x);
16406 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16408 output_operand_lossage ("invalid operand for code '%c'", code);
16409 return;
16412 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16414 return;
16416 /* Print a VFP/Neon double precision or quad precision register name. */
16417 case 'P':
16418 case 'q':
16420 int mode = GET_MODE (x);
16421 int is_quad = (code == 'q');
16422 int regno;
16424 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16426 output_operand_lossage ("invalid operand for code '%c'", code);
16427 return;
16430 if (GET_CODE (x) != REG
16431 || !IS_VFP_REGNUM (REGNO (x)))
16433 output_operand_lossage ("invalid operand for code '%c'", code);
16434 return;
16437 regno = REGNO (x);
16438 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16439 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16441 output_operand_lossage ("invalid operand for code '%c'", code);
16442 return;
16445 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16446 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16448 return;
16450 /* These two codes print the low/high doubleword register of a Neon quad
16451 register, respectively. For pair-structure types, can also print
16452 low/high quadword registers. */
16453 case 'e':
16454 case 'f':
16456 int mode = GET_MODE (x);
16457 int regno;
16459 if ((GET_MODE_SIZE (mode) != 16
16460 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16462 output_operand_lossage ("invalid operand for code '%c'", code);
16463 return;
16466 regno = REGNO (x);
16467 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16469 output_operand_lossage ("invalid operand for code '%c'", code);
16470 return;
16473 if (GET_MODE_SIZE (mode) == 16)
16474 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16475 + (code == 'f' ? 1 : 0));
16476 else
16477 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16478 + (code == 'f' ? 1 : 0));
16480 return;
16482 /* Print a VFPv3 floating-point constant, represented as an integer
16483 index. */
16484 case 'G':
16486 int index = vfp3_const_double_index (x);
16487 gcc_assert (index != -1);
16488 fprintf (stream, "%d", index);
16490 return;
16492 /* Print bits representing opcode features for Neon.
16494 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16495 and polynomials as unsigned.
16497 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16499 Bit 2 is 1 for rounding functions, 0 otherwise. */
16501 /* Identify the type as 's', 'u', 'p' or 'f'. */
16502 case 'T':
16504 HOST_WIDE_INT bits = INTVAL (x);
16505 fputc ("uspf"[bits & 3], stream);
16507 return;
16509 /* Likewise, but signed and unsigned integers are both 'i'. */
16510 case 'F':
16512 HOST_WIDE_INT bits = INTVAL (x);
16513 fputc ("iipf"[bits & 3], stream);
16515 return;
16517 /* As for 'T', but emit 'u' instead of 'p'. */
16518 case 't':
16520 HOST_WIDE_INT bits = INTVAL (x);
16521 fputc ("usuf"[bits & 3], stream);
16523 return;
16525 /* Bit 2: rounding (vs none). */
16526 case 'O':
16528 HOST_WIDE_INT bits = INTVAL (x);
16529 fputs ((bits & 4) != 0 ? "r" : "", stream);
16531 return;
16533 /* Memory operand for vld1/vst1 instruction. */
16534 case 'A':
16536 rtx addr;
16537 bool postinc = FALSE;
16538 unsigned align, modesize, align_bits;
16540 gcc_assert (GET_CODE (x) == MEM);
16541 addr = XEXP (x, 0);
16542 if (GET_CODE (addr) == POST_INC)
16544 postinc = 1;
16545 addr = XEXP (addr, 0);
16547 asm_fprintf (stream, "[%r", REGNO (addr));
16549 /* We know the alignment of this access, so we can emit a hint in the
16550 instruction (for some alignments) as an aid to the memory subsystem
16551 of the target. */
16552 align = MEM_ALIGN (x) >> 3;
16553 modesize = GET_MODE_SIZE (GET_MODE (x));
16555 /* Only certain alignment specifiers are supported by the hardware. */
16556 if (modesize == 16 && (align % 32) == 0)
16557 align_bits = 256;
16558 else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
16559 align_bits = 128;
16560 else if ((align % 8) == 0)
16561 align_bits = 64;
16562 else
16563 align_bits = 0;
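/* For example, a 16-byte (quad-word) access known to be 32-byte aligned
   gets a 256-bit hint and is printed as, e.g., "[r0:256]".  */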
16565 if (align_bits != 0)
16566 asm_fprintf (stream, ":%d", align_bits);
16568 asm_fprintf (stream, "]");
16570 if (postinc)
16571 fputs("!", stream);
16573 return;
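/* Memory operand consisting of a bare base register, printed as "[rN]".  */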
16575 case 'C':
16577 rtx addr;
16579 gcc_assert (GET_CODE (x) == MEM);
16580 addr = XEXP (x, 0);
16581 gcc_assert (GET_CODE (addr) == REG);
16582 asm_fprintf (stream, "[%r]", REGNO (addr));
16584 return;
16586 /* Translate an S register number into a D register number and element index. */
16587 case 'y':
16589 int mode = GET_MODE (x);
16590 int regno;
16592 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16594 output_operand_lossage ("invalid operand for code '%c'", code);
16595 return;
16598 regno = REGNO (x);
16599 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16601 output_operand_lossage ("invalid operand for code '%c'", code);
16602 return;
16605 regno = regno - FIRST_VFP_REGNUM;
16606 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16608 return;
16610 /* Register specifier for vld1.16/vst1.16. Translate the S register
16611 number into a D register number and element index. */
16612 case 'z':
16614 int mode = GET_MODE (x);
16615 int regno;
16617 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16619 output_operand_lossage ("invalid operand for code '%c'", code);
16620 return;
16623 regno = REGNO (x);
16624 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16626 output_operand_lossage ("invalid operand for code '%c'", code);
16627 return;
16630 regno = regno - FIRST_VFP_REGNUM;
16631 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16633 return;
16635 default:
16636 if (x == 0)
16638 output_operand_lossage ("missing operand");
16639 return;
16642 switch (GET_CODE (x))
16644 case REG:
16645 asm_fprintf (stream, "%r", REGNO (x));
16646 break;
16648 case MEM:
16649 output_memory_reference_mode = GET_MODE (x);
16650 output_address (XEXP (x, 0));
16651 break;
16653 case CONST_DOUBLE:
16654 if (TARGET_NEON)
16656 char fpstr[20];
16657 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16658 sizeof (fpstr), 0, 1);
16659 fprintf (stream, "#%s", fpstr);
16661 else
16662 fprintf (stream, "#%s", fp_immediate_constant (x));
16663 break;
16665 default:
16666 gcc_assert (GET_CODE (x) != NEG);
16667 fputc ('#', stream);
16668 if (GET_CODE (x) == HIGH)
16670 fputs (":lower16:", stream);
16671 x = XEXP (x, 0);
16674 output_addr_const (stream, x);
16675 break;
16680 /* Target hook for printing a memory address. */
16681 static void
16682 arm_print_operand_address (FILE *stream, rtx x)
16684 if (TARGET_32BIT)
16686 int is_minus = GET_CODE (x) == MINUS;
16688 if (GET_CODE (x) == REG)
16689 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16690 else if (GET_CODE (x) == PLUS || is_minus)
16692 rtx base = XEXP (x, 0);
16693 rtx index = XEXP (x, 1);
16694 HOST_WIDE_INT offset = 0;
16695 if (GET_CODE (base) != REG
16696 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16698 /* Ensure that BASE is a register
16699 (one of them must be).
16700 Also ensure the SP is not used as an index register. */
16701 rtx temp = base;
16702 base = index;
16703 index = temp;
16705 switch (GET_CODE (index))
16707 case CONST_INT:
16708 offset = INTVAL (index);
16709 if (is_minus)
16710 offset = -offset;
16711 asm_fprintf (stream, "[%r, #%wd]",
16712 REGNO (base), offset);
16713 break;
16715 case REG:
16716 asm_fprintf (stream, "[%r, %s%r]",
16717 REGNO (base), is_minus ? "-" : "",
16718 REGNO (index));
16719 break;
16721 case MULT:
16722 case ASHIFTRT:
16723 case LSHIFTRT:
16724 case ASHIFT:
16725 case ROTATERT:
16727 asm_fprintf (stream, "[%r, %s%r",
16728 REGNO (base), is_minus ? "-" : "",
16729 REGNO (XEXP (index, 0)));
16730 arm_print_operand (stream, index, 'S');
16731 fputs ("]", stream);
16732 break;
16735 default:
16736 gcc_unreachable ();
16739 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16740 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16742 extern enum machine_mode output_memory_reference_mode;
16744 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16746 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16747 asm_fprintf (stream, "[%r, #%s%d]!",
16748 REGNO (XEXP (x, 0)),
16749 GET_CODE (x) == PRE_DEC ? "-" : "",
16750 GET_MODE_SIZE (output_memory_reference_mode));
16751 else
16752 asm_fprintf (stream, "[%r], #%s%d",
16753 REGNO (XEXP (x, 0)),
16754 GET_CODE (x) == POST_DEC ? "-" : "",
16755 GET_MODE_SIZE (output_memory_reference_mode));
16757 else if (GET_CODE (x) == PRE_MODIFY)
16759 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16760 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16761 asm_fprintf (stream, "#%wd]!",
16762 INTVAL (XEXP (XEXP (x, 1), 1)));
16763 else
16764 asm_fprintf (stream, "%r]!",
16765 REGNO (XEXP (XEXP (x, 1), 1)));
16767 else if (GET_CODE (x) == POST_MODIFY)
16769 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16770 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16771 asm_fprintf (stream, "#%wd",
16772 INTVAL (XEXP (XEXP (x, 1), 1)));
16773 else
16774 asm_fprintf (stream, "%r",
16775 REGNO (XEXP (XEXP (x, 1), 1)));
16777 else output_addr_const (stream, x);
16779 else
16781 if (GET_CODE (x) == REG)
16782 asm_fprintf (stream, "[%r]", REGNO (x));
16783 else if (GET_CODE (x) == POST_INC)
16784 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16785 else if (GET_CODE (x) == PLUS)
16787 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16788 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16789 asm_fprintf (stream, "[%r, #%wd]",
16790 REGNO (XEXP (x, 0)),
16791 INTVAL (XEXP (x, 1)));
16792 else
16793 asm_fprintf (stream, "[%r, %r]",
16794 REGNO (XEXP (x, 0)),
16795 REGNO (XEXP (x, 1)));
16797 else
16798 output_addr_const (stream, x);
16802 /* Target hook for indicating whether a punctuation character for
16803 TARGET_PRINT_OPERAND is valid. */
16804 static bool
16805 arm_print_operand_punct_valid_p (unsigned char code)
16807 return (code == '@' || code == '|' || code == '.'
16808 || code == '(' || code == ')' || code == '#'
16809 || (TARGET_32BIT && (code == '?'))
16810 || (TARGET_THUMB2 && (code == '!'))
16811 || (TARGET_THUMB && (code == '_')));
16814 /* Target hook for assembling integer objects. The ARM version needs to
16815 handle word-sized values specially. */
16816 static bool
16817 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16819 enum machine_mode mode;
16821 if (size == UNITS_PER_WORD && aligned_p)
16823 fputs ("\t.word\t", asm_out_file);
16824 output_addr_const (asm_out_file, x);
16826 /* Mark symbols as position independent. We only do this in the
16827 .text segment, not in the .data segment. */
16828 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16829 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16831 /* See legitimize_pic_address for an explanation of the
16832 TARGET_VXWORKS_RTP check. */
16833 if (TARGET_VXWORKS_RTP
16834 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16835 fputs ("(GOT)", asm_out_file);
16836 else
16837 fputs ("(GOTOFF)", asm_out_file);
16839 fputc ('\n', asm_out_file);
16840 return true;
16843 mode = GET_MODE (x);
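/* Vector constants are emitted element by element; only the first
   element is given the full alignment of the constant.  */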
16845 if (arm_vector_mode_supported_p (mode))
16847 int i, units;
16849 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16851 units = CONST_VECTOR_NUNITS (x);
16852 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16854 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16855 for (i = 0; i < units; i++)
16857 rtx elt = CONST_VECTOR_ELT (x, i);
16858 assemble_integer
16859 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16861 else
16862 for (i = 0; i < units; i++)
16864 rtx elt = CONST_VECTOR_ELT (x, i);
16865 REAL_VALUE_TYPE rval;
16867 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16869 assemble_real
16870 (rval, GET_MODE_INNER (mode),
16871 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16874 return true;
16877 return default_assemble_integer (x, size, aligned_p);
16880 static void
16881 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16883 section *s;
16885 if (!TARGET_AAPCS_BASED)
16887 (is_ctor ?
16888 default_named_section_asm_out_constructor
16889 : default_named_section_asm_out_destructor) (symbol, priority);
16890 return;
16893 /* Put these in the .init_array section, using a special relocation. */
16894 if (priority != DEFAULT_INIT_PRIORITY)
16896 char buf[18];
16897 sprintf (buf, "%s.%.5u",
16898 is_ctor ? ".init_array" : ".fini_array",
16899 priority);
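/* For example, a constructor with priority 101 is placed in the
   ".init_array.00101" section.  */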
16900 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16902 else if (is_ctor)
16903 s = ctors_section;
16904 else
16905 s = dtors_section;
16907 switch_to_section (s);
16908 assemble_align (POINTER_SIZE);
16909 fputs ("\t.word\t", asm_out_file);
16910 output_addr_const (asm_out_file, symbol);
16911 fputs ("(target1)\n", asm_out_file);
16914 /* Add a function to the list of static constructors. */
16916 static void
16917 arm_elf_asm_constructor (rtx symbol, int priority)
16919 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16922 /* Add a function to the list of static destructors. */
16924 static void
16925 arm_elf_asm_destructor (rtx symbol, int priority)
16927 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16930 /* A finite state machine takes care of noticing whether or not instructions
16931 can be conditionally executed, and thus decreases execution time and code
16932 size by deleting branch instructions. The fsm is controlled by
16933 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16935 /* The states of the fsm controlling condition codes are:
16936 0: normal, do nothing special
16937 1: make ASM_OUTPUT_OPCODE not output this instruction
16938 2: make ASM_OUTPUT_OPCODE not output this instruction
16939 3: make instructions conditional
16940 4: make instructions conditional
16942 State transitions (state->state by whom under condition):
16943 0 -> 1 final_prescan_insn if the `target' is a label
16944 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16945 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16946 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16947 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16948 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16949 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16950 (the target insn is arm_target_insn).
16952 If the jump clobbers the conditions then we use states 2 and 4.
16954 A similar thing can be done with conditional return insns.
16956 XXX In case the `target' is an unconditional branch, this conditionalising
16957 of the instructions always reduces code size, but not always execution
16958 time. But then, I want to reduce the code size to somewhere near what
16959 /bin/cc produces. */
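/* As an illustrative example, a sequence such as

	bne	.L1
	mov	r0, #1
   .L1:

   can be output as a single "moveq r0, #1", removing the branch
   entirely.  */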
16961 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16962 instructions. When a COND_EXEC instruction is seen the subsequent
16963 instructions are scanned so that multiple conditional instructions can be
16964 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16965 specify the length and true/false mask for the IT block. These will be
16966 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
16968 /* Returns the index of the ARM condition code string in
16969 `arm_condition_codes'. COMPARISON should be an rtx like
16970 `(eq (...) (...))'. */
16971 static enum arm_cond_code
16972 get_arm_condition_code (rtx comparison)
16974 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16975 enum arm_cond_code code;
16976 enum rtx_code comp_code = GET_CODE (comparison);
16978 if (GET_MODE_CLASS (mode) != MODE_CC)
16979 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16980 XEXP (comparison, 1));
16982 switch (mode)
16984 case CC_DNEmode: code = ARM_NE; goto dominance;
16985 case CC_DEQmode: code = ARM_EQ; goto dominance;
16986 case CC_DGEmode: code = ARM_GE; goto dominance;
16987 case CC_DGTmode: code = ARM_GT; goto dominance;
16988 case CC_DLEmode: code = ARM_LE; goto dominance;
16989 case CC_DLTmode: code = ARM_LT; goto dominance;
16990 case CC_DGEUmode: code = ARM_CS; goto dominance;
16991 case CC_DGTUmode: code = ARM_HI; goto dominance;
16992 case CC_DLEUmode: code = ARM_LS; goto dominance;
16993 case CC_DLTUmode: code = ARM_CC;
16995 dominance:
16996 gcc_assert (comp_code == EQ || comp_code == NE);
16998 if (comp_code == EQ)
16999 return ARM_INVERSE_CONDITION_CODE (code);
17000 return code;
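/* CC_NOOVmode means the overflow (V) flag is not valid, so signed
   GE/LT have to be tested using the N flag alone (PL/MI).  */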
17002 case CC_NOOVmode:
17003 switch (comp_code)
17005 case NE: return ARM_NE;
17006 case EQ: return ARM_EQ;
17007 case GE: return ARM_PL;
17008 case LT: return ARM_MI;
17009 default: gcc_unreachable ();
17012 case CC_Zmode:
17013 switch (comp_code)
17015 case NE: return ARM_NE;
17016 case EQ: return ARM_EQ;
17017 default: gcc_unreachable ();
17020 case CC_Nmode:
17021 switch (comp_code)
17023 case NE: return ARM_MI;
17024 case EQ: return ARM_PL;
17025 default: gcc_unreachable ();
17028 case CCFPEmode:
17029 case CCFPmode:
17030 /* These encodings assume that AC=1 in the FPA system control
17031 byte. This allows us to handle all cases except UNEQ and
17032 LTGT. */
17033 switch (comp_code)
17035 case GE: return ARM_GE;
17036 case GT: return ARM_GT;
17037 case LE: return ARM_LS;
17038 case LT: return ARM_MI;
17039 case NE: return ARM_NE;
17040 case EQ: return ARM_EQ;
17041 case ORDERED: return ARM_VC;
17042 case UNORDERED: return ARM_VS;
17043 case UNLT: return ARM_LT;
17044 case UNLE: return ARM_LE;
17045 case UNGT: return ARM_HI;
17046 case UNGE: return ARM_PL;
17047 /* UNEQ and LTGT do not have a representation. */
17048 case UNEQ: /* Fall through. */
17049 case LTGT: /* Fall through. */
17050 default: gcc_unreachable ();
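/* CC_SWPmode means the comparison operands were swapped when the flags
   were set, so each condition maps to its swapped counterpart
   (GE<->LE, GTU<->LTU, and so on), not its inverse.  */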
17053 case CC_SWPmode:
17054 switch (comp_code)
17056 case NE: return ARM_NE;
17057 case EQ: return ARM_EQ;
17058 case GE: return ARM_LE;
17059 case GT: return ARM_LT;
17060 case LE: return ARM_GE;
17061 case LT: return ARM_GT;
17062 case GEU: return ARM_LS;
17063 case GTU: return ARM_CC;
17064 case LEU: return ARM_CS;
17065 case LTU: return ARM_HI;
17066 default: gcc_unreachable ();
17069 case CC_Cmode:
17070 switch (comp_code)
17072 case LTU: return ARM_CS;
17073 case GEU: return ARM_CC;
17074 default: gcc_unreachable ();
17077 case CC_CZmode:
17078 switch (comp_code)
17080 case NE: return ARM_NE;
17081 case EQ: return ARM_EQ;
17082 case GEU: return ARM_CS;
17083 case GTU: return ARM_HI;
17084 case LEU: return ARM_LS;
17085 case LTU: return ARM_CC;
17086 default: gcc_unreachable ();
17089 case CC_NCVmode:
17090 switch (comp_code)
17092 case GE: return ARM_GE;
17093 case LT: return ARM_LT;
17094 case GEU: return ARM_CS;
17095 case LTU: return ARM_CC;
17096 default: gcc_unreachable ();
17099 case CCmode:
17100 switch (comp_code)
17102 case NE: return ARM_NE;
17103 case EQ: return ARM_EQ;
17104 case GE: return ARM_GE;
17105 case GT: return ARM_GT;
17106 case LE: return ARM_LE;
17107 case LT: return ARM_LT;
17108 case GEU: return ARM_CS;
17109 case GTU: return ARM_HI;
17110 case LEU: return ARM_LS;
17111 case LTU: return ARM_CC;
17112 default: gcc_unreachable ();
17115 default: gcc_unreachable ();
17119 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17120 instructions. */
17121 void
17122 thumb2_final_prescan_insn (rtx insn)
17124 rtx first_insn = insn;
17125 rtx body = PATTERN (insn);
17126 rtx predicate;
17127 enum arm_cond_code code;
17128 int n;
17129 int mask;
17131 /* Remove the previous insn from the count of insns to be output. */
17132 if (arm_condexec_count)
17133 arm_condexec_count--;
17135 /* Nothing to do if we are already inside a conditional block. */
17136 if (arm_condexec_count)
17137 return;
17139 if (GET_CODE (body) != COND_EXEC)
17140 return;
17142 /* Conditional jumps are implemented directly. */
17143 if (GET_CODE (insn) == JUMP_INSN)
17144 return;
17146 predicate = COND_EXEC_TEST (body);
17147 arm_current_cc = get_arm_condition_code (predicate);
17149 n = get_attr_ce_count (insn);
17150 arm_condexec_count = 1;
17151 arm_condexec_mask = (1 << n) - 1;
17152 arm_condexec_masklen = n;
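/* Bit i of arm_condexec_mask is set when instruction i of the block
   uses the block's condition rather than its inverse;
   thumb2_asm_output_opcode turns these bits into the t/e qualifiers of
   the IT instruction.  */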
17153 /* See if subsequent instructions can be combined into the same block. */
17154 for (;;)
17156 insn = next_nonnote_insn (insn);
17158 /* Jumping into the middle of an IT block is illegal, so a label or
17159 barrier terminates the block. */
17160 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17161 break;
17163 body = PATTERN (insn);
17164 /* USE and CLOBBER aren't really insns, so just skip them. */
17165 if (GET_CODE (body) == USE
17166 || GET_CODE (body) == CLOBBER)
17167 continue;
17169 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17170 if (GET_CODE (body) != COND_EXEC)
17171 break;
17172 /* Allow up to 4 conditionally executed instructions in a block. */
17173 n = get_attr_ce_count (insn);
17174 if (arm_condexec_masklen + n > 4)
17175 break;
17177 predicate = COND_EXEC_TEST (body);
17178 code = get_arm_condition_code (predicate);
17179 mask = (1 << n) - 1;
17180 if (arm_current_cc == code)
17181 arm_condexec_mask |= (mask << arm_condexec_masklen);
17182 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17183 break;
17185 arm_condexec_count++;
17186 arm_condexec_masklen += n;
17188 /* A jump must be the last instruction in a conditional block. */
17189 if (GET_CODE(insn) == JUMP_INSN)
17190 break;
17192 /* Restore recog_data (getting the attributes of other insns can
17193 destroy this array, but final.c assumes that it remains intact
17194 across this call).  */
17195 extract_constrain_insn_cached (first_insn);
17198 void
17199 arm_final_prescan_insn (rtx insn)
17201 /* BODY will hold the body of INSN. */
17202 rtx body = PATTERN (insn);
17204 /* This will be 1 if trying to repeat the trick, and things need to be
17205 reversed if it appears to fail. */
17206 int reverse = 0;
17208 /* If we start with a return insn, we only succeed if we find another one. */
17209 int seeking_return = 0;
17211 /* START_INSN will hold the insn from where we start looking. This is the
17212 first insn after the following code_label if REVERSE is true. */
17213 rtx start_insn = insn;
17215 /* If in state 4, check if the target branch is reached, in order to
17216 change back to state 0. */
17217 if (arm_ccfsm_state == 4)
17219 if (insn == arm_target_insn)
17221 arm_target_insn = NULL;
17222 arm_ccfsm_state = 0;
17224 return;
17227 /* If in state 3, it is possible to repeat the trick, if this insn is an
17228 unconditional branch to a label, and immediately following this branch
17229 is the previous target label which is only used once, and the label this
17230 branch jumps to is not too far off. */
17231 if (arm_ccfsm_state == 3)
17233 if (simplejump_p (insn))
17235 start_insn = next_nonnote_insn (start_insn);
17236 if (GET_CODE (start_insn) == BARRIER)
17238 /* XXX Isn't this always a barrier? */
17239 start_insn = next_nonnote_insn (start_insn);
17241 if (GET_CODE (start_insn) == CODE_LABEL
17242 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17243 && LABEL_NUSES (start_insn) == 1)
17244 reverse = TRUE;
17245 else
17246 return;
17248 else if (GET_CODE (body) == RETURN)
17250 start_insn = next_nonnote_insn (start_insn);
17251 if (GET_CODE (start_insn) == BARRIER)
17252 start_insn = next_nonnote_insn (start_insn);
17253 if (GET_CODE (start_insn) == CODE_LABEL
17254 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17255 && LABEL_NUSES (start_insn) == 1)
17257 reverse = TRUE;
17258 seeking_return = 1;
17260 else
17261 return;
17263 else
17264 return;
17267 gcc_assert (!arm_ccfsm_state || reverse);
17268 if (GET_CODE (insn) != JUMP_INSN)
17269 return;
17271 /* This jump might be paralleled with a clobber of the condition codes;
17272 the jump should always come first. */
17273 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17274 body = XVECEXP (body, 0, 0);
17276 if (reverse
17277 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17278 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17280 int insns_skipped;
17281 int fail = FALSE, succeed = FALSE;
17282 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17283 int then_not_else = TRUE;
17284 rtx this_insn = start_insn, label = 0;
17286 /* Register the insn jumped to. */
17287 if (reverse)
17289 if (!seeking_return)
17290 label = XEXP (SET_SRC (body), 0);
17292 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17293 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17294 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17296 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17297 then_not_else = FALSE;
17299 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17300 seeking_return = 1;
17301 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17303 seeking_return = 1;
17304 then_not_else = FALSE;
17306 else
17307 gcc_unreachable ();
17309 /* See how many insns this branch skips, and what kind of insns. If all
17310 insns are okay, and the label or unconditional branch to the same
17311 label is not too far away, succeed. */
17312 for (insns_skipped = 0;
17313 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17315 rtx scanbody;
17317 this_insn = next_nonnote_insn (this_insn);
17318 if (!this_insn)
17319 break;
17321 switch (GET_CODE (this_insn))
17323 case CODE_LABEL:
17324 /* Succeed if it is the target label, otherwise fail since
17325 control falls in from somewhere else. */
17326 if (this_insn == label)
17328 arm_ccfsm_state = 1;
17329 succeed = TRUE;
17331 else
17332 fail = TRUE;
17333 break;
17335 case BARRIER:
17336 /* Succeed if the following insn is the target label.
17337 Otherwise fail.
17338 If return insns are used then the last insn in a function
17339 will be a barrier. */
17340 this_insn = next_nonnote_insn (this_insn);
17341 if (this_insn && this_insn == label)
17343 arm_ccfsm_state = 1;
17344 succeed = TRUE;
17346 else
17347 fail = TRUE;
17348 break;
17350 case CALL_INSN:
17351 /* The AAPCS says that conditional calls should not be
17352 used since they make interworking inefficient (the
17353 linker can't transform BL<cond> into BLX). That's
17354 only a problem if the machine has BLX. */
17355 if (arm_arch5)
17357 fail = TRUE;
17358 break;
17361 /* Succeed if the following insn is the target label, or
17362 if the following two insns are a barrier and the
17363 target label. */
17364 this_insn = next_nonnote_insn (this_insn);
17365 if (this_insn && GET_CODE (this_insn) == BARRIER)
17366 this_insn = next_nonnote_insn (this_insn);
17368 if (this_insn && this_insn == label
17369 && insns_skipped < max_insns_skipped)
17371 arm_ccfsm_state = 1;
17372 succeed = TRUE;
17374 else
17375 fail = TRUE;
17376 break;
17378 case JUMP_INSN:
17379 /* If this is an unconditional branch to the same label, succeed.
17380 If it is to another label, do nothing. If it is conditional,
17381 fail. */
17382 /* XXX Probably, the tests for SET and the PC are
17383 unnecessary. */
17385 scanbody = PATTERN (this_insn);
17386 if (GET_CODE (scanbody) == SET
17387 && GET_CODE (SET_DEST (scanbody)) == PC)
17389 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17390 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17392 arm_ccfsm_state = 2;
17393 succeed = TRUE;
17395 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17396 fail = TRUE;
17398 /* Fail if a conditional return is undesirable (e.g. on a
17399 StrongARM), but still allow this if optimizing for size. */
17400 else if (GET_CODE (scanbody) == RETURN
17401 && !use_return_insn (TRUE, NULL)
17402 && !optimize_size)
17403 fail = TRUE;
17404 else if (GET_CODE (scanbody) == RETURN
17405 && seeking_return)
17407 arm_ccfsm_state = 2;
17408 succeed = TRUE;
17410 else if (GET_CODE (scanbody) == PARALLEL)
17412 switch (get_attr_conds (this_insn))
17414 case CONDS_NOCOND:
17415 break;
17416 default:
17417 fail = TRUE;
17418 break;
17421 else
17422 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17424 break;
17426 case INSN:
17427 /* Instructions using or affecting the condition codes make it
17428 fail. */
17429 scanbody = PATTERN (this_insn);
17430 if (!(GET_CODE (scanbody) == SET
17431 || GET_CODE (scanbody) == PARALLEL)
17432 || get_attr_conds (this_insn) != CONDS_NOCOND)
17433 fail = TRUE;
17435 /* A conditional Cirrus instruction must be followed by
17436 a non-Cirrus instruction. However, since we
17437 conditionalize instructions in this function, and since
17438 by the time we get here we can no longer add instructions
17439 (nops), because shorten_branches() has already been
17440 called, we disable conditionalizing Cirrus
17441 instructions to be safe. */
17442 if (GET_CODE (scanbody) != USE
17443 && GET_CODE (scanbody) != CLOBBER
17444 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17445 fail = TRUE;
17446 break;
17448 default:
17449 break;
17452 if (succeed)
17454 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17455 arm_target_label = CODE_LABEL_NUMBER (label);
17456 else
17458 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17460 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17462 this_insn = next_nonnote_insn (this_insn);
17463 gcc_assert (!this_insn
17464 || (GET_CODE (this_insn) != BARRIER
17465 && GET_CODE (this_insn) != CODE_LABEL));
17467 if (!this_insn)
17469 /* Oh, dear! We ran off the end... give up. */
17470 extract_constrain_insn_cached (insn);
17471 arm_ccfsm_state = 0;
17472 arm_target_insn = NULL;
17473 return;
17475 arm_target_insn = this_insn;
17478 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17479 what it was. */
17480 if (!reverse)
17481 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17483 if (reverse || then_not_else)
17484 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17487 /* Restore recog_data (getting the attributes of other insns can
17488 destroy this array, but final.c assumes that it remains intact
17489 across this call).  */
17490 extract_constrain_insn_cached (insn);
17494 /* Output IT instructions. */
17495 void
17496 thumb2_asm_output_opcode (FILE * stream)
17498 char buff[5];
17499 int n;
17501 if (arm_condexec_mask)
17503 for (n = 0; n < arm_condexec_masklen; n++)
17504 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17505 buff[n] = 0;
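/* The leading "i" printed below combines with buff[0], which is always
   't', to spell "it"; the remaining characters supply the t/e
   qualifiers.  */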
17506 asm_fprintf(stream, "i%s\t%s\n\t", buff,
17507 arm_condition_codes[arm_current_cc]);
17508 arm_condexec_mask = 0;
17512 /* Returns true if REGNO is a valid register
17513 for holding a quantity of type MODE. */
17514 int
17515 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17517 if (GET_MODE_CLASS (mode) == MODE_CC)
17518 return (regno == CC_REGNUM
17519 || (TARGET_HARD_FLOAT && TARGET_VFP
17520 && regno == VFPCC_REGNUM));
17522 if (TARGET_THUMB1)
17523 /* For the Thumb we only allow values bigger than SImode in
17524 registers 0 - 6, so that there is always a second low
17525 register available to hold the upper part of the value.
17526 We probably ought to ensure that the register is the
17527 start of an even-numbered register pair.
17528 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17530 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17531 && IS_CIRRUS_REGNUM (regno))
17532 /* We have outlawed SI values in Cirrus registers because they
17533 reside in the lower 32 bits, but SF values reside in the
17534 upper 32 bits. This causes gcc all sorts of grief. We can't
17535 even split the registers into pairs because Cirrus SI values
17536 get sign extended to 64bits-- aldyh. */
17537 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17539 if (TARGET_HARD_FLOAT && TARGET_VFP
17540 && IS_VFP_REGNUM (regno))
17542 if (mode == SFmode || mode == SImode)
17543 return VFP_REGNO_OK_FOR_SINGLE (regno);
17545 if (mode == DFmode)
17546 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17548 /* VFP registers can hold HFmode values, but there is no point in
17549 putting them there unless we have hardware conversion insns. */
17550 if (mode == HFmode)
17551 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17553 if (TARGET_NEON)
17554 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17555 || (VALID_NEON_QREG_MODE (mode)
17556 && NEON_REGNO_OK_FOR_QUAD (regno))
17557 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17558 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17559 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17560 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17561 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17563 return FALSE;
17566 if (TARGET_REALLY_IWMMXT)
17568 if (IS_IWMMXT_GR_REGNUM (regno))
17569 return mode == SImode;
17571 if (IS_IWMMXT_REGNUM (regno))
17572 return VALID_IWMMXT_REG_MODE (mode);
17575 /* We allow almost any value to be stored in the general registers.
17576 Restrict doubleword quantities to even register pairs so that we can
17577 use ldrd. Do not allow very large Neon structure opaque modes in
17578 general registers; they would use too many. */
17579 if (regno <= LAST_ARM_REGNUM)
17580 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17581 && ARM_NUM_REGS (mode) <= 4;
17583 if (regno == FRAME_POINTER_REGNUM
17584 || regno == ARG_POINTER_REGNUM)
17585 /* We only allow integers in the fake hard registers. */
17586 return GET_MODE_CLASS (mode) == MODE_INT;
17588 /* The only registers left are the FPA registers
17589 which we only allow to hold FP values. */
17590 return (TARGET_HARD_FLOAT && TARGET_FPA
17591 && GET_MODE_CLASS (mode) == MODE_FLOAT
17592 && regno >= FIRST_FPA_REGNUM
17593 && regno <= LAST_FPA_REGNUM);
17596 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17597 not used in arm mode. */
17599 enum reg_class
17600 arm_regno_class (int regno)
17602 if (TARGET_THUMB1)
17604 if (regno == STACK_POINTER_REGNUM)
17605 return STACK_REG;
17606 if (regno == CC_REGNUM)
17607 return CC_REG;
17608 if (regno < 8)
17609 return LO_REGS;
17610 return HI_REGS;
17613 if (TARGET_THUMB2 && regno < 8)
17614 return LO_REGS;
17616 if ( regno <= LAST_ARM_REGNUM
17617 || regno == FRAME_POINTER_REGNUM
17618 || regno == ARG_POINTER_REGNUM)
17619 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17621 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17622 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17624 if (IS_CIRRUS_REGNUM (regno))
17625 return CIRRUS_REGS;
17627 if (IS_VFP_REGNUM (regno))
17629 if (regno <= D7_VFP_REGNUM)
17630 return VFP_D0_D7_REGS;
17631 else if (regno <= LAST_LO_VFP_REGNUM)
17632 return VFP_LO_REGS;
17633 else
17634 return VFP_HI_REGS;
17637 if (IS_IWMMXT_REGNUM (regno))
17638 return IWMMXT_REGS;
17640 if (IS_IWMMXT_GR_REGNUM (regno))
17641 return IWMMXT_GR_REGS;
17643 return FPA_REGS;
17646 /* Handle a special case when computing the offset
17647 of an argument from the frame pointer. */
17648 int
17649 arm_debugger_arg_offset (int value, rtx addr)
17651 rtx insn;
17653 /* We are only interested in cases where dbxout_parms() failed to compute the offset. */
17654 if (value != 0)
17655 return 0;
17657 /* We can only cope with the case where the address is held in a register. */
17658 if (GET_CODE (addr) != REG)
17659 return 0;
17661 /* If we are using the frame pointer to point at the argument, then
17662 an offset of 0 is correct. */
17663 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17664 return 0;
17666 /* If we are using the stack pointer to point at the
17667 argument, then an offset of 0 is correct. */
17668 /* ??? Check this is consistent with thumb2 frame layout. */
17669 if ((TARGET_THUMB || !frame_pointer_needed)
17670 && REGNO (addr) == SP_REGNUM)
17671 return 0;
17673 /* Oh dear. The argument is pointed to by a register rather
17674 than being held in a register, or being stored at a known
17675 offset from the frame pointer. Since GDB only understands
17676 those two kinds of argument we must translate the address
17677 held in the register into an offset from the frame pointer.
17678 We do this by searching through the insns for the function
17679 looking to see where this register gets its value. If the
17680 register is initialized from the frame pointer plus an offset
17681 then we are in luck and we can continue, otherwise we give up.
17683 This code is exercised by producing debugging information
17684 for a function with arguments like this:
17686 double func (double a, double b, int c, double d) {return d;}
17688 Without this code the stab for parameter 'd' will be set to
17689 an offset of 0 from the frame pointer, rather than 8. */
17691 /* The if() statement says:
17693 If the insn is a normal instruction
17694 and if the insn is setting the value in a register
17695 and if the register being set is the register holding the address of the argument
17696 and if the address is computed by an addition
17697 that involves adding to a register
17698 which is the frame pointer
17699 a constant integer
17701 then... */
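/* In RTL terms (illustrative sketch), the loop below looks for an insn
   whose pattern has the shape
       (set (reg Rn) (plus (reg fp) (const_int N)))
   where Rn is the register named by ADDR; the constant N then becomes
   the offset reported to the debugger.  */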
17703 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17705 if ( GET_CODE (insn) == INSN
17706 && GET_CODE (PATTERN (insn)) == SET
17707 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17708 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17709 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17710 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17711 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17714 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17716 break;
17720 if (value == 0)
17722 debug_rtx (addr);
17723 warning (0, "unable to compute real location of stacked parameter");
17724 value = 8; /* XXX magic hack */
17727 return value;
17730 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17731 do \
17733 if ((MASK) & insn_flags) \
17734 add_builtin_function ((NAME), (TYPE), (CODE), \
17735 BUILT_IN_MD, NULL, NULL_TREE); \
17737 while (0)
17739 struct builtin_description
17741 const unsigned int mask;
17742 const enum insn_code icode;
17743 const char * const name;
17744 const enum arm_builtins code;
17745 const enum rtx_code comparison;
17746 const unsigned int flag;
17749 static const struct builtin_description bdesc_2arg[] =
17751 #define IWMMXT_BUILTIN(code, string, builtin) \
17752 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17753 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
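/* For example (illustrative), the first entry below,
       IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   expands to
       { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
         ARM_BUILTIN_WADDB, UNKNOWN, 0 },  */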
17755 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17756 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17757 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17758 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17759 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17760 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17761 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17762 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17763 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17764 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17765 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17766 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17767 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17768 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17769 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17770 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17771 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17772 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17773 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17774 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17775 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17776 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17777 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17778 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17779 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17780 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17781 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17782 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17783 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17784 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17785 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17786 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17787 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17788 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17789 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17790 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17791 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17792 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17793 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17794 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17795 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17796 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17797 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17798 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17799 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17800 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17801 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17802 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17803 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17804 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17805 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17806 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17807 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17808 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17809 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17810 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17811 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17812 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17814 #define IWMMXT_BUILTIN2(code, builtin) \
17815 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17817 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17818 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17819 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17820 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17821 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17822 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17823 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17824 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17825 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17826 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17827 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17828 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17829 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17830 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17831 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17832 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17833 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17834 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17835 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17836 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17837 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17838 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17839 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17840 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17841 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17842 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17843 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17844 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17845 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17846 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17847 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17848 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
17851 static const struct builtin_description bdesc_1arg[] =
17853 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17854 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17855 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17856 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17857 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17858 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17859 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17860 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17861 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17862 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17863 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17864 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17865 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17866 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17867 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17868 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17869 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17870 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17873 /* Set up all the iWMMXt builtins. This is
17874 not called if TARGET_IWMMXT is zero. */
17876 static void
17877 arm_init_iwmmxt_builtins (void)
17879 const struct builtin_description * d;
17880 size_t i;
17881 tree endlink = void_list_node;
17883 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17884 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17885 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
17887 tree int_ftype_int
17888 = build_function_type (integer_type_node,
17889 tree_cons (NULL_TREE, integer_type_node, endlink));
17890 tree v8qi_ftype_v8qi_v8qi_int
17891 = build_function_type (V8QI_type_node,
17892 tree_cons (NULL_TREE, V8QI_type_node,
17893 tree_cons (NULL_TREE, V8QI_type_node,
17894 tree_cons (NULL_TREE,
17895 integer_type_node,
17896 endlink))));
17897 tree v4hi_ftype_v4hi_int
17898 = build_function_type (V4HI_type_node,
17899 tree_cons (NULL_TREE, V4HI_type_node,
17900 tree_cons (NULL_TREE, integer_type_node,
17901 endlink)));
17902 tree v2si_ftype_v2si_int
17903 = build_function_type (V2SI_type_node,
17904 tree_cons (NULL_TREE, V2SI_type_node,
17905 tree_cons (NULL_TREE, integer_type_node,
17906 endlink)));
17907 tree v2si_ftype_di_di
17908 = build_function_type (V2SI_type_node,
17909 tree_cons (NULL_TREE, long_long_integer_type_node,
17910 tree_cons (NULL_TREE, long_long_integer_type_node,
17911 endlink)));
17912 tree di_ftype_di_int
17913 = build_function_type (long_long_integer_type_node,
17914 tree_cons (NULL_TREE, long_long_integer_type_node,
17915 tree_cons (NULL_TREE, integer_type_node,
17916 endlink)));
17917 tree di_ftype_di_int_int
17918 = build_function_type (long_long_integer_type_node,
17919 tree_cons (NULL_TREE, long_long_integer_type_node,
17920 tree_cons (NULL_TREE, integer_type_node,
17921 tree_cons (NULL_TREE,
17922 integer_type_node,
17923 endlink))));
17924 tree int_ftype_v8qi
17925 = build_function_type (integer_type_node,
17926 tree_cons (NULL_TREE, V8QI_type_node,
17927 endlink));
17928 tree int_ftype_v4hi
17929 = build_function_type (integer_type_node,
17930 tree_cons (NULL_TREE, V4HI_type_node,
17931 endlink));
17932 tree int_ftype_v2si
17933 = build_function_type (integer_type_node,
17934 tree_cons (NULL_TREE, V2SI_type_node,
17935 endlink));
17936 tree int_ftype_v8qi_int
17937 = build_function_type (integer_type_node,
17938 tree_cons (NULL_TREE, V8QI_type_node,
17939 tree_cons (NULL_TREE, integer_type_node,
17940 endlink)));
17941 tree int_ftype_v4hi_int
17942 = build_function_type (integer_type_node,
17943 tree_cons (NULL_TREE, V4HI_type_node,
17944 tree_cons (NULL_TREE, integer_type_node,
17945 endlink)));
17946 tree int_ftype_v2si_int
17947 = build_function_type (integer_type_node,
17948 tree_cons (NULL_TREE, V2SI_type_node,
17949 tree_cons (NULL_TREE, integer_type_node,
17950 endlink)));
17951 tree v8qi_ftype_v8qi_int_int
17952 = build_function_type (V8QI_type_node,
17953 tree_cons (NULL_TREE, V8QI_type_node,
17954 tree_cons (NULL_TREE, integer_type_node,
17955 tree_cons (NULL_TREE,
17956 integer_type_node,
17957 endlink))));
17958 tree v4hi_ftype_v4hi_int_int
17959 = build_function_type (V4HI_type_node,
17960 tree_cons (NULL_TREE, V4HI_type_node,
17961 tree_cons (NULL_TREE, integer_type_node,
17962 tree_cons (NULL_TREE,
17963 integer_type_node,
17964 endlink))));
17965 tree v2si_ftype_v2si_int_int
17966 = build_function_type (V2SI_type_node,
17967 tree_cons (NULL_TREE, V2SI_type_node,
17968 tree_cons (NULL_TREE, integer_type_node,
17969 tree_cons (NULL_TREE,
17970 integer_type_node,
17971 endlink))));
17972 /* Miscellaneous. */
17973 tree v8qi_ftype_v4hi_v4hi
17974 = build_function_type (V8QI_type_node,
17975 tree_cons (NULL_TREE, V4HI_type_node,
17976 tree_cons (NULL_TREE, V4HI_type_node,
17977 endlink)));
17978 tree v4hi_ftype_v2si_v2si
17979 = build_function_type (V4HI_type_node,
17980 tree_cons (NULL_TREE, V2SI_type_node,
17981 tree_cons (NULL_TREE, V2SI_type_node,
17982 endlink)));
17983 tree v2si_ftype_v4hi_v4hi
17984 = build_function_type (V2SI_type_node,
17985 tree_cons (NULL_TREE, V4HI_type_node,
17986 tree_cons (NULL_TREE, V4HI_type_node,
17987 endlink)));
17988 tree v2si_ftype_v8qi_v8qi
17989 = build_function_type (V2SI_type_node,
17990 tree_cons (NULL_TREE, V8QI_type_node,
17991 tree_cons (NULL_TREE, V8QI_type_node,
17992 endlink)));
17993 tree v4hi_ftype_v4hi_di
17994 = build_function_type (V4HI_type_node,
17995 tree_cons (NULL_TREE, V4HI_type_node,
17996 tree_cons (NULL_TREE,
17997 long_long_integer_type_node,
17998 endlink)));
17999 tree v2si_ftype_v2si_di
18000 = build_function_type (V2SI_type_node,
18001 tree_cons (NULL_TREE, V2SI_type_node,
18002 tree_cons (NULL_TREE,
18003 long_long_integer_type_node,
18004 endlink)));
18005 tree void_ftype_int_int
18006 = build_function_type (void_type_node,
18007 tree_cons (NULL_TREE, integer_type_node,
18008 tree_cons (NULL_TREE, integer_type_node,
18009 endlink)));
18010 tree di_ftype_void
18011 = build_function_type (long_long_unsigned_type_node, endlink);
18012 tree di_ftype_v8qi
18013 = build_function_type (long_long_integer_type_node,
18014 tree_cons (NULL_TREE, V8QI_type_node,
18015 endlink));
18016 tree di_ftype_v4hi
18017 = build_function_type (long_long_integer_type_node,
18018 tree_cons (NULL_TREE, V4HI_type_node,
18019 endlink));
18020 tree di_ftype_v2si
18021 = build_function_type (long_long_integer_type_node,
18022 tree_cons (NULL_TREE, V2SI_type_node,
18023 endlink));
18024 tree v2si_ftype_v4hi
18025 = build_function_type (V2SI_type_node,
18026 tree_cons (NULL_TREE, V4HI_type_node,
18027 endlink));
18028 tree v4hi_ftype_v8qi
18029 = build_function_type (V4HI_type_node,
18030 tree_cons (NULL_TREE, V8QI_type_node,
18031 endlink));
18033 tree di_ftype_di_v4hi_v4hi
18034 = build_function_type (long_long_unsigned_type_node,
18035 tree_cons (NULL_TREE,
18036 long_long_unsigned_type_node,
18037 tree_cons (NULL_TREE, V4HI_type_node,
18038 tree_cons (NULL_TREE,
18039 V4HI_type_node,
18040 endlink))));
18042 tree di_ftype_v4hi_v4hi
18043 = build_function_type (long_long_unsigned_type_node,
18044 tree_cons (NULL_TREE, V4HI_type_node,
18045 tree_cons (NULL_TREE, V4HI_type_node,
18046 endlink)));
18048 /* Normal vector binops. */
18049 tree v8qi_ftype_v8qi_v8qi
18050 = build_function_type (V8QI_type_node,
18051 tree_cons (NULL_TREE, V8QI_type_node,
18052 tree_cons (NULL_TREE, V8QI_type_node,
18053 endlink)));
18054 tree v4hi_ftype_v4hi_v4hi
18055 = build_function_type (V4HI_type_node,
18056 tree_cons (NULL_TREE, V4HI_type_node,
18057 tree_cons (NULL_TREE, V4HI_type_node,
18058 endlink)));
18059 tree v2si_ftype_v2si_v2si
18060 = build_function_type (V2SI_type_node,
18061 tree_cons (NULL_TREE, V2SI_type_node,
18062 tree_cons (NULL_TREE, V2SI_type_node,
18063 endlink)));
18064 tree di_ftype_di_di
18065 = build_function_type (long_long_unsigned_type_node,
18066 tree_cons (NULL_TREE, long_long_unsigned_type_node,
18067 tree_cons (NULL_TREE,
18068 long_long_unsigned_type_node,
18069 endlink)));
18071 /* Add all builtins that are more or less simple operations on two
18072 operands. */
18073 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18075 /* Use one of the operands; the target can have a different mode for
18076 mask-generating compares. */
18077 enum machine_mode mode;
18078 tree type;
18080 if (d->name == 0)
18081 continue;
18083 mode = insn_data[d->icode].operand[1].mode;
18085 switch (mode)
18087 case V8QImode:
18088 type = v8qi_ftype_v8qi_v8qi;
18089 break;
18090 case V4HImode:
18091 type = v4hi_ftype_v4hi_v4hi;
18092 break;
18093 case V2SImode:
18094 type = v2si_ftype_v2si_v2si;
18095 break;
18096 case DImode:
18097 type = di_ftype_di_di;
18098 break;
18100 default:
18101 gcc_unreachable ();
18104 def_mbuiltin (d->mask, d->name, type, d->code);
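/* Illustrative walk-through: for the "waddb" entry above, insn addv8qi3
   has a V8QImode operand 1, so this loop selects v8qi_ftype_v8qi_v8qi
   and registers __builtin_arm_waddb with that prototype.  */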
18107 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
18108 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
18109 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
18110 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
18112 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
18113 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
18114 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
18115 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
18116 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
18117 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
18119 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
18120 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
18121 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
18122 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
18123 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
18124 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
18126 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
18127 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
18128 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
18129 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
18130 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
18131 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
18133 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
18134 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
18135 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
18136 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
18137 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
18138 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
18140 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
18142 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
18143 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
18144 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
18145 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
18147 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
18148 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
18149 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
18150 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
18151 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
18152 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
18153 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
18154 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
18155 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
18157 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
18158 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
18159 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
18161 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
18162 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
18163 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
18165 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
18166 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
18167 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
18168 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
18169 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
18170 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
18172 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
18173 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
18174 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
18175 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
18176 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
18177 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
18178 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
18179 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
18180 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
18181 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
18182 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
18183 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
18185 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
18186 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
18187 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
18188 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
18190 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
18191 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
18192 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
18193 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
18194 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
18195 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
18196 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
18199 static void
18200 arm_init_tls_builtins (void)
18202 tree ftype, decl;
18204 ftype = build_function_type (ptr_type_node, void_list_node);
18205 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
18206 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
18207 NULL, NULL_TREE);
18208 TREE_NOTHROW (decl) = 1;
18209 TREE_READONLY (decl) = 1;
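/* Usage sketch (illustrative): the builtin registered above can be called
   from C as
       void *tp = __builtin_thread_pointer ();
   it takes no arguments and returns a pointer, matching ftype above.  */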
18212 enum neon_builtin_type_bits {
18213 T_V8QI = 0x0001,
18214 T_V4HI = 0x0002,
18215 T_V2SI = 0x0004,
18216 T_V2SF = 0x0008,
18217 T_DI = 0x0010,
18218 T_V16QI = 0x0020,
18219 T_V8HI = 0x0040,
18220 T_V4SI = 0x0080,
18221 T_V4SF = 0x0100,
18222 T_V2DI = 0x0200,
18223 T_TI = 0x0400,
18224 T_EI = 0x0800,
18225 T_OI = 0x1000
18228 #define v8qi_UP T_V8QI
18229 #define v4hi_UP T_V4HI
18230 #define v2si_UP T_V2SI
18231 #define v2sf_UP T_V2SF
18232 #define di_UP T_DI
18233 #define v16qi_UP T_V16QI
18234 #define v8hi_UP T_V8HI
18235 #define v4si_UP T_V4SI
18236 #define v4sf_UP T_V4SF
18237 #define v2di_UP T_V2DI
18238 #define ti_UP T_TI
18239 #define ei_UP T_EI
18240 #define oi_UP T_OI
18242 #define UP(X) X##_UP
18244 #define T_MAX 13
18246 typedef enum {
18247 NEON_BINOP,
18248 NEON_TERNOP,
18249 NEON_UNOP,
18250 NEON_GETLANE,
18251 NEON_SETLANE,
18252 NEON_CREATE,
18253 NEON_DUP,
18254 NEON_DUPLANE,
18255 NEON_COMBINE,
18256 NEON_SPLIT,
18257 NEON_LANEMUL,
18258 NEON_LANEMULL,
18259 NEON_LANEMULH,
18260 NEON_LANEMAC,
18261 NEON_SCALARMUL,
18262 NEON_SCALARMULL,
18263 NEON_SCALARMULH,
18264 NEON_SCALARMAC,
18265 NEON_CONVERT,
18266 NEON_FIXCONV,
18267 NEON_SELECT,
18268 NEON_RESULTPAIR,
18269 NEON_REINTERP,
18270 NEON_VTBL,
18271 NEON_VTBX,
18272 NEON_LOAD1,
18273 NEON_LOAD1LANE,
18274 NEON_STORE1,
18275 NEON_STORE1LANE,
18276 NEON_LOADSTRUCT,
18277 NEON_LOADSTRUCTLANE,
18278 NEON_STORESTRUCT,
18279 NEON_STORESTRUCTLANE,
18280 NEON_LOGICBINOP,
18281 NEON_SHIFTINSERT,
18282 NEON_SHIFTIMM,
18283 NEON_SHIFTACC
18284 } neon_itype;
18286 typedef struct {
18287 const char *name;
18288 const neon_itype itype;
18289 const int bits;
18290 const enum insn_code codes[T_MAX];
18291 const unsigned int num_vars;
18292 unsigned int base_fcode;
18293 } neon_builtin_datum;
18295 #define CF(N,X) CODE_FOR_neon_##N##X
18297 #define VAR1(T, N, A) \
18298 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
18299 #define VAR2(T, N, A, B) \
18300 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
18301 #define VAR3(T, N, A, B, C) \
18302 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
18303 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
18304 #define VAR4(T, N, A, B, C, D) \
18305 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
18306 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
18307 #define VAR5(T, N, A, B, C, D, E) \
18308 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
18309 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
18310 #define VAR6(T, N, A, B, C, D, E, F) \
18311 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
18312 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
18313 #define VAR7(T, N, A, B, C, D, E, F, G) \
18314 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
18315 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18316 CF (N, G) }, 7, 0
18317 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18318 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18319 | UP (H), \
18320 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18321 CF (N, G), CF (N, H) }, 8, 0
18322 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18323 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18324 | UP (H) | UP (I), \
18325 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18326 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
18327 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18328 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18329 | UP (H) | UP (I) | UP (J), \
18330 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18331 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
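/* For example (illustrative), the table entry
       { VAR2 (BINOP, vqdmull, v4hi, v2si) }
   expands to
       { "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
         { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0 }  */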
18333 /* The mode entries in the following table correspond to the "key" type of the
18334 instruction variant, i.e. equivalent to that which would be specified after
18335 the assembler mnemonic, which usually refers to the last vector operand.
18336 (Signed, unsigned and polynomial types are not distinguished, however; they
18337 are all mapped onto the same mode for a given element size.) The modes
18338 listed per instruction should be the same as those defined for that
18339 instruction's pattern in neon.md.
18340 WARNING: Variants should be listed in the same increasing order as
18341 neon_builtin_type_bits. */
18343 static neon_builtin_datum neon_builtin_data[] =
18345 { VAR10 (BINOP, vadd,
18346 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18347 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
18348 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
18349 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18350 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18351 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
18352 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18353 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18354 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
18355 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18356 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
18357 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
18358 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
18359 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
18360 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
18361 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
18362 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
18363 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
18364 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
18365 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
18366 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
18367 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
18368 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18369 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18370 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18371 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
18372 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
18373 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
18374 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18375 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18376 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18377 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
18378 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18379 { VAR10 (BINOP, vsub,
18380 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18381 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
18382 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
18383 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18384 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18385 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
18386 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18387 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18388 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18389 { VAR2 (BINOP, vcage, v2sf, v4sf) },
18390 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
18391 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18392 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18393 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
18394 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18395 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
18396 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18397 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18398 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
18399 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18400 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18401 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
18402 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
18403 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
18404 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
18405 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18406 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18407 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18408 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18409 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18410 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18411 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18412 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18413 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
18414 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
18415 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
18416 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18417 /* FIXME: vget_lane supports more variants than this! */
18418 { VAR10 (GETLANE, vget_lane,
18419 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18420 { VAR10 (SETLANE, vset_lane,
18421 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18422 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
18423 { VAR10 (DUP, vdup_n,
18424 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18425 { VAR10 (DUPLANE, vdup_lane,
18426 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18427 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
18428 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
18429 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
18430 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
18431 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
18432 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
18433 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
18434 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18435 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18436 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
18437 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
18438 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18439 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
18440 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
18441 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18442 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18443 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
18444 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
18445 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18446 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
18447 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
18448 { VAR10 (BINOP, vext,
18449 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18450 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18451 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
18452 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
18453 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
18454 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
18455 { VAR10 (SELECT, vbsl,
18456 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18457 { VAR1 (VTBL, vtbl1, v8qi) },
18458 { VAR1 (VTBL, vtbl2, v8qi) },
18459 { VAR1 (VTBL, vtbl3, v8qi) },
18460 { VAR1 (VTBL, vtbl4, v8qi) },
18461 { VAR1 (VTBX, vtbx1, v8qi) },
18462 { VAR1 (VTBX, vtbx2, v8qi) },
18463 { VAR1 (VTBX, vtbx3, v8qi) },
18464 { VAR1 (VTBX, vtbx4, v8qi) },
18465 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18466 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18467 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18468 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
18469 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
18470 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
18471 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
18472 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
18473 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
18474 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
18475 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
18476 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
18477 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
18478 { VAR10 (LOAD1, vld1,
18479 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18480 { VAR10 (LOAD1LANE, vld1_lane,
18481 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18482 { VAR10 (LOAD1, vld1_dup,
18483 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18484 { VAR10 (STORE1, vst1,
18485 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18486 { VAR10 (STORE1LANE, vst1_lane,
18487 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18488 { VAR9 (LOADSTRUCT,
18489 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18490 { VAR7 (LOADSTRUCTLANE, vld2_lane,
18491 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18492 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
18493 { VAR9 (STORESTRUCT, vst2,
18494 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18495 { VAR7 (STORESTRUCTLANE, vst2_lane,
18496 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18497 { VAR9 (LOADSTRUCT,
18498 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18499 { VAR7 (LOADSTRUCTLANE, vld3_lane,
18500 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18501 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
18502 { VAR9 (STORESTRUCT, vst3,
18503 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18504 { VAR7 (STORESTRUCTLANE, vst3_lane,
18505 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18506 { VAR9 (LOADSTRUCT, vld4,
18507 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18508 { VAR7 (LOADSTRUCTLANE, vld4_lane,
18509 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18510 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
18511 { VAR9 (STORESTRUCT, vst4,
18512 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18513 { VAR7 (STORESTRUCTLANE, vst4_lane,
18514 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18515 { VAR10 (LOGICBINOP, vand,
18516 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18517 { VAR10 (LOGICBINOP, vorr,
18518 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18519 { VAR10 (BINOP, veor,
18520 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18521 { VAR10 (LOGICBINOP, vbic,
18522 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18523 { VAR10 (LOGICBINOP, vorn,
18524 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
18527 #undef CF
18528 #undef VAR1
18529 #undef VAR2
18530 #undef VAR3
18531 #undef VAR4
18532 #undef VAR5
18533 #undef VAR6
18534 #undef VAR7
18535 #undef VAR8
18536 #undef VAR9
18537 #undef VAR10
18539 static void
18540 arm_init_neon_builtins (void)
18542 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
18544 tree neon_intQI_type_node;
18545 tree neon_intHI_type_node;
18546 tree neon_polyQI_type_node;
18547 tree neon_polyHI_type_node;
18548 tree neon_intSI_type_node;
18549 tree neon_intDI_type_node;
18550 tree neon_float_type_node;
18552 tree intQI_pointer_node;
18553 tree intHI_pointer_node;
18554 tree intSI_pointer_node;
18555 tree intDI_pointer_node;
18556 tree float_pointer_node;
18558 tree const_intQI_node;
18559 tree const_intHI_node;
18560 tree const_intSI_node;
18561 tree const_intDI_node;
18562 tree const_float_node;
18564 tree const_intQI_pointer_node;
18565 tree const_intHI_pointer_node;
18566 tree const_intSI_pointer_node;
18567 tree const_intDI_pointer_node;
18568 tree const_float_pointer_node;
18570 tree V8QI_type_node;
18571 tree V4HI_type_node;
18572 tree V2SI_type_node;
18573 tree V2SF_type_node;
18574 tree V16QI_type_node;
18575 tree V8HI_type_node;
18576 tree V4SI_type_node;
18577 tree V4SF_type_node;
18578 tree V2DI_type_node;
18580 tree intUQI_type_node;
18581 tree intUHI_type_node;
18582 tree intUSI_type_node;
18583 tree intUDI_type_node;
18585 tree intEI_type_node;
18586 tree intOI_type_node;
18587 tree intCI_type_node;
18588 tree intXI_type_node;
18590 tree V8QI_pointer_node;
18591 tree V4HI_pointer_node;
18592 tree V2SI_pointer_node;
18593 tree V2SF_pointer_node;
18594 tree V16QI_pointer_node;
18595 tree V8HI_pointer_node;
18596 tree V4SI_pointer_node;
18597 tree V4SF_pointer_node;
18598 tree V2DI_pointer_node;
18600 tree void_ftype_pv8qi_v8qi_v8qi;
18601 tree void_ftype_pv4hi_v4hi_v4hi;
18602 tree void_ftype_pv2si_v2si_v2si;
18603 tree void_ftype_pv2sf_v2sf_v2sf;
18604 tree void_ftype_pdi_di_di;
18605 tree void_ftype_pv16qi_v16qi_v16qi;
18606 tree void_ftype_pv8hi_v8hi_v8hi;
18607 tree void_ftype_pv4si_v4si_v4si;
18608 tree void_ftype_pv4sf_v4sf_v4sf;
18609 tree void_ftype_pv2di_v2di_v2di;
18611 tree reinterp_ftype_dreg[5][5];
18612 tree reinterp_ftype_qreg[5][5];
18613 tree dreg_types[5], qreg_types[5];
18615 /* Create distinguished type nodes for NEON vector element types,
18616 and pointers to values of such types, so we can detect them later. */
18617 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18618 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18619 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18620 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18621 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18622 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18623 neon_float_type_node = make_node (REAL_TYPE);
18624 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18625 layout_type (neon_float_type_node);
18627 /* Define typedefs which exactly correspond to the modes we are basing vector
18628 types on. If you change these names you'll need to change
18629 the table used by arm_mangle_type too. */
18630 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18631 "__builtin_neon_qi");
18632 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18633 "__builtin_neon_hi");
18634 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18635 "__builtin_neon_si");
18636 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18637 "__builtin_neon_sf");
18638 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18639 "__builtin_neon_di");
18640 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18641 "__builtin_neon_poly8");
18642 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18643 "__builtin_neon_poly16");
18645 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18646 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18647 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18648 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18649 float_pointer_node = build_pointer_type (neon_float_type_node);
18651 /* Next create constant-qualified versions of the above types. */
18652 const_intQI_node = build_qualified_type (neon_intQI_type_node,
18653 TYPE_QUAL_CONST);
18654 const_intHI_node = build_qualified_type (neon_intHI_type_node,
18655 TYPE_QUAL_CONST);
18656 const_intSI_node = build_qualified_type (neon_intSI_type_node,
18657 TYPE_QUAL_CONST);
18658 const_intDI_node = build_qualified_type (neon_intDI_type_node,
18659 TYPE_QUAL_CONST);
18660 const_float_node = build_qualified_type (neon_float_type_node,
18661 TYPE_QUAL_CONST);
18663 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18664 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18665 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18666 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18667 const_float_pointer_node = build_pointer_type (const_float_node);
18669 /* Now create vector types based on our NEON element types. */
18670 /* 64-bit vectors. */
18671 V8QI_type_node =
18672 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18673 V4HI_type_node =
18674 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18675 V2SI_type_node =
18676 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18677 V2SF_type_node =
18678 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
18679 /* 128-bit vectors. */
18680 V16QI_type_node =
18681 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
18682 V8HI_type_node =
18683 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
18684 V4SI_type_node =
18685 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
18686 V4SF_type_node =
18687 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
18688 V2DI_type_node =
18689 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18691 /* Unsigned integer types for various mode sizes. */
18692 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18693 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18694 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18695 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18697 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18698 "__builtin_neon_uqi");
18699 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18700 "__builtin_neon_uhi");
18701 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18702 "__builtin_neon_usi");
18703 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18704 "__builtin_neon_udi");
18706 /* Opaque integer types for structures of vectors. */
18707 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18708 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18709 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18710 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18712 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18713 "__builtin_neon_ti");
18714 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18715 "__builtin_neon_ei");
18716 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18717 "__builtin_neon_oi");
18718 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18719 "__builtin_neon_ci");
18720 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18721 "__builtin_neon_xi");
18723 /* Pointers to vector types. */
18724 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18725 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18726 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18727 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18728 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18729 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18730 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18731 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18732 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18734 /* Operations which return results as pairs. */
18735 void_ftype_pv8qi_v8qi_v8qi =
18736 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18737 V8QI_type_node, NULL);
18738 void_ftype_pv4hi_v4hi_v4hi =
18739 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18740 V4HI_type_node, NULL);
18741 void_ftype_pv2si_v2si_v2si =
18742 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18743 V2SI_type_node, NULL);
18744 void_ftype_pv2sf_v2sf_v2sf =
18745 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18746 V2SF_type_node, NULL);
18747 void_ftype_pdi_di_di =
18748 build_function_type_list (void_type_node, intDI_pointer_node,
18749 neon_intDI_type_node, neon_intDI_type_node, NULL);
18750 void_ftype_pv16qi_v16qi_v16qi =
18751 build_function_type_list (void_type_node, V16QI_pointer_node,
18752 V16QI_type_node, V16QI_type_node, NULL);
18753 void_ftype_pv8hi_v8hi_v8hi =
18754 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18755 V8HI_type_node, NULL);
18756 void_ftype_pv4si_v4si_v4si =
18757 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18758 V4SI_type_node, NULL);
18759 void_ftype_pv4sf_v4sf_v4sf =
18760 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18761 V4SF_type_node, NULL);
18762 void_ftype_pv2di_v2di_v2di =
18763 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18764 V2DI_type_node, NULL);
18766 dreg_types[0] = V8QI_type_node;
18767 dreg_types[1] = V4HI_type_node;
18768 dreg_types[2] = V2SI_type_node;
18769 dreg_types[3] = V2SF_type_node;
18770 dreg_types[4] = neon_intDI_type_node;
18772 qreg_types[0] = V16QI_type_node;
18773 qreg_types[1] = V8HI_type_node;
18774 qreg_types[2] = V4SI_type_node;
18775 qreg_types[3] = V4SF_type_node;
18776 qreg_types[4] = V2DI_type_node;
18778 for (i = 0; i < 5; i++)
18780 int j;
18781 for (j = 0; j < 5; j++)
18783 reinterp_ftype_dreg[i][j]
18784 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18785 reinterp_ftype_qreg[i][j]
18786 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
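/* Illustrative example: reinterp_ftype_dreg[0][1] built above is the type
   "V8QI function taking a V4HI argument", later used for the
   vreinterpretv8qi builtin applied to a v4hi value; the qreg table is the
   128-bit analogue.  */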
18790 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
18792 neon_builtin_datum *d = &neon_builtin_data[i];
18793 unsigned int j, codeidx = 0;
18795 d->base_fcode = fcode;
18797 for (j = 0; j < T_MAX; j++)
18799 const char* const modenames[] = {
18800 "v8qi", "v4hi", "v2si", "v2sf", "di",
18801 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
18803 char namebuf[60];
18804 tree ftype = NULL;
18805 enum insn_code icode;
18806 int is_load = 0, is_store = 0;
18808 if ((d->bits & (1 << j)) == 0)
18809 continue;
18811 icode = d->codes[codeidx++];
18813 switch (d->itype)
18815 case NEON_LOAD1:
18816 case NEON_LOAD1LANE:
18817 case NEON_LOADSTRUCT:
18818 case NEON_LOADSTRUCTLANE:
18819 is_load = 1;
18820 /* Fall through. */
18821 case NEON_STORE1:
18822 case NEON_STORE1LANE:
18823 case NEON_STORESTRUCT:
18824 case NEON_STORESTRUCTLANE:
18825 if (!is_load)
18826 is_store = 1;
18827 /* Fall through. */
18828 case NEON_UNOP:
18829 case NEON_BINOP:
18830 case NEON_LOGICBINOP:
18831 case NEON_SHIFTINSERT:
18832 case NEON_TERNOP:
18833 case NEON_GETLANE:
18834 case NEON_SETLANE:
18835 case NEON_CREATE:
18836 case NEON_DUP:
18837 case NEON_DUPLANE:
18838 case NEON_SHIFTIMM:
18839 case NEON_SHIFTACC:
18840 case NEON_COMBINE:
18841 case NEON_SPLIT:
18842 case NEON_CONVERT:
18843 case NEON_FIXCONV:
18844 case NEON_LANEMUL:
18845 case NEON_LANEMULL:
18846 case NEON_LANEMULH:
18847 case NEON_LANEMAC:
18848 case NEON_SCALARMUL:
18849 case NEON_SCALARMULL:
18850 case NEON_SCALARMULH:
18851 case NEON_SCALARMAC:
18852 case NEON_SELECT:
18853 case NEON_VTBL:
18854 case NEON_VTBX:
18856 int k;
18857 tree return_type = void_type_node, args = void_list_node;
18859 /* Build a function type directly from the insn_data for this
18860 builtin. The build_function_type() function takes care of
18861 removing duplicates for us. */
18862 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
18864 tree eltype;
18866 if (is_load && k == 1)
18868 /* Neon load patterns always have the memory operand
18869 (a SImode pointer) in the operand 1 position. We
18870 want a const pointer to the element type in that
18871 position. */
18872 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18874 switch (1 << j)
18876 case T_V8QI:
18877 case T_V16QI:
18878 eltype = const_intQI_pointer_node;
18879 break;
18881 case T_V4HI:
18882 case T_V8HI:
18883 eltype = const_intHI_pointer_node;
18884 break;
18886 case T_V2SI:
18887 case T_V4SI:
18888 eltype = const_intSI_pointer_node;
18889 break;
18891 case T_V2SF:
18892 case T_V4SF:
18893 eltype = const_float_pointer_node;
18894 break;
18896 case T_DI:
18897 case T_V2DI:
18898 eltype = const_intDI_pointer_node;
18899 break;
18901 default: gcc_unreachable ();
18904 else if (is_store && k == 0)
18906 /* Similarly, Neon store patterns use operand 0 as
18907 the memory location to store to (a SImode pointer).
18908 Use a pointer to the element type of the store in
18909 that position. */
18910 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18912 switch (1 << j)
18914 case T_V8QI:
18915 case T_V16QI:
18916 eltype = intQI_pointer_node;
18917 break;
18919 case T_V4HI:
18920 case T_V8HI:
18921 eltype = intHI_pointer_node;
18922 break;
18924 case T_V2SI:
18925 case T_V4SI:
18926 eltype = intSI_pointer_node;
18927 break;
18929 case T_V2SF:
18930 case T_V4SF:
18931 eltype = float_pointer_node;
18932 break;
18934 case T_DI:
18935 case T_V2DI:
18936 eltype = intDI_pointer_node;
18937 break;
18939 default: gcc_unreachable ();
18942 else
18944 switch (insn_data[icode].operand[k].mode)
18946 case VOIDmode: eltype = void_type_node; break;
18947 /* Scalars. */
18948 case QImode: eltype = neon_intQI_type_node; break;
18949 case HImode: eltype = neon_intHI_type_node; break;
18950 case SImode: eltype = neon_intSI_type_node; break;
18951 case SFmode: eltype = neon_float_type_node; break;
18952 case DImode: eltype = neon_intDI_type_node; break;
18953 case TImode: eltype = intTI_type_node; break;
18954 case EImode: eltype = intEI_type_node; break;
18955 case OImode: eltype = intOI_type_node; break;
18956 case CImode: eltype = intCI_type_node; break;
18957 case XImode: eltype = intXI_type_node; break;
18958 /* 64-bit vectors. */
18959 case V8QImode: eltype = V8QI_type_node; break;
18960 case V4HImode: eltype = V4HI_type_node; break;
18961 case V2SImode: eltype = V2SI_type_node; break;
18962 case V2SFmode: eltype = V2SF_type_node; break;
18963 /* 128-bit vectors. */
18964 case V16QImode: eltype = V16QI_type_node; break;
18965 case V8HImode: eltype = V8HI_type_node; break;
18966 case V4SImode: eltype = V4SI_type_node; break;
18967 case V4SFmode: eltype = V4SF_type_node; break;
18968 case V2DImode: eltype = V2DI_type_node; break;
18969 default: gcc_unreachable ();
18973 if (k == 0 && !is_store)
18974 return_type = eltype;
18975 else
18976 args = tree_cons (NULL_TREE, eltype, args);
18979 ftype = build_function_type (return_type, args);
18981 break;
18983 case NEON_RESULTPAIR:
18985 switch (insn_data[icode].operand[1].mode)
18987 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
18988 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
18989 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
18990 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
18991 case DImode: ftype = void_ftype_pdi_di_di; break;
18992 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
18993 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
18994 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
18995 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
18996 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
18997 default: gcc_unreachable ();
19000 break;
19002 case NEON_REINTERP:
19004 /* We iterate over 5 doubleword types, then 5 quadword
19005 types. */
19006 int rhs = j % 5;
19007 switch (insn_data[icode].operand[0].mode)
19009 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19010 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19011 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19012 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19013 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19014 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19015 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19016 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19017 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19018 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19019 default: gcc_unreachable ();
19022 break;
19024 default:
19025 gcc_unreachable ();
19028 gcc_assert (ftype != NULL);
19030 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
19032 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
19033 NULL_TREE);
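/* Naming sketch (illustrative): for the vadd entry with its v8qi variant,
   the sprintf above produces "__builtin_neon_vaddv8qi", and consecutive
   function codes are handed out starting from ARM_BUILTIN_NEON_BASE.  */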
19038 static void
19039 arm_init_fp16_builtins (void)
19041 tree fp16_type = make_node (REAL_TYPE);
19042 TYPE_PRECISION (fp16_type) = 16;
19043 layout_type (fp16_type);
19044 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
19047 static void
19048 arm_init_builtins (void)
19050 arm_init_tls_builtins ();
19052 if (TARGET_REALLY_IWMMXT)
19053 arm_init_iwmmxt_builtins ();
19055 if (TARGET_NEON)
19056 arm_init_neon_builtins ();
19058 if (arm_fp16_format)
19059 arm_init_fp16_builtins ();
19062 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19064 static const char *
19065 arm_invalid_parameter_type (const_tree t)
19067 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19068 return N_("function parameters cannot have __fp16 type");
19069 return NULL;
19072 /* Implement TARGET_INVALID_RETURN_TYPE. */
19074 static const char *
19075 arm_invalid_return_type (const_tree t)
19077 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19078 return N_("functions cannot return __fp16 type");
19079 return NULL;
19082 /* Implement TARGET_PROMOTED_TYPE. */
19084 static tree
19085 arm_promoted_type (const_tree t)
19087 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19088 return float_type_node;
19089 return NULL_TREE;
19092 /* Implement TARGET_CONVERT_TO_TYPE.
19093 Specifically, this hook implements the peculiarity of the ARM
19094 half-precision floating-point C semantics that requires conversions between
19095 __fp16 and double to go through an intermediate conversion to float. */
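/* For example, a conversion from __fp16 to double is expanded as
   (double) (float) x.  Conversions between __fp16 and float need no
   intermediate step and are left to the default machinery (NULL_TREE). */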
19097 static tree
19098 arm_convert_to_type (tree type, tree expr)
19100 tree fromtype = TREE_TYPE (expr);
19101 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
19102 return NULL_TREE;
19103 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
19104 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
19105 return convert (type, convert (float_type_node, expr));
19106 return NULL_TREE;
19109 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19110 This simply adds HFmode as a supported mode; even though we don't
19111 implement arithmetic on this type directly, it's supported by
19112 optabs conversions, much the way the double-word arithmetic is
19113 special-cased in the default hook. */
19115 static bool
19116 arm_scalar_mode_supported_p (enum machine_mode mode)
19118 if (mode == HFmode)
19119 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
19120 else
19121 return default_scalar_mode_supported_p (mode);
19124 /* Errors in the source file can cause expand_expr to return const0_rtx
19125 where we expect a vector. To avoid crashing, use one of the vector
19126 clear instructions. */
19128 static rtx
19129 safe_vector_operand (rtx x, enum machine_mode mode)
19131 if (x != const0_rtx)
19132 return x;
19133 x = gen_reg_rtx (mode);
19135 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
19136 : gen_rtx_SUBREG (DImode, x, 0)));
19137 return x;
19140 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19142 static rtx
19143 arm_expand_binop_builtin (enum insn_code icode,
19144 tree exp, rtx target)
19146 rtx pat;
19147 tree arg0 = CALL_EXPR_ARG (exp, 0);
19148 tree arg1 = CALL_EXPR_ARG (exp, 1);
19149 rtx op0 = expand_normal (arg0);
19150 rtx op1 = expand_normal (arg1);
19151 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19152 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19153 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19155 if (VECTOR_MODE_P (mode0))
19156 op0 = safe_vector_operand (op0, mode0);
19157 if (VECTOR_MODE_P (mode1))
19158 op1 = safe_vector_operand (op1, mode1);
19160 if (! target
19161 || GET_MODE (target) != tmode
19162 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19163 target = gen_reg_rtx (tmode);
19165 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
19167 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19168 op0 = copy_to_mode_reg (mode0, op0);
19169 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19170 op1 = copy_to_mode_reg (mode1, op1);
19172 pat = GEN_FCN (icode) (target, op0, op1);
19173 if (! pat)
19174 return 0;
19175 emit_insn (pat);
19176 return target;
19179 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19181 static rtx
19182 arm_expand_unop_builtin (enum insn_code icode,
19183 tree exp, rtx target, int do_load)
19185 rtx pat;
19186 tree arg0 = CALL_EXPR_ARG (exp, 0);
19187 rtx op0 = expand_normal (arg0);
19188 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19189 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19191 if (! target
19192 || GET_MODE (target) != tmode
19193 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19194 target = gen_reg_rtx (tmode);
19195 if (do_load)
19196 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19197 else
19199 if (VECTOR_MODE_P (mode0))
19200 op0 = safe_vector_operand (op0, mode0);
19202 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19203 op0 = copy_to_mode_reg (mode0, op0);
19206 pat = GEN_FCN (icode) (target, op0);
19207 if (! pat)
19208 return 0;
19209 emit_insn (pat);
19210 return target;
19213 static int
19214 neon_builtin_compare (const void *a, const void *b)
19216 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
19217 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
19218 unsigned int soughtcode = key->base_fcode;
19220 if (soughtcode >= memb->base_fcode
19221 && soughtcode < memb->base_fcode + memb->num_vars)
19222 return 0;
19223 else if (soughtcode < memb->base_fcode)
19224 return -1;
19225 else
19226 return 1;
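/* Each neon_builtin_data entry covers the contiguous range of function
   codes [base_fcode, base_fcode + num_vars), which is why a plain bsearch
   keyed on base_fcode is sufficient in locate_neon_builtin_icode. */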
19229 static enum insn_code
19230 locate_neon_builtin_icode (int fcode, neon_itype *itype)
19232 neon_builtin_datum key
19233 = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
19234 neon_builtin_datum *found;
19235 int idx;
19237 key.base_fcode = fcode;
19238 found = (neon_builtin_datum *)
19239 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
19240 sizeof (neon_builtin_data[0]), neon_builtin_compare);
19241 gcc_assert (found);
19242 idx = fcode - (int) found->base_fcode;
19243 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
19245 if (itype)
19246 *itype = found->itype;
19248 return found->codes[idx];
19251 typedef enum {
19252 NEON_ARG_COPY_TO_REG,
19253 NEON_ARG_CONSTANT,
19254 NEON_ARG_STOP
19255 } builtin_arg;
19257 #define NEON_MAX_BUILTIN_ARGS 5
19259 /* Expand a Neon builtin. */
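/* The variable arguments are a sequence of builtin_arg codes, one per
   instruction operand (after the result operand when HAVE_RETVAL is
   nonzero), terminated by NEON_ARG_STOP.  NEON_ARG_COPY_TO_REG forces the
   operand into a register; NEON_ARG_CONSTANT requires it to satisfy the
   insn's predicate, typically an immediate. */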
19260 static rtx
19261 arm_expand_neon_args (rtx target, int icode, int have_retval,
19262 tree exp, ...)
19264 va_list ap;
19265 rtx pat;
19266 tree arg[NEON_MAX_BUILTIN_ARGS];
19267 rtx op[NEON_MAX_BUILTIN_ARGS];
19268 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19269 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
19270 int argc = 0;
19272 if (have_retval
19273 && (!target
19274 || GET_MODE (target) != tmode
19275 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
19276 target = gen_reg_rtx (tmode);
19278 va_start (ap, exp);
19280 for (;;)
19282 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
19284 if (thisarg == NEON_ARG_STOP)
19285 break;
19286 else
19288 arg[argc] = CALL_EXPR_ARG (exp, argc);
19289 op[argc] = expand_normal (arg[argc]);
19290 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
19292 switch (thisarg)
19294 case NEON_ARG_COPY_TO_REG:
19295 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19296 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19297 (op[argc], mode[argc]))
19298 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
19299 break;
19301 case NEON_ARG_CONSTANT:
19302 /* FIXME: This error message is somewhat unhelpful. */
19303 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19304 (op[argc], mode[argc]))
19305 error ("argument must be a constant");
19306 break;
19308 case NEON_ARG_STOP:
19309 gcc_unreachable ();
19312 argc++;
19316 va_end (ap);
19318 if (have_retval)
19319 switch (argc)
19321 case 1:
19322 pat = GEN_FCN (icode) (target, op[0]);
19323 break;
19325 case 2:
19326 pat = GEN_FCN (icode) (target, op[0], op[1]);
19327 break;
19329 case 3:
19330 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
19331 break;
19333 case 4:
19334 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19335 break;
19337 case 5:
19338 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19339 break;
19341 default:
19342 gcc_unreachable ();
19344 else
19345 switch (argc)
19347 case 1:
19348 pat = GEN_FCN (icode) (op[0]);
19349 break;
19351 case 2:
19352 pat = GEN_FCN (icode) (op[0], op[1]);
19353 break;
19355 case 3:
19356 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19357 break;
19359 case 4:
19360 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19361 break;
19363 case 5:
19364 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19365 break;
19367 default:
19368 gcc_unreachable ();
19371 if (!pat)
19372 return 0;
19374 emit_insn (pat);
19376 return target;
19379 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19380 constants defined per-instruction or per instruction-variant. Instead, the
19381 required info is looked up in the table neon_builtin_data. */
19382 static rtx
19383 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
19385 neon_itype itype;
19386 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
19388 switch (itype)
19390 case NEON_UNOP:
19391 case NEON_CONVERT:
19392 case NEON_DUPLANE:
19393 return arm_expand_neon_args (target, icode, 1, exp,
19394 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19396 case NEON_BINOP:
19397 case NEON_SETLANE:
19398 case NEON_SCALARMUL:
19399 case NEON_SCALARMULL:
19400 case NEON_SCALARMULH:
19401 case NEON_SHIFTINSERT:
19402 case NEON_LOGICBINOP:
19403 return arm_expand_neon_args (target, icode, 1, exp,
19404 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19405 NEON_ARG_STOP);
19407 case NEON_TERNOP:
19408 return arm_expand_neon_args (target, icode, 1, exp,
19409 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19410 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19412 case NEON_GETLANE:
19413 case NEON_FIXCONV:
19414 case NEON_SHIFTIMM:
19415 return arm_expand_neon_args (target, icode, 1, exp,
19416 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19417 NEON_ARG_STOP);
19419 case NEON_CREATE:
19420 return arm_expand_neon_args (target, icode, 1, exp,
19421 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19423 case NEON_DUP:
19424 case NEON_SPLIT:
19425 case NEON_REINTERP:
19426 return arm_expand_neon_args (target, icode, 1, exp,
19427 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19429 case NEON_COMBINE:
19430 case NEON_VTBL:
19431 return arm_expand_neon_args (target, icode, 1, exp,
19432 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19434 case NEON_RESULTPAIR:
19435 return arm_expand_neon_args (target, icode, 0, exp,
19436 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19437 NEON_ARG_STOP);
19439 case NEON_LANEMUL:
19440 case NEON_LANEMULL:
19441 case NEON_LANEMULH:
19442 return arm_expand_neon_args (target, icode, 1, exp,
19443 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19444 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19446 case NEON_LANEMAC:
19447 return arm_expand_neon_args (target, icode, 1, exp,
19448 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19449 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19451 case NEON_SHIFTACC:
19452 return arm_expand_neon_args (target, icode, 1, exp,
19453 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19454 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19456 case NEON_SCALARMAC:
19457 return arm_expand_neon_args (target, icode, 1, exp,
19458 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19459 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19461 case NEON_SELECT:
19462 case NEON_VTBX:
19463 return arm_expand_neon_args (target, icode, 1, exp,
19464 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19465 NEON_ARG_STOP);
19467 case NEON_LOAD1:
19468 case NEON_LOADSTRUCT:
19469 return arm_expand_neon_args (target, icode, 1, exp,
19470 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19472 case NEON_LOAD1LANE:
19473 case NEON_LOADSTRUCTLANE:
19474 return arm_expand_neon_args (target, icode, 1, exp,
19475 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19476 NEON_ARG_STOP);
19478 case NEON_STORE1:
19479 case NEON_STORESTRUCT:
19480 return arm_expand_neon_args (target, icode, 0, exp,
19481 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19483 case NEON_STORE1LANE:
19484 case NEON_STORESTRUCTLANE:
19485 return arm_expand_neon_args (target, icode, 0, exp,
19486 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19487 NEON_ARG_STOP);
19490 gcc_unreachable ();
19493 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19494 void
19495 neon_reinterpret (rtx dest, rtx src)
19497 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
19500 /* Emit code to place a Neon pair result in memory locations (with equal
19501 registers). */
19502 void
19503 neon_emit_pair_result_insn (enum machine_mode mode,
19504 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19505 rtx op1, rtx op2)
19507 rtx mem = gen_rtx_MEM (mode, destaddr);
19508 rtx tmp1 = gen_reg_rtx (mode);
19509 rtx tmp2 = gen_reg_rtx (mode);
19511 emit_insn (intfn (tmp1, op1, tmp2, op2));
19513 emit_move_insn (mem, tmp1);
19514 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19515 emit_move_insn (mem, tmp2);
19518 /* Set up operands for a register copy from src to dest, taking care not to
19519 clobber registers in the process.
19520 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19521 be called with a large N, so that should be OK. */
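/* For example, with COUNT == 2, dest = { d1, d2 } and src = { d0, d1 },
   the copy d2 := d1 is ordered before d1 := d0 so that the still-live
   source d1 is not clobbered (registers chosen for illustration only). */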
19523 void
19524 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
19526 unsigned int copied = 0, opctr = 0;
19527 unsigned int done = (1 << count) - 1;
19528 unsigned int i, j;
19530 while (copied != done)
19532 for (i = 0; i < count; i++)
19534 int good = 1;
19536 for (j = 0; good && j < count; j++)
19537 if (i != j && (copied & (1 << j)) == 0
19538 && reg_overlap_mentioned_p (src[j], dest[i]))
19539 good = 0;
19541 if (good)
19543 operands[opctr++] = dest[i];
19544 operands[opctr++] = src[i];
19545 copied |= 1 << i;
19550 gcc_assert (opctr == count * 2);
19553 /* Expand an expression EXP that calls a built-in function,
19554 with result going to TARGET if that's convenient
19555 (and in mode MODE if that's convenient).
19556 SUBTARGET may be used as the target for computing one of EXP's operands.
19557 IGNORE is nonzero if the value is to be ignored. */
19559 static rtx
19560 arm_expand_builtin (tree exp,
19561 rtx target,
19562 rtx subtarget ATTRIBUTE_UNUSED,
19563 enum machine_mode mode ATTRIBUTE_UNUSED,
19564 int ignore ATTRIBUTE_UNUSED)
19566 const struct builtin_description * d;
19567 enum insn_code icode;
19568 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19569 tree arg0;
19570 tree arg1;
19571 tree arg2;
19572 rtx op0;
19573 rtx op1;
19574 rtx op2;
19575 rtx pat;
19576 int fcode = DECL_FUNCTION_CODE (fndecl);
19577 size_t i;
19578 enum machine_mode tmode;
19579 enum machine_mode mode0;
19580 enum machine_mode mode1;
19581 enum machine_mode mode2;
19583 if (fcode >= ARM_BUILTIN_NEON_BASE)
19584 return arm_expand_neon_builtin (fcode, exp, target);
19586 switch (fcode)
19588 case ARM_BUILTIN_TEXTRMSB:
19589 case ARM_BUILTIN_TEXTRMUB:
19590 case ARM_BUILTIN_TEXTRMSH:
19591 case ARM_BUILTIN_TEXTRMUH:
19592 case ARM_BUILTIN_TEXTRMSW:
19593 case ARM_BUILTIN_TEXTRMUW:
19594 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19595 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19596 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19597 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19598 : CODE_FOR_iwmmxt_textrmw);
19600 arg0 = CALL_EXPR_ARG (exp, 0);
19601 arg1 = CALL_EXPR_ARG (exp, 1);
19602 op0 = expand_normal (arg0);
19603 op1 = expand_normal (arg1);
19604 tmode = insn_data[icode].operand[0].mode;
19605 mode0 = insn_data[icode].operand[1].mode;
19606 mode1 = insn_data[icode].operand[2].mode;
19608 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19609 op0 = copy_to_mode_reg (mode0, op0);
19610 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19612 /* @@@ better error message */
19613 error ("selector must be an immediate");
19614 return gen_reg_rtx (tmode);
19616 if (target == 0
19617 || GET_MODE (target) != tmode
19618 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19619 target = gen_reg_rtx (tmode);
19620 pat = GEN_FCN (icode) (target, op0, op1);
19621 if (! pat)
19622 return 0;
19623 emit_insn (pat);
19624 return target;
19626 case ARM_BUILTIN_TINSRB:
19627 case ARM_BUILTIN_TINSRH:
19628 case ARM_BUILTIN_TINSRW:
19629 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19630 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19631 : CODE_FOR_iwmmxt_tinsrw);
19632 arg0 = CALL_EXPR_ARG (exp, 0);
19633 arg1 = CALL_EXPR_ARG (exp, 1);
19634 arg2 = CALL_EXPR_ARG (exp, 2);
19635 op0 = expand_normal (arg0);
19636 op1 = expand_normal (arg1);
19637 op2 = expand_normal (arg2);
19638 tmode = insn_data[icode].operand[0].mode;
19639 mode0 = insn_data[icode].operand[1].mode;
19640 mode1 = insn_data[icode].operand[2].mode;
19641 mode2 = insn_data[icode].operand[3].mode;
19643 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19644 op0 = copy_to_mode_reg (mode0, op0);
19645 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19646 op1 = copy_to_mode_reg (mode1, op1);
19647 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19649 /* @@@ better error message */
19650 error ("selector must be an immediate");
19651 return const0_rtx;
19653 if (target == 0
19654 || GET_MODE (target) != tmode
19655 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19656 target = gen_reg_rtx (tmode);
19657 pat = GEN_FCN (icode) (target, op0, op1, op2);
19658 if (! pat)
19659 return 0;
19660 emit_insn (pat);
19661 return target;
19663 case ARM_BUILTIN_SETWCX:
19664 arg0 = CALL_EXPR_ARG (exp, 0);
19665 arg1 = CALL_EXPR_ARG (exp, 1);
19666 op0 = force_reg (SImode, expand_normal (arg0));
19667 op1 = expand_normal (arg1);
19668 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19669 return 0;
19671 case ARM_BUILTIN_GETWCX:
19672 arg0 = CALL_EXPR_ARG (exp, 0);
19673 op0 = expand_normal (arg0);
19674 target = gen_reg_rtx (SImode);
19675 emit_insn (gen_iwmmxt_tmrc (target, op0));
19676 return target;
19678 case ARM_BUILTIN_WSHUFH:
19679 icode = CODE_FOR_iwmmxt_wshufh;
19680 arg0 = CALL_EXPR_ARG (exp, 0);
19681 arg1 = CALL_EXPR_ARG (exp, 1);
19682 op0 = expand_normal (arg0);
19683 op1 = expand_normal (arg1);
19684 tmode = insn_data[icode].operand[0].mode;
19685 mode1 = insn_data[icode].operand[1].mode;
19686 mode2 = insn_data[icode].operand[2].mode;
19688 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19689 op0 = copy_to_mode_reg (mode1, op0);
19690 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19692 /* @@@ better error message */
19693 error ("mask must be an immediate");
19694 return const0_rtx;
19696 if (target == 0
19697 || GET_MODE (target) != tmode
19698 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19699 target = gen_reg_rtx (tmode);
19700 pat = GEN_FCN (icode) (target, op0, op1);
19701 if (! pat)
19702 return 0;
19703 emit_insn (pat);
19704 return target;
19706 case ARM_BUILTIN_WSADB:
19707 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19708 case ARM_BUILTIN_WSADH:
19709 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19710 case ARM_BUILTIN_WSADBZ:
19711 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19712 case ARM_BUILTIN_WSADHZ:
19713 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19715 /* Several three-argument builtins. */
19716 case ARM_BUILTIN_WMACS:
19717 case ARM_BUILTIN_WMACU:
19718 case ARM_BUILTIN_WALIGN:
19719 case ARM_BUILTIN_TMIA:
19720 case ARM_BUILTIN_TMIAPH:
19721 case ARM_BUILTIN_TMIATT:
19722 case ARM_BUILTIN_TMIATB:
19723 case ARM_BUILTIN_TMIABT:
19724 case ARM_BUILTIN_TMIABB:
19725 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19726 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19727 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19728 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19729 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19730 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19731 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19732 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19733 : CODE_FOR_iwmmxt_walign);
19734 arg0 = CALL_EXPR_ARG (exp, 0);
19735 arg1 = CALL_EXPR_ARG (exp, 1);
19736 arg2 = CALL_EXPR_ARG (exp, 2);
19737 op0 = expand_normal (arg0);
19738 op1 = expand_normal (arg1);
19739 op2 = expand_normal (arg2);
19740 tmode = insn_data[icode].operand[0].mode;
19741 mode0 = insn_data[icode].operand[1].mode;
19742 mode1 = insn_data[icode].operand[2].mode;
19743 mode2 = insn_data[icode].operand[3].mode;
19745 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19746 op0 = copy_to_mode_reg (mode0, op0);
19747 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19748 op1 = copy_to_mode_reg (mode1, op1);
19749 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19750 op2 = copy_to_mode_reg (mode2, op2);
19751 if (target == 0
19752 || GET_MODE (target) != tmode
19753 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19754 target = gen_reg_rtx (tmode);
19755 pat = GEN_FCN (icode) (target, op0, op1, op2);
19756 if (! pat)
19757 return 0;
19758 emit_insn (pat);
19759 return target;
19761 case ARM_BUILTIN_WZERO:
19762 target = gen_reg_rtx (DImode);
19763 emit_insn (gen_iwmmxt_clrdi (target));
19764 return target;
19766 case ARM_BUILTIN_THREAD_POINTER:
19767 return arm_load_tp (target);
19769 default:
19770 break;
19773 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19774 if (d->code == (const enum arm_builtins) fcode)
19775 return arm_expand_binop_builtin (d->icode, exp, target);
19777 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19778 if (d->code == (const enum arm_builtins) fcode)
19779 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19781 /* @@@ Should really do something sensible here. */
19782 return NULL_RTX;
19785 /* Return the number (counting from 0) of
19786 the least significant set bit in MASK. */
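/* E.g. number_of_first_bit_set (0x18) is 3.  MASK is assumed to be
   nonzero; the loop relies on at least one bit being set. */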
19788 inline static int
19789 number_of_first_bit_set (unsigned mask)
19791 int bit;
19793 for (bit = 0;
19794 (mask & (1 << bit)) == 0;
19795 ++bit)
19796 continue;
19798 return bit;
19801 /* Emit code to push or pop registers to or from the stack. F is the
19802 assembly file. MASK is the registers to push or pop. PUSH is
19803 nonzero if we should push, and zero if we should pop. For debugging
19804 output, if pushing, adjust CFA_OFFSET by the amount of space added
19805 to the stack. REAL_REGS should have the same number of bits set as
19806 MASK, and will be used instead (in the same order) to describe which
19807 registers were saved - this is used to mark the save slots when we
19808 push high registers after moving them to low registers. */
19809 static void
19810 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19811 unsigned long real_regs)
19813 int regno;
19814 int lo_mask = mask & 0xFF;
19815 int pushed_words = 0;
19817 gcc_assert (mask);
19819 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19821 /* Special case. Do not generate a POP PC statement here; do it in
19822 thumb_exit (). */
19823 thumb_exit (f, -1);
19824 return;
19827 if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
19829 fprintf (f, "\t.save\t{");
19830 for (regno = 0; regno < 15; regno++)
19832 if (real_regs & (1 << regno))
19834 if (real_regs & ((1 << regno) -1))
19835 fprintf (f, ", ");
19836 asm_fprintf (f, "%r", regno);
19839 fprintf (f, "}\n");
19842 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19844 /* Look at the low registers first. */
19845 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19847 if (lo_mask & 1)
19849 asm_fprintf (f, "%r", regno);
19851 if ((lo_mask & ~1) != 0)
19852 fprintf (f, ", ");
19854 pushed_words++;
19858 if (push && (mask & (1 << LR_REGNUM)))
19860 /* Catch pushing the LR. */
19861 if (mask & 0xFF)
19862 fprintf (f, ", ");
19864 asm_fprintf (f, "%r", LR_REGNUM);
19866 pushed_words++;
19868 else if (!push && (mask & (1 << PC_REGNUM)))
19870 /* Catch popping the PC. */
19871 if (TARGET_INTERWORK || TARGET_BACKTRACE
19872 || crtl->calls_eh_return)
19874 /* The PC is never popped directly; instead
19875 it is popped into r3 and then BX is used. */
19876 fprintf (f, "}\n");
19878 thumb_exit (f, -1);
19880 return;
19882 else
19884 if (mask & 0xFF)
19885 fprintf (f, ", ");
19887 asm_fprintf (f, "%r", PC_REGNUM);
19891 fprintf (f, "}\n");
19893 if (push && pushed_words && dwarf2out_do_frame ())
19895 char *l = dwarf2out_cfi_label (false);
19896 int pushed_mask = real_regs;
19898 *cfa_offset += pushed_words * 4;
19899 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19901 pushed_words = 0;
19902 pushed_mask = real_regs;
19903 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19905 if (pushed_mask & 1)
19906 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
19911 /* Generate code to return from a thumb function.
19912 If 'reg_containing_return_addr' is -1, then the return address is
19913 actually on the stack, at the stack pointer. */
19914 static void
19915 thumb_exit (FILE *f, int reg_containing_return_addr)
19917 unsigned regs_available_for_popping;
19918 unsigned regs_to_pop;
19919 int pops_needed;
19920 unsigned available;
19921 unsigned required;
19922 int mode;
19923 int size;
19924 int restore_a4 = FALSE;
19926 /* Compute the registers we need to pop. */
19927 regs_to_pop = 0;
19928 pops_needed = 0;
19930 if (reg_containing_return_addr == -1)
19932 regs_to_pop |= 1 << LR_REGNUM;
19933 ++pops_needed;
19936 if (TARGET_BACKTRACE)
19938 /* Restore the (ARM) frame pointer and stack pointer. */
19939 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
19940 pops_needed += 2;
19943 /* If there is nothing to pop then just emit the BX instruction and
19944 return. */
19945 if (pops_needed == 0)
19947 if (crtl->calls_eh_return)
19948 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19950 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19951 return;
19953 /* Otherwise if we are not supporting interworking and we have not created
19954 a backtrace structure and the function was not entered in ARM mode then
19955 just pop the return address straight into the PC. */
19956 else if (!TARGET_INTERWORK
19957 && !TARGET_BACKTRACE
19958 && !is_called_in_ARM_mode (current_function_decl)
19959 && !crtl->calls_eh_return)
19961 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
19962 return;
19965 /* Find out how many of the (return) argument registers we can corrupt. */
19966 regs_available_for_popping = 0;
19968 /* If returning via __builtin_eh_return, the bottom three registers
19969 all contain information needed for the return. */
19970 if (crtl->calls_eh_return)
19971 size = 12;
19972 else
19974 /* We can deduce the registers used from the function's
19975 return value. This is more reliable than examining
19976 df_regs_ever_live_p () because that will be set if the register is
19977 ever used in the function, not just if the register is used
19978 to hold a return value. */
19980 if (crtl->return_rtx != 0)
19981 mode = GET_MODE (crtl->return_rtx);
19982 else
19983 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19985 size = GET_MODE_SIZE (mode);
19987 if (size == 0)
19989 /* In a void function we can use any argument register.
19990 In a function that returns a structure on the stack
19991 we can use the second and third argument registers. */
19992 if (mode == VOIDmode)
19993 regs_available_for_popping =
19994 (1 << ARG_REGISTER (1))
19995 | (1 << ARG_REGISTER (2))
19996 | (1 << ARG_REGISTER (3));
19997 else
19998 regs_available_for_popping =
19999 (1 << ARG_REGISTER (2))
20000 | (1 << ARG_REGISTER (3));
20002 else if (size <= 4)
20003 regs_available_for_popping =
20004 (1 << ARG_REGISTER (2))
20005 | (1 << ARG_REGISTER (3));
20006 else if (size <= 8)
20007 regs_available_for_popping =
20008 (1 << ARG_REGISTER (3));
20011 /* Match registers to be popped with registers into which we pop them. */
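/* Each iteration clears the least significant set bit of both masks
   (x & -x isolates that bit), so POPS_NEEDED is decremented once for
   every register that can be popped directly into a free register. */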
20012 for (available = regs_available_for_popping,
20013 required = regs_to_pop;
20014 required != 0 && available != 0;
20015 available &= ~(available & - available),
20016 required &= ~(required & - required))
20017 -- pops_needed;
20019 /* If we have any popping registers left over, remove them. */
20020 if (available > 0)
20021 regs_available_for_popping &= ~available;
20023 /* Otherwise if we need another popping register we can use
20024 the fourth argument register. */
20025 else if (pops_needed)
20027 /* If we have not found any free argument registers and
20028 reg a4 contains the return address, we must move it. */
20029 if (regs_available_for_popping == 0
20030 && reg_containing_return_addr == LAST_ARG_REGNUM)
20032 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20033 reg_containing_return_addr = LR_REGNUM;
20035 else if (size > 12)
20037 /* Register a4 is being used to hold part of the return value,
20038 but we have dire need of a free, low register. */
20039 restore_a4 = TRUE;
20041 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
20044 if (reg_containing_return_addr != LAST_ARG_REGNUM)
20046 /* The fourth argument register is available. */
20047 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
20049 --pops_needed;
20053 /* Pop as many registers as we can. */
20054 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20055 regs_available_for_popping);
20057 /* Process the registers we popped. */
20058 if (reg_containing_return_addr == -1)
20060 /* The return address was popped into the lowest numbered register. */
20061 regs_to_pop &= ~(1 << LR_REGNUM);
20063 reg_containing_return_addr =
20064 number_of_first_bit_set (regs_available_for_popping);
20066 /* Remove this register from the mask of available registers, so that
20067 the return address will not be corrupted by further pops. */
20068 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
20071 /* If we popped other registers then handle them here. */
20072 if (regs_available_for_popping)
20074 int frame_pointer;
20076 /* Work out which register currently contains the frame pointer. */
20077 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20079 /* Move it into the correct place. */
20080 asm_fprintf (f, "\tmov\t%r, %r\n",
20081 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20083 /* (Temporarily) remove it from the mask of popped registers. */
20084 regs_available_for_popping &= ~(1 << frame_pointer);
20085 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20087 if (regs_available_for_popping)
20089 int stack_pointer;
20091 /* We popped the stack pointer as well;
20092 find the register that contains it. */
20093 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20095 /* Move it into the stack register. */
20096 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20098 /* At this point we have popped all necessary registers, so
20099 do not worry about restoring regs_available_for_popping
20100 to its correct value:
20102 assert (pops_needed == 0)
20103 assert (regs_available_for_popping == (1 << frame_pointer))
20104 assert (regs_to_pop == (1 << STACK_POINTER)) */
20106 else
20108 /* Since we have just moved the popped value into the frame
20109 pointer, the popping register is available for reuse, and
20110 we know that we still have the stack pointer left to pop. */
20111 regs_available_for_popping |= (1 << frame_pointer);
20115 /* If we still have registers left on the stack, but we no longer have
20116 any registers into which we can pop them, then we must move the return
20117 address into the link register and make available the register that
20118 contained it. */
20119 if (regs_available_for_popping == 0 && pops_needed > 0)
20121 regs_available_for_popping |= 1 << reg_containing_return_addr;
20123 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20124 reg_containing_return_addr);
20126 reg_containing_return_addr = LR_REGNUM;
20129 /* If we have registers left on the stack then pop some more.
20130 We know that at most we will want to pop FP and SP. */
20131 if (pops_needed > 0)
20133 int popped_into;
20134 int move_to;
20136 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20137 regs_available_for_popping);
20139 /* We have popped either FP or SP.
20140 Move whichever one it is into the correct register. */
20141 popped_into = number_of_first_bit_set (regs_available_for_popping);
20142 move_to = number_of_first_bit_set (regs_to_pop);
20144 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20146 regs_to_pop &= ~(1 << move_to);
20148 --pops_needed;
20151 /* If we still have not popped everything then we must have only
20152 had one register available to us and we are now popping the SP. */
20153 if (pops_needed > 0)
20155 int popped_into;
20157 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20158 regs_available_for_popping);
20160 popped_into = number_of_first_bit_set (regs_available_for_popping);
20162 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
20164 assert (regs_to_pop == (1 << STACK_POINTER))
20165 assert (pops_needed == 1)
20169 /* If necessary restore the a4 register. */
20170 if (restore_a4)
20172 if (reg_containing_return_addr != LR_REGNUM)
20174 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20175 reg_containing_return_addr = LR_REGNUM;
20178 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
20181 if (crtl->calls_eh_return)
20182 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20184 /* Return to caller. */
20185 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20188 /* Scan INSN just before assembler is output for it.
20189 For Thumb-1, we track the status of the condition codes; this
20190 information is used in the cbranchsi4_insn pattern. */
20191 void
20192 thumb1_final_prescan_insn (rtx insn)
20194 if (flag_print_asm_name)
20195 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
20196 INSN_ADDRESSES (INSN_UID (insn)));
20197 /* Don't overwrite the previous setter when we get to a cbranch. */
20198 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20200 enum attr_conds conds;
20202 if (cfun->machine->thumb1_cc_insn)
20204 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20205 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20206 CC_STATUS_INIT;
20208 conds = get_attr_conds (insn);
20209 if (conds == CONDS_SET)
20211 rtx set = single_set (insn);
20212 cfun->machine->thumb1_cc_insn = insn;
20213 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20214 cfun->machine->thumb1_cc_op1 = const0_rtx;
20215 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20216 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20218 rtx src1 = XEXP (SET_SRC (set), 1);
20219 if (src1 == const0_rtx)
20220 cfun->machine->thumb1_cc_mode = CCmode;
20223 else if (conds != CONDS_NOCOND)
20224 cfun->machine->thumb1_cc_insn = NULL_RTX;
20229 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20231 unsigned HOST_WIDE_INT mask = 0xff;
20232 int i;
20234 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20235 if (val == 0) /* XXX */
20236 return 0;
20238 for (i = 0; i < 25; i++)
20239 if ((val & (mask << i)) == val)
20240 return 1;
20242 return 0;
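/* E.g. 0x00ff0000 is shiftable (0xff << 16), whereas 0x101 is not,
   because its set bits do not fit within any 8 consecutive bit positions. */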
20245 /* Returns nonzero if the current function contains,
20246 or might contain a far jump. */
20247 static int
20248 thumb_far_jump_used_p (void)
20250 rtx insn;
20252 /* This test is only important for leaf functions. */
20253 /* assert (!leaf_function_p ()); */
20255 /* If we have already decided that far jumps may be used,
20256 do not bother checking again, and always return true even if
20257 it turns out that they are not being used. Once we have made
20258 the decision that far jumps are present (and hence that the link
20259 register will be pushed onto the stack) we cannot go back on it. */
20260 if (cfun->machine->far_jump_used)
20261 return 1;
20263 /* If this function is not being called from the prologue/epilogue
20264 generation code then it must be being called from the
20265 INITIAL_ELIMINATION_OFFSET macro. */
20266 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20268 /* In this case we know that we are being asked about the elimination
20269 of the arg pointer register. If that register is not being used,
20270 then there are no arguments on the stack, and we do not have to
20271 worry that a far jump might force the prologue to push the link
20272 register, changing the stack offsets. In this case we can just
20273 return false, since the presence of far jumps in the function will
20274 not affect stack offsets.
20276 If the arg pointer is live (or if it was live, but has now been
20277 eliminated and so set to dead) then we do have to test to see if
20278 the function might contain a far jump. This test can lead to some
20279 false negatives, since before reload is completed, the length of
20280 branch instructions is not known, so gcc defaults to returning their
20281 longest length, which in turn sets the far jump attribute to true.
20283 A false negative will not result in bad code being generated, but it
20284 will result in a needless push and pop of the link register. We
20285 hope that this does not occur too often.
20287 If we need doubleword stack alignment this could affect the other
20288 elimination offsets so we can't risk getting it wrong. */
20289 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20290 cfun->machine->arg_pointer_live = 1;
20291 else if (!cfun->machine->arg_pointer_live)
20292 return 0;
20295 /* Check to see if the function contains a branch
20296 insn with the far jump attribute set. */
20297 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20299 if (GET_CODE (insn) == JUMP_INSN
20300 /* Ignore tablejump patterns. */
20301 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20302 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20303 && get_attr_far_jump (insn) == FAR_JUMP_YES
20306 /* Record the fact that we have decided that
20307 the function does use far jumps. */
20308 cfun->machine->far_jump_used = 1;
20309 return 1;
20313 return 0;
20316 /* Return nonzero if FUNC must be entered in ARM mode. */
20318 is_called_in_ARM_mode (tree func)
20320 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20322 /* Ignore the problem about functions whose address is taken. */
20323 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20324 return TRUE;
20326 #ifdef ARM_PE
20327 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20328 #else
20329 return FALSE;
20330 #endif
20333 /* Given the stack offsets and register mask in OFFSETS, decide how
20334 many additional registers to push instead of subtracting a constant
20335 from SP. For epilogues the principle is the same except we use pop.
20336 FOR_PROLOGUE indicates which we're generating. */
20337 static int
20338 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20340 HOST_WIDE_INT amount;
20341 unsigned long live_regs_mask = offsets->saved_regs_mask;
20342 /* Extract a mask of the ones we can give to the Thumb's push/pop
20343 instruction. */
20344 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20345 /* Then count how many other high registers will need to be pushed. */
20346 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20347 int n_free, reg_base;
20349 if (!for_prologue && frame_pointer_needed)
20350 amount = offsets->locals_base - offsets->saved_regs;
20351 else
20352 amount = offsets->outgoing_args - offsets->saved_regs;
20354 /* If the stack frame size is 512 exactly, we can save one load
20355 instruction, which should make this a win even when optimizing
20356 for speed. */
20357 if (!optimize_size && amount != 512)
20358 return 0;
20360 /* Can't do this if there are high registers to push. */
20361 if (high_regs_pushed != 0)
20362 return 0;
20364 /* Shouldn't do it in the prologue if no registers would normally
20365 be pushed at all. In the epilogue, also allow it if we'll have
20366 a pop insn for the PC. */
20367 if (l_mask == 0
20368 && (for_prologue
20369 || TARGET_BACKTRACE
20370 || (live_regs_mask & 1 << LR_REGNUM) == 0
20371 || TARGET_INTERWORK
20372 || crtl->args.pretend_args_size != 0))
20373 return 0;
20375 /* Don't do this if thumb_expand_prologue wants to emit instructions
20376 between the push and the stack frame allocation. */
20377 if (for_prologue
20378 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20379 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20380 return 0;
20382 reg_base = 0;
20383 n_free = 0;
20384 if (!for_prologue)
20386 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20387 live_regs_mask >>= reg_base;
20390 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20391 && (for_prologue || call_used_regs[reg_base + n_free]))
20393 live_regs_mask >>= 1;
20394 n_free++;
20397 if (n_free == 0)
20398 return 0;
20399 gcc_assert (amount / 4 * 4 == amount);
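/* A single Thumb-1 SUB SP, #imm can subtract at most 508 bytes, so when
   the frame is only slightly larger it is cheaper to push a few extra
   registers instead.  E.g. AMOUNT == 516 with two free low registers
   pushes two of them, leaving a 508-byte adjustment that still fits in
   one instruction. */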
20401 if (amount >= 512 && (amount - n_free * 4) < 512)
20402 return (amount - 508) / 4;
20403 if (amount <= n_free * 4)
20404 return amount / 4;
20405 return 0;
20408 /* The bits which aren't usefully expanded as rtl. */
20409 const char *
20410 thumb_unexpanded_epilogue (void)
20412 arm_stack_offsets *offsets;
20413 int regno;
20414 unsigned long live_regs_mask = 0;
20415 int high_regs_pushed = 0;
20416 int extra_pop;
20417 int had_to_push_lr;
20418 int size;
20420 if (cfun->machine->return_used_this_function != 0)
20421 return "";
20423 if (IS_NAKED (arm_current_func_type ()))
20424 return "";
20426 offsets = arm_get_frame_offsets ();
20427 live_regs_mask = offsets->saved_regs_mask;
20428 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20430 /* We can deduce the registers used from the function's return value.
20431 This is more reliable than examining df_regs_ever_live_p () because that
20432 will be set if the register is ever used in the function, not just if
20433 the register is used to hold a return value. */
20434 size = arm_size_return_regs ();
20436 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20437 if (extra_pop > 0)
20439 unsigned long extra_mask = (1 << extra_pop) - 1;
20440 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20443 /* The prolog may have pushed some high registers to use as
20444 work registers; e.g. the testsuite file:
20445 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20446 compiles to produce:
20447 push {r4, r5, r6, r7, lr}
20448 mov r7, r9
20449 mov r6, r8
20450 push {r6, r7}
20451 as part of the prolog. We have to undo that pushing here. */
20453 if (high_regs_pushed)
20455 unsigned long mask = live_regs_mask & 0xff;
20456 int next_hi_reg;
20458 /* The available low registers depend on the size of the value we are
20459 returning. */
20460 if (size <= 12)
20461 mask |= 1 << 3;
20462 if (size <= 8)
20463 mask |= 1 << 2;
20465 if (mask == 0)
20466 /* Oh dear! We have no low registers into which we can pop
20467 high registers! */
20468 internal_error
20469 ("no low registers available for popping high registers");
20471 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20472 if (live_regs_mask & (1 << next_hi_reg))
20473 break;
20475 while (high_regs_pushed)
20477 /* Find lo register(s) into which the high register(s) can
20478 be popped. */
20479 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20481 if (mask & (1 << regno))
20482 high_regs_pushed--;
20483 if (high_regs_pushed == 0)
20484 break;
20487 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20489 /* Pop the values into the low register(s). */
20490 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20492 /* Move the value(s) into the high registers. */
20493 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20495 if (mask & (1 << regno))
20497 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20498 regno);
20500 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20501 if (live_regs_mask & (1 << next_hi_reg))
20502 break;
20506 live_regs_mask &= ~0x0f00;
20509 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20510 live_regs_mask &= 0xff;
20512 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20514 /* Pop the return address into the PC. */
20515 if (had_to_push_lr)
20516 live_regs_mask |= 1 << PC_REGNUM;
20518 /* Either no argument registers were pushed or a backtrace
20519 structure was created which includes an adjusted stack
20520 pointer, so just pop everything. */
20521 if (live_regs_mask)
20522 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20523 live_regs_mask);
20525 /* We have either just popped the return address into the
20526 PC or it was kept in LR for the entire function.
20527 Note that thumb_pushpop has already called thumb_exit if the
20528 PC was in the list. */
20529 if (!had_to_push_lr)
20530 thumb_exit (asm_out_file, LR_REGNUM);
20532 else
20534 /* Pop everything but the return address. */
20535 if (live_regs_mask)
20536 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20537 live_regs_mask);
20539 if (had_to_push_lr)
20541 if (size > 12)
20543 /* We have no free low regs, so save one. */
20544 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20545 LAST_ARG_REGNUM);
20548 /* Get the return address into a temporary register. */
20549 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20550 1 << LAST_ARG_REGNUM);
20552 if (size > 12)
20554 /* Move the return address to lr. */
20555 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20556 LAST_ARG_REGNUM);
20557 /* Restore the low register. */
20558 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20559 IP_REGNUM);
20560 regno = LR_REGNUM;
20562 else
20563 regno = LAST_ARG_REGNUM;
20565 else
20566 regno = LR_REGNUM;
20568 /* Remove the argument registers that were pushed onto the stack. */
20569 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20570 SP_REGNUM, SP_REGNUM,
20571 crtl->args.pretend_args_size);
20573 thumb_exit (asm_out_file, regno);
20576 return "";
20579 /* Functions to save and restore machine-specific function data. */
20580 static struct machine_function *
20581 arm_init_machine_status (void)
20583 struct machine_function *machine;
20584 machine = ggc_alloc_cleared_machine_function ();
20586 #if ARM_FT_UNKNOWN != 0
20587 machine->func_type = ARM_FT_UNKNOWN;
20588 #endif
20589 return machine;
20592 /* Return an RTX indicating where the return address to the
20593 calling function can be found. */
20595 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20597 if (count != 0)
20598 return NULL_RTX;
20600 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20603 /* Do anything needed before RTL is emitted for each function. */
20604 void
20605 arm_init_expanders (void)
20607 /* Arrange to initialize and mark the machine per-function status. */
20608 init_machine_status = arm_init_machine_status;
20610 /* This is to stop the combine pass optimizing away the alignment
20611 adjustment of va_arg. */
20612 /* ??? It is claimed that this should not be necessary. */
20613 if (cfun)
20614 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20618 /* Like arm_compute_initial_elimination_offset. Simpler because there
20619 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20620 to point at the base of the local variables after static stack
20621 space for a function has been allocated. */
20623 HOST_WIDE_INT
20624 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20626 arm_stack_offsets *offsets;
20628 offsets = arm_get_frame_offsets ();
20630 switch (from)
20632 case ARG_POINTER_REGNUM:
20633 switch (to)
20635 case STACK_POINTER_REGNUM:
20636 return offsets->outgoing_args - offsets->saved_args;
20638 case FRAME_POINTER_REGNUM:
20639 return offsets->soft_frame - offsets->saved_args;
20641 case ARM_HARD_FRAME_POINTER_REGNUM:
20642 return offsets->saved_regs - offsets->saved_args;
20644 case THUMB_HARD_FRAME_POINTER_REGNUM:
20645 return offsets->locals_base - offsets->saved_args;
20647 default:
20648 gcc_unreachable ();
20650 break;
20652 case FRAME_POINTER_REGNUM:
20653 switch (to)
20655 case STACK_POINTER_REGNUM:
20656 return offsets->outgoing_args - offsets->soft_frame;
20658 case ARM_HARD_FRAME_POINTER_REGNUM:
20659 return offsets->saved_regs - offsets->soft_frame;
20661 case THUMB_HARD_FRAME_POINTER_REGNUM:
20662 return offsets->locals_base - offsets->soft_frame;
20664 default:
20665 gcc_unreachable ();
20667 break;
20669 default:
20670 gcc_unreachable ();
20674 /* Generate the rest of a function's prologue. */
20675 void
20676 thumb1_expand_prologue (void)
20678 rtx insn, dwarf;
20680 HOST_WIDE_INT amount;
20681 arm_stack_offsets *offsets;
20682 unsigned long func_type;
20683 int regno;
20684 unsigned long live_regs_mask;
20686 func_type = arm_current_func_type ();
20688 /* Naked functions don't have prologues. */
20689 if (IS_NAKED (func_type))
20690 return;
20692 if (IS_INTERRUPT (func_type))
20694 error ("interrupt Service Routines cannot be coded in Thumb mode");
20695 return;
20698 offsets = arm_get_frame_offsets ();
20699 live_regs_mask = offsets->saved_regs_mask;
20700 /* Load the pic register before setting the frame pointer,
20701 so we can use r7 as a temporary work register. */
20702 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20703 arm_load_pic_register (live_regs_mask);
20705 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20706 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20707 stack_pointer_rtx);
20709 if (flag_stack_usage)
20710 current_function_static_stack_size
20711 = offsets->outgoing_args - offsets->saved_args;
20713 amount = offsets->outgoing_args - offsets->saved_regs;
20714 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20715 if (amount)
20717 if (amount < 512)
20719 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20720 GEN_INT (- amount)));
20721 RTX_FRAME_RELATED_P (insn) = 1;
20723 else
20725 rtx reg;
20727 /* The stack decrement is too big for an immediate value in a single
20728 insn. In theory we could issue multiple subtracts, but after
20729 three of them it becomes more space efficient to place the full
20730 value in the constant pool and load it into a register. (Also the
20731 ARM debugger really likes to see only one stack decrement per
20732 function). So instead we look for a scratch register into which
20733 we can load the decrement, and then we subtract this from the
20734 stack pointer. Unfortunately on the thumb the only available
20735 scratch registers are the argument registers, and we cannot use
20736 these as they may hold arguments to the function. Instead we
20737 attempt to locate a call preserved register which is used by this
20738 function. If we can find one, then we know that it will have
20739 been pushed at the start of the prologue and so we can corrupt
20740 it now. */
20741 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20742 if (live_regs_mask & (1 << regno))
20743 break;
20745 gcc_assert (regno <= LAST_LO_REGNUM);
20747 reg = gen_rtx_REG (SImode, regno);
20749 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20751 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20752 stack_pointer_rtx, reg));
20753 RTX_FRAME_RELATED_P (insn) = 1;
20754 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20755 plus_constant (stack_pointer_rtx,
20756 -amount));
20757 RTX_FRAME_RELATED_P (dwarf) = 1;
20758 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20762 if (frame_pointer_needed)
20763 thumb_set_frame_pointer (offsets);
20765 /* If we are profiling, make sure no instructions are scheduled before
20766 the call to mcount. Similarly if the user has requested no
20767 scheduling in the prolog. Similarly if we want non-call exceptions
20768 using the EABI unwinder, to prevent faulting instructions from being
20769 swapped with a stack adjustment. */
20770 if (crtl->profile || !TARGET_SCHED_PROLOG
20771 || (arm_except_unwind_info (&global_options) == UI_TARGET
20772 && cfun->can_throw_non_call_exceptions))
20773 emit_insn (gen_blockage ());
20775 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20776 if (live_regs_mask & 0xff)
20777 cfun->machine->lr_save_eliminated = 0;
20781 void
20782 thumb1_expand_epilogue (void)
20784 HOST_WIDE_INT amount;
20785 arm_stack_offsets *offsets;
20786 int regno;
20788 /* Naked functions don't have epilogues. */
20789 if (IS_NAKED (arm_current_func_type ()))
20790 return;
20792 offsets = arm_get_frame_offsets ();
20793 amount = offsets->outgoing_args - offsets->saved_regs;
20795 if (frame_pointer_needed)
20797 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20798 amount = offsets->locals_base - offsets->saved_regs;
20800 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20802 gcc_assert (amount >= 0);
20803 if (amount)
20805 if (amount < 512)
20806 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20807 GEN_INT (amount)));
20808 else
20810 /* r3 is always free in the epilogue. */
20811 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20813 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20814 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20818 /* Emit a USE (stack_pointer_rtx), so that
20819 the stack adjustment will not be deleted. */
20820 emit_insn (gen_prologue_use (stack_pointer_rtx));
20822 if (crtl->profile || !TARGET_SCHED_PROLOG)
20823 emit_insn (gen_blockage ());
20825 /* Emit a clobber for each register that will be restored in the epilogue,
20826 so that flow2 will get register lifetimes correct. */
20827 for (regno = 0; regno < 13; regno++)
20828 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20829 emit_clobber (gen_rtx_REG (SImode, regno));
20831 if (! df_regs_ever_live_p (LR_REGNUM))
20832 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20835 static void
20836 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20838 arm_stack_offsets *offsets;
20839 unsigned long live_regs_mask = 0;
20840 unsigned long l_mask;
20841 unsigned high_regs_pushed = 0;
20842 int cfa_offset = 0;
20843 int regno;
20845 if (IS_NAKED (arm_current_func_type ()))
20846 return;
20848 if (is_called_in_ARM_mode (current_function_decl))
20850 const char * name;
20852 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20853 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20854 == SYMBOL_REF);
20855 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20857 /* Generate code sequence to switch us into Thumb mode. */
20858 /* The .code 32 directive has already been emitted by
20859 ASM_DECLARE_FUNCTION_NAME. */
20860 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20861 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20863 /* Generate a label, so that the debugger will notice the
20864 change in instruction sets. This label is also used by
20865 the assembler to bypass the ARM code when this function
20866 is called from a Thumb encoded function elsewhere in the
20867 same file. Hence the definition of STUB_NAME here must
20868 agree with the definition in gas/config/tc-arm.c. */
20870 #define STUB_NAME ".real_start_of"
20872 fprintf (f, "\t.code\t16\n");
20873 #ifdef ARM_PE
20874 if (arm_dllexport_name_p (name))
20875 name = arm_strip_name_encoding (name);
20876 #endif
20877 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20878 fprintf (f, "\t.thumb_func\n");
20879 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20882 if (crtl->args.pretend_args_size)
20884 /* Output unwind directive for the stack adjustment. */
20885 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20886 fprintf (f, "\t.pad #%d\n",
20887 crtl->args.pretend_args_size);
20889 if (cfun->machine->uses_anonymous_args)
20891 int num_pushes;
20893 fprintf (f, "\tpush\t{");
20895 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20897 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20898 regno <= LAST_ARG_REGNUM;
20899 regno++)
20900 asm_fprintf (f, "%r%s", regno,
20901 regno == LAST_ARG_REGNUM ? "" : ", ");
20903 fprintf (f, "}\n");
20905 else
20906 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20907 SP_REGNUM, SP_REGNUM,
20908 crtl->args.pretend_args_size);
20910 /* We don't need to record the stores for unwinding (would it
20911 help the debugger any if we did?), but record the change in
20912 the stack pointer. */
20913 if (dwarf2out_do_frame ())
20915 char *l = dwarf2out_cfi_label (false);
20917 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20918 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20922 /* Get the registers we are going to push. */
20923 offsets = arm_get_frame_offsets ();
20924 live_regs_mask = offsets->saved_regs_mask;
20925 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20926 l_mask = live_regs_mask & 0x40ff;
20927 /* Then count how many other high registers will need to be pushed. */
20928 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20930 if (TARGET_BACKTRACE)
20932 unsigned offset;
20933 unsigned work_register;
20935 /* We have been asked to create a stack backtrace structure.
20936 The code looks like this:
20938 0 .align 2
20939 0 func:
20940 0 sub SP, #16 Reserve space for 4 registers.
20941 2 push {R7} Push low registers.
20942 4 add R7, SP, #20 Get the stack pointer before the push.
20943 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20944 8 mov R7, PC Get hold of the start of this code plus 12.
20945 10 str R7, [SP, #16] Store it.
20946 12 mov R7, FP Get hold of the current frame pointer.
20947 14 str R7, [SP, #4] Store it.
20948 16 mov R7, LR Get hold of the current return address.
20949 18 str R7, [SP, #12] Store it.
20950 20 add R7, SP, #16 Point at the start of the backtrace structure.
20951 22 mov FP, R7 Put this value into the frame pointer. */
20953 work_register = thumb_find_work_register (live_regs_mask);
20955 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20956 asm_fprintf (f, "\t.pad #16\n");
20958 asm_fprintf
20959 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20960 SP_REGNUM, SP_REGNUM);
20962 if (dwarf2out_do_frame ())
20964 char *l = dwarf2out_cfi_label (false);
20966 cfa_offset = cfa_offset + 16;
20967 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20970 if (l_mask)
20972 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
20973 offset = bit_count (l_mask) * UNITS_PER_WORD;
20975 else
20976 offset = 0;
20978 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20979 offset + 16 + crtl->args.pretend_args_size);
20981 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20982 offset + 4);
20984 /* Make sure that the instruction fetching the PC is in the right place
20985 to calculate "start of backtrace creation code + 12". */
20986 if (l_mask)
20988 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20989 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20990 offset + 12);
20991 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20992 ARM_HARD_FRAME_POINTER_REGNUM);
20993 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20994 offset);
20996 else
20998 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20999 ARM_HARD_FRAME_POINTER_REGNUM);
21000 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21001 offset);
21002 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21003 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21004 offset + 12);
21007 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
21008 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21009 offset + 8);
21010 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21011 offset + 12);
21012 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
21013 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
21015 /* Optimization: If we are not pushing any low registers but we are going
21016 to push some high registers then delay our first push. This will just
21017 be a push of LR and we can combine it with the push of the first high
21018 register. */
21019 else if ((l_mask & 0xff) != 0
21020 || (high_regs_pushed == 0 && l_mask))
21022 unsigned long mask = l_mask;
21023 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
21024 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
21027 if (high_regs_pushed)
21029 unsigned pushable_regs;
21030 unsigned next_hi_reg;
21032 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
21033 if (live_regs_mask & (1 << next_hi_reg))
21034 break;
21036 pushable_regs = l_mask & 0xff;
21038 if (pushable_regs == 0)
21039 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
21041 while (high_regs_pushed > 0)
21043 unsigned long real_regs_mask = 0;
21045 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
21047 if (pushable_regs & (1 << regno))
21049 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
21051 high_regs_pushed --;
21052 real_regs_mask |= (1 << next_hi_reg);
21054 if (high_regs_pushed)
21056 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
21057 next_hi_reg --)
21058 if (live_regs_mask & (1 << next_hi_reg))
21059 break;
21061 else
21063 pushable_regs &= ~((1 << regno) - 1);
21064 break;
21069 /* If we had to find a work register and we have not yet
21070 saved the LR then add it to the list of regs to push. */
21071 if (l_mask == (1 << LR_REGNUM))
21073 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
21074 1, &cfa_offset,
21075 real_regs_mask | (1 << LR_REGNUM));
21076 l_mask = 0;
21078 else
21079 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
21084 /* Handle the case of a double word load into a low register from
21085 a computed memory address. The computed address may involve a
21086 register which is overwritten by the load. */
21087 const char *
21088 thumb_load_double_from_address (rtx *operands)
21090 rtx addr;
21091 rtx base;
21092 rtx offset;
21093 rtx arg1;
21094 rtx arg2;
21096 gcc_assert (GET_CODE (operands[0]) == REG);
21097 gcc_assert (GET_CODE (operands[1]) == MEM);
21099 /* Get the memory address. */
21100 addr = XEXP (operands[1], 0);
21102 /* Work out how the memory address is computed. */
21103 switch (GET_CODE (addr))
21105 case REG:
21106 operands[2] = adjust_address (operands[1], SImode, 4);
21108 if (REGNO (operands[0]) == REGNO (addr))
21110 output_asm_insn ("ldr\t%H0, %2", operands);
21111 output_asm_insn ("ldr\t%0, %1", operands);
21113 else
21115 output_asm_insn ("ldr\t%0, %1", operands);
21116 output_asm_insn ("ldr\t%H0, %2", operands);
21118 break;
21120 case CONST:
21121 /* Compute <address> + 4 for the high order load. */
21122 operands[2] = adjust_address (operands[1], SImode, 4);
21124 output_asm_insn ("ldr\t%0, %1", operands);
21125 output_asm_insn ("ldr\t%H0, %2", operands);
21126 break;
21128 case PLUS:
21129 arg1 = XEXP (addr, 0);
21130 arg2 = XEXP (addr, 1);
21132 if (CONSTANT_P (arg1))
21133 base = arg2, offset = arg1;
21134 else
21135 base = arg1, offset = arg2;
21137 gcc_assert (GET_CODE (base) == REG);
21139 /* Catch the case of <address> = <reg> + <reg> */
21140 if (GET_CODE (offset) == REG)
21142 int reg_offset = REGNO (offset);
21143 int reg_base = REGNO (base);
21144 int reg_dest = REGNO (operands[0]);
21146 /* Add the base and offset registers together into the
21147 higher destination register. */
21148 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21149 reg_dest + 1, reg_base, reg_offset);
21151 /* Load the lower destination register from the address in
21152 the higher destination register. */
21153 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21154 reg_dest, reg_dest + 1);
21156 /* Load the higher destination register from its own address
21157 plus 4. */
21158 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21159 reg_dest + 1, reg_dest + 1);
21161 else
21163 /* Compute <address> + 4 for the high order load. */
21164 operands[2] = adjust_address (operands[1], SImode, 4);
21166 /* If the computed address is held in the low order register
21167 then load the high order register first, otherwise always
21168 load the low order register first. */
21169 if (REGNO (operands[0]) == REGNO (base))
21171 output_asm_insn ("ldr\t%H0, %2", operands);
21172 output_asm_insn ("ldr\t%0, %1", operands);
21174 else
21176 output_asm_insn ("ldr\t%0, %1", operands);
21177 output_asm_insn ("ldr\t%H0, %2", operands);
21180 break;
21182 case LABEL_REF:
21183 /* With no registers to worry about we can just load the value
21184 directly. */
21185 operands[2] = adjust_address (operands[1], SImode, 4);
21187 output_asm_insn ("ldr\t%H0, %2", operands);
21188 output_asm_insn ("ldr\t%0, %1", operands);
21189 break;
21191 default:
21192 gcc_unreachable ();
21195 return "";
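/* Illustrative sketch (not part of the original source): for the
   register-plus-register case above, with a hypothetical destination
   pair r2:r3, base register r1 and offset register r0, the sequence
   emitted is:

       add  r3, r1, r0      @ address computed into the high dest reg
       ldr  r2, [r3, #0]    @ low word
       ldr  r3, [r3, #4]    @ high word, clobbering the address last

   so the temporary address lives in the high destination register and
   is only overwritten by the final load.  */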
21198 const char *
21199 thumb_output_move_mem_multiple (int n, rtx *operands)
21201 rtx tmp;
21203 switch (n)
21205 case 2:
21206 if (REGNO (operands[4]) > REGNO (operands[5]))
21208 tmp = operands[4];
21209 operands[4] = operands[5];
21210 operands[5] = tmp;
21212 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21213 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21214 break;
21216 case 3:
21217 if (REGNO (operands[4]) > REGNO (operands[5]))
21219 tmp = operands[4];
21220 operands[4] = operands[5];
21221 operands[5] = tmp;
21223 if (REGNO (operands[5]) > REGNO (operands[6]))
21225 tmp = operands[5];
21226 operands[5] = operands[6];
21227 operands[6] = tmp;
21229 if (REGNO (operands[4]) > REGNO (operands[5]))
21231 tmp = operands[4];
21232 operands[4] = operands[5];
21233 operands[5] = tmp;
21236 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21237 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21238 break;
21240 default:
21241 gcc_unreachable ();
21244 return "";
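/* Illustrative sketch (not part of the original source): for N == 2,
   with hypothetical scratch registers r5 and r4 (sorted into ascending
   order above) and source/destination pointers in r1/r0, the output is:

       ldmia  r1!, {r4, r5}
       stmia  r0!, {r4, r5}

   The write-back (!) keeps both pointers advancing for any following
   block of the copy.  */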
21247 /* Output a call-via instruction for thumb state. */
21248 const char *
21249 thumb_call_via_reg (rtx reg)
21251 int regno = REGNO (reg);
21252 rtx *labelp;
21254 gcc_assert (regno < LR_REGNUM);
21256 /* If we are in the normal text section we can use a single instance
21257 per compilation unit. If we are doing function sections, then we need
21258 an entry per section, since we can't rely on reachability. */
21259 if (in_section == text_section)
21261 thumb_call_reg_needed = 1;
21263 if (thumb_call_via_label[regno] == NULL)
21264 thumb_call_via_label[regno] = gen_label_rtx ();
21265 labelp = thumb_call_via_label + regno;
21267 else
21269 if (cfun->machine->call_via[regno] == NULL)
21270 cfun->machine->call_via[regno] = gen_label_rtx ();
21271 labelp = cfun->machine->call_via + regno;
21274 output_asm_insn ("bl\t%a0", labelp);
21275 return "";
21278 /* Routines for generating rtl. */
21279 void
21280 thumb_expand_movmemqi (rtx *operands)
21282 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21283 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21284 HOST_WIDE_INT len = INTVAL (operands[2]);
21285 HOST_WIDE_INT offset = 0;
21287 while (len >= 12)
21289 emit_insn (gen_movmem12b (out, in, out, in));
21290 len -= 12;
21293 if (len >= 8)
21295 emit_insn (gen_movmem8b (out, in, out, in));
21296 len -= 8;
21299 if (len >= 4)
21301 rtx reg = gen_reg_rtx (SImode);
21302 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21303 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21304 len -= 4;
21305 offset += 4;
21308 if (len >= 2)
21310 rtx reg = gen_reg_rtx (HImode);
21311 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21312 plus_constant (in, offset))));
21313 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21314 reg));
21315 len -= 2;
21316 offset += 2;
21319 if (len)
21321 rtx reg = gen_reg_rtx (QImode);
21322 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21323 plus_constant (in, offset))));
21324 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
21325 reg));
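/* Illustrative sketch (not part of the original source): a 15-byte copy
   expands as one movmem12b block (which also advances the IN and OUT
   pointers), followed by an HImode copy at offset 0 and a QImode copy
   at offset 2 to pick up the remaining 3-byte tail.  */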
21329 void
21330 thumb_reload_out_hi (rtx *operands)
21332 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21335 /* Handle reading a half-word from memory during reload. */
21336 void
21337 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21339 gcc_unreachable ();
21342 /* Return the length of a function name prefix
21343 that starts with the character 'c'. */
21344 static int
21345 arm_get_strip_length (int c)
21347 switch (c)
21349 ARM_NAME_ENCODING_LENGTHS
21350 default: return 0;
21354 /* Return a pointer to a function's name with any
21355 and all prefix encodings stripped from it. */
21356 const char *
21357 arm_strip_name_encoding (const char *name)
21359 int skip;
21361 while ((skip = arm_get_strip_length (* name)))
21362 name += skip;
21364 return name;
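/* Illustrative sketch (not part of the original source): the prefixes
   recognised here come from ARM_NAME_ENCODING_LENGTHS in the target
   headers.  For example, a name such as "*foo" (the '*' marks a name
   to be emitted verbatim, as tested in arm_asm_output_labelref below)
   is returned as "foo" once the one-character prefix is skipped.  */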
21367 /* If there is a '*' anywhere in the name's prefix, then
21368 emit the stripped name verbatim, otherwise prepend an
21369 underscore if leading underscores are being used. */
21370 void
21371 arm_asm_output_labelref (FILE *stream, const char *name)
21373 int skip;
21374 int verbatim = 0;
21376 while ((skip = arm_get_strip_length (* name)))
21378 verbatim |= (*name == '*');
21379 name += skip;
21382 if (verbatim)
21383 fputs (name, stream);
21384 else
21385 asm_fprintf (stream, "%U%s", name);
21388 static void
21389 arm_file_start (void)
21391 int val;
21393 if (TARGET_UNIFIED_ASM)
21394 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21396 if (TARGET_BPABI)
21398 const char *fpu_name;
21399 if (arm_selected_arch)
21400 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21401 else
21402 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21404 if (TARGET_SOFT_FLOAT)
21406 if (TARGET_VFP)
21407 fpu_name = "softvfp";
21408 else
21409 fpu_name = "softfpa";
21411 else
21413 fpu_name = arm_fpu_desc->name;
21414 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21416 if (TARGET_HARD_FLOAT)
21417 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21418 if (TARGET_HARD_FLOAT_ABI)
21419 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21422 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21424 /* Some of these attributes only apply when the corresponding features
21425 are used. However we don't have any easy way of figuring this out.
21426 Conservatively record the setting that would have been used. */
21428 /* Tag_ABI_FP_rounding. */
21429 if (flag_rounding_math)
21430 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21431 if (!flag_unsafe_math_optimizations)
21433 /* Tag_ABI_FP_denormal. */
21434 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21435 /* Tag_ABI_FP_exceptions. */
21436 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21438 /* Tag_ABI_FP_user_exceptions. */
21439 if (flag_signaling_nans)
21440 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21441 /* Tag_ABI_FP_number_model. */
21442 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21443 flag_finite_math_only ? 1 : 3);
21445 /* Tag_ABI_align8_needed. */
21446 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21447 /* Tag_ABI_align8_preserved. */
21448 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21449 /* Tag_ABI_enum_size. */
21450 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21451 flag_short_enums ? 1 : 2);
21453 /* Tag_ABI_optimization_goals. */
21454 if (optimize_size)
21455 val = 4;
21456 else if (optimize >= 2)
21457 val = 2;
21458 else if (optimize)
21459 val = 1;
21460 else
21461 val = 6;
21462 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21464 /* Tag_ABI_FP_16bit_format. */
21465 if (arm_fp16_format)
21466 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21467 (int)arm_fp16_format);
21469 if (arm_lang_output_object_attributes_hook)
21470 arm_lang_output_object_attributes_hook();
21472 default_file_start();
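/* Illustrative sketch (not part of the original source): on a BPABI
   target built with, say, -O2 and a hard-float VFP configuration, the
   directives emitted above might include (abridged):

       .syntax unified
       .eabi_attribute 27, 3
       .eabi_attribute 28, 1
       .fpu vfpv3-d16
       .eabi_attribute 24, 1
       .eabi_attribute 25, 1
       .eabi_attribute 26, 2
       .eabi_attribute 30, 2

   The exact set and values depend on the command-line options and the
   FPU selected.  */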
21475 static void
21476 arm_file_end (void)
21478 int regno;
21480 if (NEED_INDICATE_EXEC_STACK)
21481 /* Add .note.GNU-stack. */
21482 file_end_indicate_exec_stack ();
21484 if (! thumb_call_reg_needed)
21485 return;
21487 switch_to_section (text_section);
21488 asm_fprintf (asm_out_file, "\t.code 16\n");
21489 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21491 for (regno = 0; regno < LR_REGNUM; regno++)
21493 rtx label = thumb_call_via_label[regno];
21495 if (label != 0)
21497 targetm.asm_out.internal_label (asm_out_file, "L",
21498 CODE_LABEL_NUMBER (label));
21499 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21504 #ifndef ARM_PE
21505 /* Symbols in the text segment can be accessed without indirecting via the
21506 constant pool; it may take an extra binary operation, but this is still
21507 faster than indirecting via memory. Don't do this when not optimizing,
21508 since we won't be calculating all of the offsets necessary to do this
21509 simplification. */
21511 static void
21512 arm_encode_section_info (tree decl, rtx rtl, int first)
21514 if (optimize > 0 && TREE_CONSTANT (decl))
21515 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21517 default_encode_section_info (decl, rtl, first);
21519 #endif /* !ARM_PE */
21521 static void
21522 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21524 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21525 && !strcmp (prefix, "L"))
21527 arm_ccfsm_state = 0;
21528 arm_target_insn = NULL;
21530 default_internal_label (stream, prefix, labelno);
21533 /* Output code to add DELTA to the first argument, and then jump
21534 to FUNCTION. Used for C++ multiple inheritance. */
21535 static void
21536 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21537 HOST_WIDE_INT delta,
21538 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21539 tree function)
21541 static int thunk_label = 0;
21542 char label[256];
21543 char labelpc[256];
21544 int mi_delta = delta;
21545 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21546 int shift = 0;
21547 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21548 ? 1 : 0);
21549 if (mi_delta < 0)
21550 mi_delta = - mi_delta;
21552 if (TARGET_THUMB1)
21554 int labelno = thunk_label++;
21555 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21556 /* Thunks are entered in ARM mode when available. */
21557 if (TARGET_THUMB1_ONLY)
21559 /* push r3 so we can use it as a temporary. */
21560 /* TODO: Omit this save if r3 is not used. */
21561 fputs ("\tpush {r3}\n", file);
21562 fputs ("\tldr\tr3, ", file);
21564 else
21566 fputs ("\tldr\tr12, ", file);
21568 assemble_name (file, label);
21569 fputc ('\n', file);
21570 if (flag_pic)
21572 /* If we are generating PIC, the ldr instruction below loads
21573 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21574 the address of the add + 8, so we have:
21576 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21577 = target + 1.
21579 Note that we have "+ 1" because some versions of GNU ld
21580 don't set the low bit of the result for R_ARM_REL32
21581 relocations against thumb function symbols.
21582 On ARMv6M this is +4, not +8. */
21583 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21584 assemble_name (file, labelpc);
21585 fputs (":\n", file);
21586 if (TARGET_THUMB1_ONLY)
21588 /* This is 2 insns after the start of the thunk, so we know it
21589 is 4-byte aligned. */
21590 fputs ("\tadd\tr3, pc, r3\n", file);
21591 fputs ("\tmov r12, r3\n", file);
21593 else
21594 fputs ("\tadd\tr12, pc, r12\n", file);
21596 else if (TARGET_THUMB1_ONLY)
21597 fputs ("\tmov r12, r3\n", file);
21599 if (TARGET_THUMB1_ONLY)
21601 if (mi_delta > 255)
21603 fputs ("\tldr\tr3, ", file);
21604 assemble_name (file, label);
21605 fputs ("+4\n", file);
21606 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21607 mi_op, this_regno, this_regno);
21609 else if (mi_delta != 0)
21611 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21612 mi_op, this_regno, this_regno,
21613 mi_delta);
21616 else
21618 /* TODO: Use movw/movt for large constants when available. */
21619 while (mi_delta != 0)
21621 if ((mi_delta & (3 << shift)) == 0)
21622 shift += 2;
21623 else
21625 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21626 mi_op, this_regno, this_regno,
21627 mi_delta & (0xff << shift));
21628 mi_delta &= ~(0xff << shift);
21629 shift += 8;
21633 if (TARGET_THUMB1)
21635 if (TARGET_THUMB1_ONLY)
21636 fputs ("\tpop\t{r3}\n", file);
21638 fprintf (file, "\tbx\tr12\n");
21639 ASM_OUTPUT_ALIGN (file, 2);
21640 assemble_name (file, label);
21641 fputs (":\n", file);
21642 if (flag_pic)
21644 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21645 rtx tem = XEXP (DECL_RTL (function), 0);
21646 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21647 tem = gen_rtx_MINUS (GET_MODE (tem),
21648 tem,
21649 gen_rtx_SYMBOL_REF (Pmode,
21650 ggc_strdup (labelpc)));
21651 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21653 else
21654 /* Output ".word .LTHUNKn". */
21655 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21657 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21658 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21660 else
21662 fputs ("\tb\t", file);
21663 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21664 if (NEED_PLT_RELOC)
21665 fputs ("(PLT)", file);
21666 fputc ('\n', file);
21671 arm_emit_vector_const (FILE *file, rtx x)
21673 int i;
21674 const char * pattern;
21676 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21678 switch (GET_MODE (x))
21680 case V2SImode: pattern = "%08x"; break;
21681 case V4HImode: pattern = "%04x"; break;
21682 case V8QImode: pattern = "%02x"; break;
21683 default: gcc_unreachable ();
21686 fprintf (file, "0x");
21687 for (i = CONST_VECTOR_NUNITS (x); i--;)
21689 rtx element;
21691 element = CONST_VECTOR_ELT (x, i);
21692 fprintf (file, pattern, INTVAL (element));
21695 return 1;
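/* Illustrative sketch (not part of the original source): a V4HImode
   CONST_VECTOR with elements {1, 2, 3, 4} (element 0 first) is printed
   by the loop above from the highest element index downwards, giving
   "0x0004000300020001".  */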
21698 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
21699 HFmode constant pool entries are actually loaded with ldr. */
21700 void
21701 arm_emit_fp16_const (rtx c)
21703 REAL_VALUE_TYPE r;
21704 long bits;
21706 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21707 bits = real_to_target (NULL, &r, HFmode);
21708 if (WORDS_BIG_ENDIAN)
21709 assemble_zeros (2);
21710 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21711 if (!WORDS_BIG_ENDIAN)
21712 assemble_zeros (2);
21715 const char *
21716 arm_output_load_gr (rtx *operands)
21718 rtx reg;
21719 rtx offset;
21720 rtx wcgr;
21721 rtx sum;
21723 if (GET_CODE (operands [1]) != MEM
21724 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21725 || GET_CODE (reg = XEXP (sum, 0)) != REG
21726 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21727 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21728 return "wldrw%?\t%0, %1";
21730 /* Fix up an out-of-range load of a GR register. */
21731 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21732 wcgr = operands[0];
21733 operands[0] = reg;
21734 output_asm_insn ("ldr%?\t%0, %1", operands);
21736 operands[0] = wcgr;
21737 operands[1] = reg;
21738 output_asm_insn ("tmcr%?\t%0, %1", operands);
21739 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21741 return "";
21744 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21746 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21747 named arg and all anonymous args onto the stack.
21748 XXX I know the prologue shouldn't be pushing registers, but it is faster
21749 that way. */
21751 static void
21752 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21753 enum machine_mode mode,
21754 tree type,
21755 int *pretend_size,
21756 int second_time ATTRIBUTE_UNUSED)
21758 int nregs;
21760 cfun->machine->uses_anonymous_args = 1;
21761 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21763 nregs = pcum->aapcs_ncrn;
21764 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21765 nregs++;
21767 else
21768 nregs = pcum->nregs;
21770 if (nregs < NUM_ARG_REGS)
21771 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
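/* Illustrative sketch (not part of the original source): for a
   hypothetical AAPCS function "int f (int a, ...)", one core register
   (r0) is consumed by the named argument, so nregs == 1 and
   *pretend_size becomes (4 - 1) * 4 = 12, telling the prologue to push
   r1-r3 so that the anonymous arguments form a contiguous block on the
   stack.  */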
21774 /* Return nonzero if the CONSUMER instruction (a store) does not need
21775 PRODUCER's value to calculate the address. */
21778 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21780 rtx value = PATTERN (producer);
21781 rtx addr = PATTERN (consumer);
21783 if (GET_CODE (value) == COND_EXEC)
21784 value = COND_EXEC_CODE (value);
21785 if (GET_CODE (value) == PARALLEL)
21786 value = XVECEXP (value, 0, 0);
21787 value = XEXP (value, 0);
21788 if (GET_CODE (addr) == COND_EXEC)
21789 addr = COND_EXEC_CODE (addr);
21790 if (GET_CODE (addr) == PARALLEL)
21791 addr = XVECEXP (addr, 0, 0);
21792 addr = XEXP (addr, 0);
21794 return !reg_overlap_mentioned_p (value, addr);
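/* Illustrative sketch (not part of the original source): with a
   producer such as (set (reg r1) (...)) and a consumer store
   (set (mem (plus (reg r1) (const_int 8))) (reg r2)), the store address
   mentions r1, so the function above returns 0; had the address used
   only unrelated registers it would return 1.  */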
21797 /* Return nonzero if the CONSUMER instruction (a store) does need
21798 PRODUCER's value to calculate the address. */
21801 arm_early_store_addr_dep (rtx producer, rtx consumer)
21803 return !arm_no_early_store_addr_dep (producer, consumer);
21806 /* Return nonzero if the CONSUMER instruction (a load) does need
21807 PRODUCER's value to calculate the address. */
21810 arm_early_load_addr_dep (rtx producer, rtx consumer)
21812 rtx value = PATTERN (producer);
21813 rtx addr = PATTERN (consumer);
21815 if (GET_CODE (value) == COND_EXEC)
21816 value = COND_EXEC_CODE (value);
21817 if (GET_CODE (value) == PARALLEL)
21818 value = XVECEXP (value, 0, 0);
21819 value = XEXP (value, 0);
21820 if (GET_CODE (addr) == COND_EXEC)
21821 addr = COND_EXEC_CODE (addr);
21822 if (GET_CODE (addr) == PARALLEL)
21823 addr = XVECEXP (addr, 0, 0);
21824 addr = XEXP (addr, 1);
21826 return reg_overlap_mentioned_p (value, addr);
21829 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21830 have an early register shift value or amount dependency on the
21831 result of PRODUCER. */
21834 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21836 rtx value = PATTERN (producer);
21837 rtx op = PATTERN (consumer);
21838 rtx early_op;
21840 if (GET_CODE (value) == COND_EXEC)
21841 value = COND_EXEC_CODE (value);
21842 if (GET_CODE (value) == PARALLEL)
21843 value = XVECEXP (value, 0, 0);
21844 value = XEXP (value, 0);
21845 if (GET_CODE (op) == COND_EXEC)
21846 op = COND_EXEC_CODE (op);
21847 if (GET_CODE (op) == PARALLEL)
21848 op = XVECEXP (op, 0, 0);
21849 op = XEXP (op, 1);
21851 early_op = XEXP (op, 0);
21852 /* This is either an actual independent shift, or a shift applied to
21853 the first operand of another operation. We want the whole shift
21854 operation. */
21855 if (GET_CODE (early_op) == REG)
21856 early_op = op;
21858 return !reg_overlap_mentioned_p (value, early_op);
21861 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21862 have an early register shift value dependency on the result of
21863 PRODUCER. */
21866 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21868 rtx value = PATTERN (producer);
21869 rtx op = PATTERN (consumer);
21870 rtx early_op;
21872 if (GET_CODE (value) == COND_EXEC)
21873 value = COND_EXEC_CODE (value);
21874 if (GET_CODE (value) == PARALLEL)
21875 value = XVECEXP (value, 0, 0);
21876 value = XEXP (value, 0);
21877 if (GET_CODE (op) == COND_EXEC)
21878 op = COND_EXEC_CODE (op);
21879 if (GET_CODE (op) == PARALLEL)
21880 op = XVECEXP (op, 0, 0);
21881 op = XEXP (op, 1);
21883 early_op = XEXP (op, 0);
21885 /* This is either an actual independent shift, or a shift applied to
21886 the first operand of another operation. We want the value being
21887 shifted, in either case. */
21888 if (GET_CODE (early_op) != REG)
21889 early_op = XEXP (early_op, 0);
21891 return !reg_overlap_mentioned_p (value, early_op);
21894 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21895 have an early register mult dependency on the result of
21896 PRODUCER. */
21899 arm_no_early_mul_dep (rtx producer, rtx consumer)
21901 rtx value = PATTERN (producer);
21902 rtx op = PATTERN (consumer);
21904 if (GET_CODE (value) == COND_EXEC)
21905 value = COND_EXEC_CODE (value);
21906 if (GET_CODE (value) == PARALLEL)
21907 value = XVECEXP (value, 0, 0);
21908 value = XEXP (value, 0);
21909 if (GET_CODE (op) == COND_EXEC)
21910 op = COND_EXEC_CODE (op);
21911 if (GET_CODE (op) == PARALLEL)
21912 op = XVECEXP (op, 0, 0);
21913 op = XEXP (op, 1);
21915 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
21917 if (GET_CODE (XEXP (op, 0)) == MULT)
21918 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
21919 else
21920 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
21923 return 0;
21926 /* We can't rely on the caller doing the proper promotion when
21927 using APCS or ATPCS. */
21929 static bool
21930 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21932 return !TARGET_AAPCS_BASED;
21935 static enum machine_mode
21936 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
21937 enum machine_mode mode,
21938 int *punsignedp ATTRIBUTE_UNUSED,
21939 const_tree fntype ATTRIBUTE_UNUSED,
21940 int for_return ATTRIBUTE_UNUSED)
21942 if (GET_MODE_CLASS (mode) == MODE_INT
21943 && GET_MODE_SIZE (mode) < 4)
21944 return SImode;
21946 return mode;
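/* Illustrative sketch (not part of the original source): a QImode or
   HImode integer argument or return value (e.g. a plain "char" or
   "short") is widened to SImode here, so it is passed and returned in a
   full 32-bit register; wider and non-integer modes are left
   untouched.  */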
21949 /* AAPCS based ABIs use short enums by default. */
21951 static bool
21952 arm_default_short_enums (void)
21954 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
21958 /* AAPCS requires that anonymous bitfields affect structure alignment. */
21960 static bool
21961 arm_align_anon_bitfield (void)
21963 return TARGET_AAPCS_BASED;
21967 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
21969 static tree
21970 arm_cxx_guard_type (void)
21972 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
21975 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21976 has an accumulator dependency on the result of the producer (a
21977 multiplication instruction) and no other dependency on that result. */
21979 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
21981 rtx mul = PATTERN (producer);
21982 rtx mac = PATTERN (consumer);
21983 rtx mul_result;
21984 rtx mac_op0, mac_op1, mac_acc;
21986 if (GET_CODE (mul) == COND_EXEC)
21987 mul = COND_EXEC_CODE (mul);
21988 if (GET_CODE (mac) == COND_EXEC)
21989 mac = COND_EXEC_CODE (mac);
21991 /* Check that mul is of the form (set (...) (mult ...))
21992 and mla is of the form (set (...) (plus (mult ...) (...))). */
21993 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
21994 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
21995 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
21996 return 0;
21998 mul_result = XEXP (mul, 0);
21999 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
22000 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
22001 mac_acc = XEXP (XEXP (mac, 1), 1);
22003 return (reg_overlap_mentioned_p (mul_result, mac_acc)
22004 && !reg_overlap_mentioned_p (mul_result, mac_op0)
22005 && !reg_overlap_mentioned_p (mul_result, mac_op1));
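/* Illustrative sketch (not part of the original source): with a
   producer (set (reg r0) (mult (reg r1) (reg r2))) and a consumer
   (set (reg r3) (plus (mult (reg r4) (reg r5)) (reg r0))), the multiply
   result r0 feeds only the accumulator operand of the mac, so the
   function above returns nonzero; if r0 also appeared as one of the
   mac's multiply operands it would return 0.  */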
22009 /* The EABI says test the least significant bit of a guard variable. */
22011 static bool
22012 arm_cxx_guard_mask_bit (void)
22014 return TARGET_AAPCS_BASED;
22018 /* The EABI specifies that all array cookies are 8 bytes long. */
22020 static tree
22021 arm_get_cookie_size (tree type)
22023 tree size;
22025 if (!TARGET_AAPCS_BASED)
22026 return default_cxx_get_cookie_size (type);
22028 size = build_int_cst (sizetype, 8);
22029 return size;
22033 /* The EABI says that array cookies should also contain the element size. */
22035 static bool
22036 arm_cookie_has_size (void)
22038 return TARGET_AAPCS_BASED;
22042 /* The EABI says constructors and destructors should return a pointer to
22043 the object constructed/destroyed. */
22045 static bool
22046 arm_cxx_cdtor_returns_this (void)
22048 return TARGET_AAPCS_BASED;
22051 /* The EABI says that an inline function may never be the key
22052 method. */
22054 static bool
22055 arm_cxx_key_method_may_be_inline (void)
22057 return !TARGET_AAPCS_BASED;
22060 static void
22061 arm_cxx_determine_class_data_visibility (tree decl)
22063 if (!TARGET_AAPCS_BASED
22064 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
22065 return;
22067 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22068 is exported. However, on systems without dynamic vague linkage,
22069 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22070 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
22071 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
22072 else
22073 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
22074 DECL_VISIBILITY_SPECIFIED (decl) = 1;
22077 static bool
22078 arm_cxx_class_data_always_comdat (void)
22080 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22081 vague linkage if the class has no key function. */
22082 return !TARGET_AAPCS_BASED;
22086 /* The EABI says __aeabi_atexit should be used to register static
22087 destructors. */
22089 static bool
22090 arm_cxx_use_aeabi_atexit (void)
22092 return TARGET_AAPCS_BASED;
22096 void
22097 arm_set_return_address (rtx source, rtx scratch)
22099 arm_stack_offsets *offsets;
22100 HOST_WIDE_INT delta;
22101 rtx addr;
22102 unsigned long saved_regs;
22104 offsets = arm_get_frame_offsets ();
22105 saved_regs = offsets->saved_regs_mask;
22107 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22108 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22109 else
22111 if (frame_pointer_needed)
22112 addr = plus_constant(hard_frame_pointer_rtx, -4);
22113 else
22115 /* LR will be the first saved register. */
22116 delta = offsets->outgoing_args - (offsets->frame + 4);
22119 if (delta >= 4096)
22121 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22122 GEN_INT (delta & ~4095)));
22123 addr = scratch;
22124 delta &= 4095;
22126 else
22127 addr = stack_pointer_rtx;
22129 addr = plus_constant (addr, delta);
22131 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22136 void
22137 thumb_set_return_address (rtx source, rtx scratch)
22139 arm_stack_offsets *offsets;
22140 HOST_WIDE_INT delta;
22141 HOST_WIDE_INT limit;
22142 int reg;
22143 rtx addr;
22144 unsigned long mask;
22146 emit_use (source);
22148 offsets = arm_get_frame_offsets ();
22149 mask = offsets->saved_regs_mask;
22150 if (mask & (1 << LR_REGNUM))
22152 limit = 1024;
22153 /* Find the saved regs. */
22154 if (frame_pointer_needed)
22156 delta = offsets->soft_frame - offsets->saved_args;
22157 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
22158 if (TARGET_THUMB1)
22159 limit = 128;
22161 else
22163 delta = offsets->outgoing_args - offsets->saved_args;
22164 reg = SP_REGNUM;
22166 /* Allow for the stack frame. */
22167 if (TARGET_THUMB1 && TARGET_BACKTRACE)
22168 delta -= 16;
22169 /* The link register is always the first saved register. */
22170 delta -= 4;
22172 /* Construct the address. */
22173 addr = gen_rtx_REG (SImode, reg);
22174 if (delta > limit)
22176 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
22177 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
22178 addr = scratch;
22180 else
22181 addr = plus_constant (addr, delta);
22183 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22185 else
22186 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22189 /* Implements target hook vector_mode_supported_p. */
22190 bool
22191 arm_vector_mode_supported_p (enum machine_mode mode)
22193 /* Neon also supports V2SImode, etc. listed in the clause below. */
22194 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
22195 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
22196 return true;
22198 if ((TARGET_NEON || TARGET_IWMMXT)
22199 && ((mode == V2SImode)
22200 || (mode == V4HImode)
22201 || (mode == V8QImode)))
22202 return true;
22204 return false;
22207 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22208 registers when autovectorizing for Neon, at least until multiple vector
22209 widths are supported properly by the middle-end. */
22211 static enum machine_mode
22212 arm_preferred_simd_mode (enum machine_mode mode)
22214 if (TARGET_NEON)
22215 switch (mode)
22217 case SFmode:
22218 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22219 case SImode:
22220 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22221 case HImode:
22222 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22223 case QImode:
22224 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22225 case DImode:
22226 if (TARGET_NEON_VECTORIZE_QUAD)
22227 return V2DImode;
22228 break;
22230 default:;
22233 if (TARGET_REALLY_IWMMXT)
22234 switch (mode)
22236 case SImode:
22237 return V2SImode;
22238 case HImode:
22239 return V4HImode;
22240 case QImode:
22241 return V8QImode;
22243 default:;
22246 return word_mode;
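/* Illustrative sketch (not part of the original source): when
   autovectorizing SImode data for Neon, this returns V2SImode for the
   default doubleword registers and V4SImode under
   -mvectorize-with-neon-quad; on iWMMXt targets SImode maps to
   V2SImode; anything unhandled falls back to word_mode, i.e. no
   vectorization preference.  */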
22249 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22251 We need to define this for LO_REGS on thumb. Otherwise we can end up
22252 using r0-r4 for function arguments, r7 for the stack frame and not
22253 have enough left over to do doubleword arithmetic. */
22255 static bool
22256 arm_class_likely_spilled_p (reg_class_t rclass)
22258 if ((TARGET_THUMB && rclass == LO_REGS)
22259 || rclass == CC_REG)
22260 return true;
22262 return false;
22265 /* Implements target hook small_register_classes_for_mode_p. */
22266 bool
22267 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22269 return TARGET_THUMB1;
22272 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22273 ARM insns and therefore guarantee that the shift count is modulo 256.
22274 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22275 guarantee no particular behavior for out-of-range counts. */
22277 static unsigned HOST_WIDE_INT
22278 arm_shift_truncation_mask (enum machine_mode mode)
22280 return mode == SImode ? 255 : 0;
22284 /* Map internal gcc register numbers to DWARF2 register numbers. */
22286 unsigned int
22287 arm_dbx_register_number (unsigned int regno)
22289 if (regno < 16)
22290 return regno;
22292 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22293 compatibility. The EABI defines them as registers 96-103. */
22294 if (IS_FPA_REGNUM (regno))
22295 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22297 if (IS_VFP_REGNUM (regno))
22299 /* See comment in arm_dwarf_register_span. */
22300 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22301 return 64 + regno - FIRST_VFP_REGNUM;
22302 else
22303 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22306 if (IS_IWMMXT_GR_REGNUM (regno))
22307 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22309 if (IS_IWMMXT_REGNUM (regno))
22310 return 112 + regno - FIRST_IWMMXT_REGNUM;
22312 gcc_unreachable ();
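/* Illustrative sketch (not part of the original source): core registers
   r0-r15 keep their numbers 0-15; the first VFP register usable as a
   single-precision register maps to DWARF number 64, with subsequent
   ones following; VFP registers outside the single-precision range use
   the 256-based D-register numbering; FPA registers start at 96 on
   AAPCS targets (16 on legacy ones).  */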
22315 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22316 GCC models them as 64 32-bit registers, so we need to describe this to
22317 the DWARF generation code. Other registers can use the default. */
22318 static rtx
22319 arm_dwarf_register_span (rtx rtl)
22321 unsigned regno;
22322 int nregs;
22323 int i;
22324 rtx p;
22326 regno = REGNO (rtl);
22327 if (!IS_VFP_REGNUM (regno))
22328 return NULL_RTX;
22330 /* XXX FIXME: The EABI defines two VFP register ranges:
22331 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22332 256-287: D0-D31
22333 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22334 corresponding D register. Until GDB supports this, we shall use the
22335 legacy encodings. We also use these encodings for D0-D15 for
22336 compatibility with older debuggers. */
22337 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22338 return NULL_RTX;
22340 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22341 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22342 regno = (regno - FIRST_VFP_REGNUM) / 2;
22343 for (i = 0; i < nregs; i++)
22344 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
22346 return p;
22349 #if ARM_UNWIND_INFO
22350 /* Emit unwind directives for a store-multiple instruction or stack pointer
22351 push during alignment.
22352 These should only ever be generated by the function prologue code, so
22353 expect them to have a particular form. */
22355 static void
22356 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22358 int i;
22359 HOST_WIDE_INT offset;
22360 HOST_WIDE_INT nregs;
22361 int reg_size;
22362 unsigned reg;
22363 unsigned lastreg;
22364 rtx e;
22366 e = XVECEXP (p, 0, 0);
22367 if (GET_CODE (e) != SET)
22368 abort ();
22370 /* First insn will adjust the stack pointer. */
22371 if (GET_CODE (e) != SET
22372 || GET_CODE (XEXP (e, 0)) != REG
22373 || REGNO (XEXP (e, 0)) != SP_REGNUM
22374 || GET_CODE (XEXP (e, 1)) != PLUS)
22375 abort ();
22377 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22378 nregs = XVECLEN (p, 0) - 1;
22380 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
22381 if (reg < 16)
22383 /* The function prologue may also push pc, but not annotate it as it is
22384 never restored. We turn this into a stack pointer adjustment. */
22385 if (nregs * 4 == offset - 4)
22387 fprintf (asm_out_file, "\t.pad #4\n");
22388 offset -= 4;
22390 reg_size = 4;
22391 fprintf (asm_out_file, "\t.save {");
22393 else if (IS_VFP_REGNUM (reg))
22395 reg_size = 8;
22396 fprintf (asm_out_file, "\t.vsave {");
22398 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22400 /* FPA registers are done differently. */
22401 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22402 return;
22404 else
22405 /* Unknown register type. */
22406 abort ();
22408 /* If the stack increment doesn't match the size of the saved registers,
22409 something has gone horribly wrong. */
22410 if (offset != nregs * reg_size)
22411 abort ();
22413 offset = 0;
22414 lastreg = 0;
22415 /* The remaining insns will describe the stores. */
22416 for (i = 1; i <= nregs; i++)
22418 /* Expect (set (mem <addr>) (reg)).
22419 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22420 e = XVECEXP (p, 0, i);
22421 if (GET_CODE (e) != SET
22422 || GET_CODE (XEXP (e, 0)) != MEM
22423 || GET_CODE (XEXP (e, 1)) != REG)
22424 abort ();
22426 reg = REGNO (XEXP (e, 1));
22427 if (reg < lastreg)
22428 abort ();
22430 if (i != 1)
22431 fprintf (asm_out_file, ", ");
22432 /* We can't use %r for vfp because we need to use the
22433 double precision register names. */
22434 if (IS_VFP_REGNUM (reg))
22435 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22436 else
22437 asm_fprintf (asm_out_file, "%r", reg);
22439 #ifdef ENABLE_CHECKING
22440 /* Check that the addresses are consecutive. */
22441 e = XEXP (XEXP (e, 0), 0);
22442 if (GET_CODE (e) == PLUS)
22444 offset += reg_size;
22445 if (GET_CODE (XEXP (e, 0)) != REG
22446 || REGNO (XEXP (e, 0)) != SP_REGNUM
22447 || GET_CODE (XEXP (e, 1)) != CONST_INT
22448 || offset != INTVAL (XEXP (e, 1)))
22449 abort ();
22451 else if (i != 1
22452 || GET_CODE (e) != REG
22453 || REGNO (e) != SP_REGNUM)
22454 abort ();
22455 #endif
22457 fprintf (asm_out_file, "}\n");
22460 /* Emit unwind directives for a SET. */
22462 static void
22463 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22465 rtx e0;
22466 rtx e1;
22467 unsigned reg;
22469 e0 = XEXP (p, 0);
22470 e1 = XEXP (p, 1);
22471 switch (GET_CODE (e0))
22473 case MEM:
22474 /* Pushing a single register. */
22475 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22476 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22477 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22478 abort ();
22480 asm_fprintf (asm_out_file, "\t.save ");
22481 if (IS_VFP_REGNUM (REGNO (e1)))
22482 asm_fprintf(asm_out_file, "{d%d}\n",
22483 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
22484 else
22485 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
22486 break;
22488 case REG:
22489 if (REGNO (e0) == SP_REGNUM)
22491 /* A stack increment. */
22492 if (GET_CODE (e1) != PLUS
22493 || GET_CODE (XEXP (e1, 0)) != REG
22494 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22495 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22496 abort ();
22498 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22499 -INTVAL (XEXP (e1, 1)));
22501 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22503 HOST_WIDE_INT offset;
22505 if (GET_CODE (e1) == PLUS)
22507 if (GET_CODE (XEXP (e1, 0)) != REG
22508 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22509 abort ();
22510 reg = REGNO (XEXP (e1, 0));
22511 offset = INTVAL (XEXP (e1, 1));
22512 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22513 HARD_FRAME_POINTER_REGNUM, reg,
22514 offset);
22516 else if (GET_CODE (e1) == REG)
22518 reg = REGNO (e1);
22519 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22520 HARD_FRAME_POINTER_REGNUM, reg);
22522 else
22523 abort ();
22525 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22527 /* Move from sp to reg. */
22528 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22530 else if (GET_CODE (e1) == PLUS
22531 && GET_CODE (XEXP (e1, 0)) == REG
22532 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22533 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22535 /* Set reg to offset from sp. */
22536 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
22537 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
22539 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22541 /* Stack pointer save before alignment. */
22542 reg = REGNO (e0);
22543 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22544 reg + 0x90, reg);
22546 else
22547 abort ();
22548 break;
22550 default:
22551 abort ();
22556 /* Emit unwind directives for the given insn. */
22558 static void
22559 arm_unwind_emit (FILE * asm_out_file, rtx insn)
22561 rtx pat;
22563 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22564 return;
22566 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22567 && (TREE_NOTHROW (current_function_decl)
22568 || crtl->all_throwers_are_sibcalls))
22569 return;
22571 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
22572 return;
22574 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
22575 if (pat)
22576 pat = XEXP (pat, 0);
22577 else
22578 pat = PATTERN (insn);
22580 switch (GET_CODE (pat))
22582 case SET:
22583 arm_unwind_emit_set (asm_out_file, pat);
22584 break;
22586 case SEQUENCE:
22587 /* Store multiple. */
22588 arm_unwind_emit_sequence (asm_out_file, pat);
22589 break;
22591 default:
22592 abort();
22597 /* Output a reference from a function exception table to the type_info
22598 object X. The EABI specifies that the symbol should be relocated by
22599 an R_ARM_TARGET2 relocation. */
22601 static bool
22602 arm_output_ttype (rtx x)
22604 fputs ("\t.word\t", asm_out_file);
22605 output_addr_const (asm_out_file, x);
22606 /* Use special relocations for symbol references. */
22607 if (GET_CODE (x) != CONST_INT)
22608 fputs ("(TARGET2)", asm_out_file);
22609 fputc ('\n', asm_out_file);
22611 return TRUE;
22614 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22616 static void
22617 arm_asm_emit_except_personality (rtx personality)
22619 fputs ("\t.personality\t", asm_out_file);
22620 output_addr_const (asm_out_file, personality);
22621 fputc ('\n', asm_out_file);
22624 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22626 static void
22627 arm_asm_init_sections (void)
22629 exception_section = get_unnamed_section (0, output_section_asm_op,
22630 "\t.handlerdata");
22632 #endif /* ARM_UNWIND_INFO */
22634 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22636 static enum unwind_info_type
22637 arm_except_unwind_info (struct gcc_options *opts)
22639 /* Honor the --enable-sjlj-exceptions configure switch. */
22640 #ifdef CONFIG_SJLJ_EXCEPTIONS
22641 if (CONFIG_SJLJ_EXCEPTIONS)
22642 return UI_SJLJ;
22643 #endif
22645 /* If not using ARM EABI unwind tables... */
22646 if (ARM_UNWIND_INFO)
22648 /* For simplicity elsewhere in this file, indicate that all unwind
22649 info is disabled if we're not emitting unwind tables. */
22650 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
22651 return UI_NONE;
22652 else
22653 return UI_TARGET;
22656 /* ... we use sjlj exceptions for backwards compatibility. */
22657 return UI_SJLJ;
22661 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22662 stack alignment. */
22664 static void
22665 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22667 rtx unspec = SET_SRC (pattern);
22668 gcc_assert (GET_CODE (unspec) == UNSPEC);
22670 switch (index)
22672 case UNSPEC_STACK_ALIGN:
22673 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22674 put anything on the stack, so hopefully it won't matter.
22675 CFA = SP will be correct after alignment. */
22676 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22677 SET_DEST (pattern));
22678 break;
22679 default:
22680 gcc_unreachable ();
22685 /* Output unwind directives for the start/end of a function. */
22687 void
22688 arm_output_fn_unwind (FILE * f, bool prologue)
22690 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22691 return;
22693 if (prologue)
22694 fputs ("\t.fnstart\n", f);
22695 else
22697 /* If this function will never be unwound, then mark it as such.
22698 The same condition is used in arm_unwind_emit to suppress
22699 the frame annotations. */
22700 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22701 && (TREE_NOTHROW (current_function_decl)
22702 || crtl->all_throwers_are_sibcalls))
22703 fputs("\t.cantunwind\n", f);
22705 fputs ("\t.fnend\n", f);
22709 static bool
22710 arm_emit_tls_decoration (FILE *fp, rtx x)
22712 enum tls_reloc reloc;
22713 rtx val;
22715 val = XVECEXP (x, 0, 0);
22716 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22718 output_addr_const (fp, val);
22720 switch (reloc)
22722 case TLS_GD32:
22723 fputs ("(tlsgd)", fp);
22724 break;
22725 case TLS_LDM32:
22726 fputs ("(tlsldm)", fp);
22727 break;
22728 case TLS_LDO32:
22729 fputs ("(tlsldo)", fp);
22730 break;
22731 case TLS_IE32:
22732 fputs ("(gottpoff)", fp);
22733 break;
22734 case TLS_LE32:
22735 fputs ("(tpoff)", fp);
22736 break;
22737 default:
22738 gcc_unreachable ();
22741 switch (reloc)
22743 case TLS_GD32:
22744 case TLS_LDM32:
22745 case TLS_IE32:
22746 fputs (" + (. - ", fp);
22747 output_addr_const (fp, XVECEXP (x, 0, 2));
22748 fputs (" - ", fp);
22749 output_addr_const (fp, XVECEXP (x, 0, 3));
22750 fputc (')', fp);
22751 break;
22752 default:
22753 break;
22756 return TRUE;
22759 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22761 static void
22762 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22764 gcc_assert (size == 4);
22765 fputs ("\t.word\t", file);
22766 output_addr_const (file, x);
22767 fputs ("(tlsldo)", file);
22770 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22772 static bool
22773 arm_output_addr_const_extra (FILE *fp, rtx x)
22775 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22776 return arm_emit_tls_decoration (fp, x);
22777 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22779 char label[256];
22780 int labelno = INTVAL (XVECEXP (x, 0, 0));
22782 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22783 assemble_name_raw (fp, label);
22785 return TRUE;
22787 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22789 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22790 if (GOT_PCREL)
22791 fputs ("+.", fp);
22792 fputs ("-(", fp);
22793 output_addr_const (fp, XVECEXP (x, 0, 0));
22794 fputc (')', fp);
22795 return TRUE;
22797 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22799 output_addr_const (fp, XVECEXP (x, 0, 0));
22800 if (GOT_PCREL)
22801 fputs ("+.", fp);
22802 fputs ("-(", fp);
22803 output_addr_const (fp, XVECEXP (x, 0, 1));
22804 fputc (')', fp);
22805 return TRUE;
22807 else if (GET_CODE (x) == CONST_VECTOR)
22808 return arm_emit_vector_const (fp, x);
22810 return FALSE;
22813 /* Output assembly for a shift instruction.
22814 SET_FLAGS determines how the instruction modifies the condition codes.
22815 0 - Do not set condition codes.
22816 1 - Set condition codes.
22817 2 - Use smallest instruction. */
22818 const char *
22819 arm_output_shift(rtx * operands, int set_flags)
22821 char pattern[100];
22822 static const char flag_chars[3] = {'?', '.', '!'};
22823 const char *shift;
22824 HOST_WIDE_INT val;
22825 char c;
22827 c = flag_chars[set_flags];
22828 if (TARGET_UNIFIED_ASM)
22830 shift = shift_op(operands[3], &val);
22831 if (shift)
22833 if (val != -1)
22834 operands[2] = GEN_INT(val);
22835 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
22837 else
22838 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
22840 else
22841 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
22842 output_asm_insn (pattern, operands);
22843 return "";
22846 /* Output a Thumb-1 casesi dispatch sequence. */
22847 const char *
22848 thumb1_output_casesi (rtx *operands)
22850 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
22852 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22854 switch (GET_MODE(diff_vec))
22856 case QImode:
22857 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22858 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
22859 case HImode:
22860 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22861 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
22862 case SImode:
22863 return "bl\t%___gnu_thumb1_case_si";
22864 default:
22865 gcc_unreachable ();
22869 /* Output a Thumb-2 casesi instruction. */
22870 const char *
22871 thumb2_output_casesi (rtx *operands)
22873 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
22875 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22877 output_asm_insn ("cmp\t%0, %1", operands);
22878 output_asm_insn ("bhi\t%l3", operands);
22879 switch (GET_MODE(diff_vec))
22881 case QImode:
22882 return "tbb\t[%|pc, %0]";
22883 case HImode:
22884 return "tbh\t[%|pc, %0, lsl #1]";
22885 case SImode:
22886 if (flag_pic)
22888 output_asm_insn ("adr\t%4, %l2", operands);
22889 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
22890 output_asm_insn ("add\t%4, %4, %5", operands);
22891 return "bx\t%4";
22893 else
22895 output_asm_insn ("adr\t%4, %l2", operands);
22896 return "ldr\t%|pc, [%4, %0, lsl #2]";
22898 default:
22899 gcc_unreachable ();
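/* Illustrative sketch (not part of the original source): for an HImode
   dispatch table the sequence produced above is

       cmp  r0, r1          @ index against the table bound
       bhi  .Ldefault       @ out of range -> default label
       tbh  [pc, r0, lsl #1]

   where r0, r1 and .Ldefault stand in for whatever operands 0, 1 and 3
   happen to be.  */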
22903 /* Most ARM cores are single issue, but some newer ones can dual issue.
22904 The scheduler descriptions rely on this being correct. */
22905 static int
22906 arm_issue_rate (void)
22908 switch (arm_tune)
22910 case cortexr4:
22911 case cortexr4f:
22912 case cortexa5:
22913 case cortexa8:
22914 case cortexa9:
22915 case fa726te:
22916 return 2;
22918 default:
22919 return 1;
22923 /* A table and a function to perform ARM-specific name mangling for
22924 NEON vector types in order to conform to the AAPCS (see "Procedure
22925 Call Standard for the ARM Architecture", Appendix A). To qualify
22926 for emission with the mangled names defined in that document, a
22927 vector type must not only be of the correct mode but also be
22928 composed of NEON vector element types (e.g. __builtin_neon_qi). */
22929 typedef struct
22931 enum machine_mode mode;
22932 const char *element_type_name;
22933 const char *aapcs_name;
22934 } arm_mangle_map_entry;
22936 static arm_mangle_map_entry arm_mangle_map[] = {
22937 /* 64-bit containerized types. */
22938 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
22939 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
22940 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
22941 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
22942 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
22943 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
22944 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
22945 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
22946 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
22947 /* 128-bit containerized types. */
22948 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
22949 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
22950 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
22951 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
22952 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
22953 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
22954 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
22955 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
22956 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
22957 { VOIDmode, NULL, NULL }
22960 const char *
22961 arm_mangle_type (const_tree type)
22963 arm_mangle_map_entry *pos = arm_mangle_map;
22965 /* The ARM ABI documents (10th October 2008) say that "__va_list"
22966 has to be mangled as if it is in the "std" namespace. */
22967 if (TARGET_AAPCS_BASED
22968 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
22970 static bool warned;
22971 if (!warned && warn_psabi && !in_system_header)
22973 warned = true;
22974 inform (input_location,
22975 "the mangling of %<va_list%> has changed in GCC 4.4");
22977 return "St9__va_list";
22980 /* Half-precision float. */
22981 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
22982 return "Dh";
22984 if (TREE_CODE (type) != VECTOR_TYPE)
22985 return NULL;
22987 /* Check the mode of the vector type, and the name of the vector
22988 element type, against the table. */
22989 while (pos->mode != VOIDmode)
22991 tree elt_type = TREE_TYPE (type);
22993 if (pos->mode == TYPE_MODE (type)
22994 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
22995 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
22996 pos->element_type_name))
22997 return pos->aapcs_name;
22999 pos++;
23002 /* Use the default mangling for unrecognized (possibly user-defined)
23003 vector types. */
23004 return NULL;
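/* Illustrative sketch (not part of the original source): a vector type
   whose mode is V8QImode and whose element type is __builtin_neon_qi
   matches the first table entry above and is mangled as
   "15__simd64_int8_t"; a user-defined vector, e.g. one declared with
   __attribute__((vector_size(8))), has a different element type name
   and falls through to the default mangling.  */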
23007 /* Order of allocation of core registers for Thumb: this allocation is
23008 written over the corresponding initial entries of the array
23009 initialized with REG_ALLOC_ORDER. We allocate all low registers
23010 first. Saving and restoring a low register is usually cheaper than
23011 using a call-clobbered high register. */
23013 static const int thumb_core_reg_alloc_order[] =
23015 3, 2, 1, 0, 4, 5, 6, 7,
23016 14, 12, 8, 9, 10, 11, 13, 15
23019 /* Adjust register allocation order when compiling for Thumb. */
23021 void
23022 arm_order_regs_for_local_alloc (void)
23023 {
23024 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
23025 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
23026 if (TARGET_THUMB)
23027 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
23028 sizeof (thumb_core_reg_alloc_order));
23029 }
23031 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23033 bool
23034 arm_frame_pointer_required (void)
23035 {
23036 return (cfun->has_nonlocal_label
23037 || SUBTARGET_FRAME_POINTER_REQUIRED
23038 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
23039 }
23041 /* Thumb-1 is the only target that lacks conditional execution, so return
23042 true if the target is not Thumb-1. */
23043 static bool
23044 arm_have_conditional_execution (void)
23045 {
23046 return !TARGET_THUMB1;
23047 }
23049 /* Legitimize a memory reference for sync primitive implemented using
23050 ldrex / strex. We currently force the form of the reference to be
23051 indirect without offset. We do not yet support the indirect offset
23052 addressing supported by some ARM targets for these
23053 instructions. */
23054 static rtx
23055 arm_legitimize_sync_memory (rtx memory)
23056 {
23057 rtx addr = force_reg (Pmode, XEXP (memory, 0));
23058 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
23060 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
23061 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
23062 return legitimate_memory;
23063 }
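/* For illustration (a sketch; the register numbers are arbitrary): an
   operand such as (mem:SI (plus:SI (reg:SI 4) (const_int 8))) comes back
   as (mem:SI (reg:SI 134)) where pseudo 134 holds r4 + 8, since the
   ldrex/strex sequences used here only accept a bare register address.  */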
23065 /* An instruction emitter. */
23066 typedef void (* emit_f) (int label, const char *, rtx *);
23068 /* An instruction emitter that emits via the conventional
23069 output_asm_insn. */
23070 static void
23071 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23072 {
23073 output_asm_insn (pattern, operands);
23074 }
23076 /* Count the number of emitted synchronization instructions. */
23077 static unsigned arm_insn_count;
23079 /* An emitter that counts emitted instructions but does not actually
23080 emit instructions into the instruction stream. */
23081 static void
23082 arm_count (int label,
23083 const char *pattern ATTRIBUTE_UNUSED,
23084 rtx *operands ATTRIBUTE_UNUSED)
23085 {
23086 if (! label)
23087 ++ arm_insn_count;
23088 }
23090 /* Construct a pattern using conventional output formatting and feed
23091 it to output_asm_insn. Provides a mechanism to construct the
23092 output pattern on the fly. Note the hard limit on the pattern
23093 buffer size. */
23094 static void ATTRIBUTE_PRINTF_4
23095 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23096 const char *pattern, ...)
23097 {
23098 va_list ap;
23099 char buffer[256];
23101 va_start (ap, pattern);
23102 vsprintf (buffer, pattern, ap);
23103 va_end (ap);
23104 emit (label, buffer, operands);
23105 }
23107 /* Emit the memory barrier instruction, if any, provided by this
23108 target to a specified emitter. */
23109 static void
23110 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23111 {
23112 if (TARGET_HAVE_DMB)
23113 {
23114 /* Note we issue a system level barrier. We should consider
23115 issuing an inner shareability zone barrier here instead, i.e.
23116 "DMB ISH". */
23117 emit (0, "dmb\tsy", operands);
23118 return;
23119 }
23121 if (TARGET_HAVE_DMB_MCR)
23122 {
23123 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23124 return;
23125 }
23127 gcc_unreachable ();
23128 }
23130 /* Emit the memory barrier instruction, if any, provided by this
23131 target. */
23132 const char *
23133 arm_output_memory_barrier (rtx *operands)
23134 {
23135 arm_process_output_memory_barrier (arm_emit, operands);
23136 return "";
23137 }
23139 /* Helper to figure out the instruction suffix required on ldrex/strex
23140 for operations on an object of the specified mode. */
23141 static const char *
23142 arm_ldrex_suffix (enum machine_mode mode)
23143 {
23144 switch (mode)
23145 {
23146 case QImode: return "b";
23147 case HImode: return "h";
23148 case SImode: return "";
23149 case DImode: return "d";
23150 default:
23151 gcc_unreachable ();
23152 }
23153 return "";
23154 }
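/* For illustration: the callers below expand "ldrex%s" and "strex%s%s"
   with this suffix, giving e.g. "ldrexh"/"strexh" for HImode and plain
   "ldrex"/"strex" for SImode.  */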
23156 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23157 mode. */
23158 static void
23159 arm_output_ldrex (emit_f emit,
23160 enum machine_mode mode,
23161 rtx target,
23162 rtx memory)
23163 {
23164 const char *suffix = arm_ldrex_suffix (mode);
23165 rtx operands[2];
23167 operands[0] = target;
23168 operands[1] = memory;
23169 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
23170 }
23172 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23173 mode. */
23174 static void
23175 arm_output_strex (emit_f emit,
23176 enum machine_mode mode,
23177 const char *cc,
23178 rtx result,
23179 rtx value,
23180 rtx memory)
23181 {
23182 const char *suffix = arm_ldrex_suffix (mode);
23183 rtx operands[3];
23185 operands[0] = result;
23186 operands[1] = value;
23187 operands[2] = memory;
23188 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23189 cc);
23190 }
23192 /* Helper to emit a two operand instruction. */
23193 static void
23194 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23195 {
23196 rtx operands[2];
23198 operands[0] = d;
23199 operands[1] = s;
23200 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23201 }
23203 /* Helper to emit a three operand instruction. */
23204 static void
23205 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23206 {
23207 rtx operands[3];
23209 operands[0] = d;
23210 operands[1] = a;
23211 operands[2] = b;
23212 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23213 }
23215 /* Emit a load store exclusive synchronization loop.
23217 do
23218 old_value = [mem]
23219 if old_value != required_value
23220 break;
23221 t1 = sync_op (old_value, new_value)
23222 [mem] = t1, t2 = [0|1]
23223 while ! t2
23225 Note:
23226 t1 == t2 is not permitted
23227 t1 == old_value is permitted
23229 required_value:
23231 RTX register or const_int representing the required old_value for
23232 the modify to continue; if NULL, no comparison is performed. */
23233 static void
23234 arm_output_sync_loop (emit_f emit,
23235 enum machine_mode mode,
23236 rtx old_value,
23237 rtx memory,
23238 rtx required_value,
23239 rtx new_value,
23240 rtx t1,
23241 rtx t2,
23242 enum attr_sync_op sync_op,
23243 int early_barrier_required)
23244 {
23245 rtx operands[1];
23247 gcc_assert (t1 != t2);
23249 if (early_barrier_required)
23250 arm_process_output_memory_barrier (emit, NULL);
23252 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23254 arm_output_ldrex (emit, mode, old_value, memory);
23256 if (required_value)
23257 {
23258 rtx operands[2];
23260 operands[0] = old_value;
23261 operands[1] = required_value;
23262 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23263 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23264 }
23266 switch (sync_op)
23267 {
23268 case SYNC_OP_ADD:
23269 arm_output_op3 (emit, "add", t1, old_value, new_value);
23270 break;
23272 case SYNC_OP_SUB:
23273 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23274 break;
23276 case SYNC_OP_IOR:
23277 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23278 break;
23280 case SYNC_OP_XOR:
23281 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23282 break;
23284 case SYNC_OP_AND:
23285 arm_output_op3 (emit,"and", t1, old_value, new_value);
23286 break;
23288 case SYNC_OP_NAND:
23289 arm_output_op3 (emit, "and", t1, old_value, new_value);
23290 arm_output_op2 (emit, "mvn", t1, t1);
23291 break;
23293 case SYNC_OP_NONE:
23294 t1 = new_value;
23295 break;
23296 }
23298 if (t2)
23299 {
23300 arm_output_strex (emit, mode, "", t2, t1, memory);
23301 operands[0] = t2;
23302 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23303 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23304 LOCAL_LABEL_PREFIX);
23305 }
23306 else
23307 {
23308 /* Use old_value for the return value because for some operations
23309 the old_value can easily be restored. This saves one register. */
23310 arm_output_strex (emit, mode, "", old_value, t1, memory);
23311 operands[0] = old_value;
23312 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23313 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23314 LOCAL_LABEL_PREFIX);
23316 switch (sync_op)
23317 {
23318 case SYNC_OP_ADD:
23319 arm_output_op3 (emit, "sub", old_value, t1, new_value);
23320 break;
23322 case SYNC_OP_SUB:
23323 arm_output_op3 (emit, "add", old_value, t1, new_value);
23324 break;
23326 case SYNC_OP_XOR:
23327 arm_output_op3 (emit, "eor", old_value, t1, new_value);
23328 break;
23330 case SYNC_OP_NONE:
23331 arm_output_op2 (emit, "mov", old_value, required_value);
23332 break;
23334 default:
23335 gcc_unreachable ();
23336 }
23337 }
23339 arm_process_output_memory_barrier (emit, NULL);
23340 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
23341 }
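/* For illustration, a sketch of the sequence emitted above for an SImode
   SYNC_OP_ADD with no required_value and a distinct t2 (register names
   are placeholders for the actual operands):
        dmb     sy
   1:   ldrex   r_old, [r_mem]
        add     r_t1, r_old, r_new
        strex   r_t2, r_t1, [r_mem]
        teq     r_t2, #0
        bne     1b
        dmb     sy
   where "1:"/"1b" stand in for the %sLSYT%= local label.  */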
23343 static rtx
23344 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23345 {
23346 if (index > 0)
23347 default_value = operands[index - 1];
23349 return default_value;
23350 }
23352 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23353 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23355 /* Extract the operands for a synchronization instruction from the
23356 instruction's attributes and emit the instruction. */
23357 static void
23358 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23359 {
23360 rtx result, memory, required_value, new_value, t1, t2;
23361 int early_barrier;
23362 enum machine_mode mode;
23363 enum attr_sync_op sync_op;
23365 result = FETCH_SYNC_OPERAND(result, 0);
23366 memory = FETCH_SYNC_OPERAND(memory, 0);
23367 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23368 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23369 t1 = FETCH_SYNC_OPERAND(t1, 0);
23370 t2 = FETCH_SYNC_OPERAND(t2, 0);
23371 early_barrier =
23372 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23373 sync_op = get_attr_sync_op (insn);
23374 mode = GET_MODE (memory);
23376 arm_output_sync_loop (emit, mode, result, memory, required_value,
23377 new_value, t1, t2, sync_op, early_barrier);
23378 }
23380 /* Emit a synchronization instruction loop. */
23381 const char *
23382 arm_output_sync_insn (rtx insn, rtx *operands)
23383 {
23384 arm_process_output_sync_insn (arm_emit, insn, operands);
23385 return "";
23386 }
23388 /* Count the number of machine instructions that will be emitted for a
23389 synchronization instruction. Note that the emitter used does not
23390 emit instructions; it just counts them, being careful not
23391 to count labels. */
23392 unsigned int
23393 arm_sync_loop_insns (rtx insn, rtx *operands)
23394 {
23395 arm_insn_count = 0;
23396 arm_process_output_sync_insn (arm_count, insn, operands);
23397 return arm_insn_count;
23398 }
23400 /* Helper to call a target sync instruction generator, dealing with
23401 the variation in operands required by the different generators. */
23402 static rtx
23403 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23404 rtx memory, rtx required_value, rtx new_value)
23405 {
23406 switch (generator->op)
23407 {
23408 case arm_sync_generator_omn:
23409 gcc_assert (! required_value);
23410 return generator->u.omn (old_value, memory, new_value);
23412 case arm_sync_generator_omrn:
23413 gcc_assert (required_value);
23414 return generator->u.omrn (old_value, memory, required_value, new_value);
23415 }
23417 return NULL;
23418 }
23420 /* Expand a synchronization loop. The synchronization loop is expanded
23421 as an opaque block of instructions in order to ensure that we do
23422 not subsequently get extraneous memory accesses inserted within the
23423 critical region. The exclusive access property of ldrex/strex is
23424 only guaranteed if there are no intervening memory accesses. */
23425 void
23426 arm_expand_sync (enum machine_mode mode,
23427 struct arm_sync_generator *generator,
23428 rtx target, rtx memory, rtx required_value, rtx new_value)
23429 {
23430 if (target == NULL)
23431 target = gen_reg_rtx (mode);
23433 memory = arm_legitimize_sync_memory (memory);
23434 if (mode != SImode)
23435 {
23436 rtx load_temp = gen_reg_rtx (SImode);
23438 if (required_value)
23439 required_value = convert_modes (SImode, mode, required_value, true);
23441 new_value = convert_modes (SImode, mode, new_value, true);
23442 emit_insn (arm_call_generator (generator, load_temp, memory,
23443 required_value, new_value));
23444 emit_move_insn (target, gen_lowpart (mode, load_temp));
23445 }
23446 else
23447 {
23448 emit_insn (arm_call_generator (generator, target, memory, required_value,
23449 new_value));
23450 }
23451 }
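/* For illustration: for a narrow mode such as QImode, required_value and
   new_value are zero-extended to SImode, the generated ldrex/strex loop
   works on the SImode load_temp, and only the low QImode part of
   load_temp is copied back into target.  */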
23453 static bool
23454 arm_vector_alignment_reachable (const_tree type, bool is_packed)
23455 {
23456 /* Vectors which aren't in packed structures will not be less aligned than
23457 the natural alignment of their element type, so this is safe. */
23458 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23459 return !is_packed;
23461 return default_builtin_vector_alignment_reachable (type, is_packed);
23462 }
23464 static bool
23465 arm_builtin_support_vector_misalignment (enum machine_mode mode,
23466 const_tree type, int misalignment,
23467 bool is_packed)
23468 {
23469 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23470 {
23471 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
23473 if (is_packed)
23474 return align == 1;
23476 /* If the misalignment is unknown, we should be able to handle the access
23477 so long as it is not to a member of a packed data structure. */
23478 if (misalignment == -1)
23479 return true;
23481 /* Return true if the misalignment is a multiple of the natural alignment
23482 of the vector's element type. This is probably always going to be
23483 true in practice, since we've already established that this isn't a
23484 packed access. */
23485 return ((misalignment % align) == 0);
23486 }
23488 return default_builtin_support_vector_misalignment (mode, type, misalignment,
23489 is_packed);
23490 }
23492 static void
23493 arm_conditional_register_usage (void)
23494 {
23495 int regno;
23497 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
23498 {
23499 for (regno = FIRST_FPA_REGNUM;
23500 regno <= LAST_FPA_REGNUM; ++regno)
23501 fixed_regs[regno] = call_used_regs[regno] = 1;
23502 }
23504 if (TARGET_THUMB1 && optimize_size)
23505 {
23506 /* When optimizing for size on Thumb-1, it's better not
23507 to use the HI regs, because of the overhead of
23508 stacking them. */
23509 for (regno = FIRST_HI_REGNUM;
23510 regno <= LAST_HI_REGNUM; ++regno)
23511 fixed_regs[regno] = call_used_regs[regno] = 1;
23512 }
23514 /* The link register can be clobbered by any branch insn,
23515 but we have no way to track that at present, so mark
23516 it as unavailable. */
23517 if (TARGET_THUMB1)
23518 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
23520 if (TARGET_32BIT && TARGET_HARD_FLOAT)
23521 {
23522 if (TARGET_MAVERICK)
23523 {
23524 for (regno = FIRST_FPA_REGNUM;
23525 regno <= LAST_FPA_REGNUM; ++ regno)
23526 fixed_regs[regno] = call_used_regs[regno] = 1;
23527 for (regno = FIRST_CIRRUS_FP_REGNUM;
23528 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
23529 {
23530 fixed_regs[regno] = 0;
23531 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
23532 }
23533 }
23534 if (TARGET_VFP)
23535 {
23536 /* VFPv3 registers are disabled when earlier VFP
23537 versions are selected due to the definition of
23538 LAST_VFP_REGNUM. */
23539 for (regno = FIRST_VFP_REGNUM;
23540 regno <= LAST_VFP_REGNUM; ++ regno)
23541 {
23542 fixed_regs[regno] = 0;
23543 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
23544 || regno >= FIRST_VFP_REGNUM + 32;
23545 }
23546 }
23547 }
23549 if (TARGET_REALLY_IWMMXT)
23550 {
23551 regno = FIRST_IWMMXT_GR_REGNUM;
23552 /* The 2002/10/09 revision of the XScale ABI has wCG0
23553 and wCG1 as call-preserved registers. The 2002/11/21
23554 revision changed this so that all wCG registers are
23555 scratch registers. */
23556 for (regno = FIRST_IWMMXT_GR_REGNUM;
23557 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
23558 fixed_regs[regno] = 0;
23559 /* The XScale ABI has wR0 - wR9 as scratch registers,
23560 the rest as call-preserved registers. */
23561 for (regno = FIRST_IWMMXT_REGNUM;
23562 regno <= LAST_IWMMXT_REGNUM; ++ regno)
23563 {
23564 fixed_regs[regno] = 0;
23565 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
23566 }
23567 }
23569 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
23570 {
23571 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23572 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23573 }
23574 else if (TARGET_APCS_STACK)
23575 {
23576 fixed_regs[10] = 1;
23577 call_used_regs[10] = 1;
23578 }
23579 /* -mcaller-super-interworking reserves r11 for calls to
23580 _interwork_r11_call_via_rN(). Making the register global
23581 is an easy way of ensuring that it remains valid for all
23582 calls. */
23583 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
23584 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
23585 {
23586 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23587 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23588 if (TARGET_CALLER_INTERWORKING)
23589 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23590 }
23591 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23592 }
23594 static reg_class_t
23595 arm_preferred_rename_class (reg_class_t rclass)
23596 {
23597 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
23598 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
23599 so code size can be reduced. */
23600 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
23601 return LO_REGS;
23602 else
23603 return NO_REGS;
23604 }
23606 #include "gt-arm.h"