Merge from mainline (167278:168000).
[official-gcc/graphite-test-results.git] / gcc / config / arm / arm.c
blob2bfdd9fec551f0e5b558310938a3d0cdf170357f
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
55 #include "df.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
64 void (*arm_lang_output_object_attributes_hook)(void);
66 /* Forward function declarations. */
67 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
68 static int arm_compute_static_chain_stack_bytes (void);
69 static arm_stack_offsets *arm_get_frame_offsets (void);
70 static void arm_add_gc_roots (void);
71 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
72 HOST_WIDE_INT, rtx, rtx, int, int);
73 static unsigned bit_count (unsigned long);
74 static int arm_address_register_rtx_p (rtx, int);
75 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
76 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
77 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
78 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
79 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
80 inline static int thumb1_index_register_rtx_p (rtx, int);
81 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
82 static int thumb_far_jump_used_p (void);
83 static bool thumb_force_lr_save (void);
84 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
85 static rtx emit_sfm (int, int);
86 static unsigned arm_size_return_regs (void);
87 static bool arm_assemble_integer (rtx, unsigned int, int);
88 static void arm_print_operand (FILE *, rtx, int);
89 static void arm_print_operand_address (FILE *, rtx);
90 static bool arm_print_operand_punct_valid_p (unsigned char code);
91 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
92 static arm_cc get_arm_condition_code (rtx);
93 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
94 static rtx is_jump_table (rtx);
95 static const char *output_multi_immediate (rtx *, const char *, const char *,
96 int, HOST_WIDE_INT);
97 static const char *shift_op (rtx, HOST_WIDE_INT *);
98 static struct machine_function *arm_init_machine_status (void);
99 static void thumb_exit (FILE *, int);
100 static rtx is_jump_table (rtx);
101 static HOST_WIDE_INT get_jump_table_size (rtx);
102 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
103 static Mnode *add_minipool_forward_ref (Mfix *);
104 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
105 static Mnode *add_minipool_backward_ref (Mfix *);
106 static void assign_minipool_offsets (Mfix *);
107 static void arm_print_value (FILE *, rtx);
108 static void dump_minipool (rtx);
109 static int arm_barrier_cost (rtx);
110 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
111 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
112 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
113 rtx);
114 static void arm_reorg (void);
115 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
116 static unsigned long arm_compute_save_reg0_reg12_mask (void);
117 static unsigned long arm_compute_save_reg_mask (void);
118 static unsigned long arm_isr_value (tree);
119 static unsigned long arm_compute_func_type (void);
120 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
121 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
122 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
123 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
124 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
125 #endif
126 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
127 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
128 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
129 static int arm_comp_type_attributes (const_tree, const_tree);
130 static void arm_set_default_type_attributes (tree);
131 static int arm_adjust_cost (rtx, rtx, rtx, int);
132 static int count_insns_for_constant (HOST_WIDE_INT, int);
133 static int arm_get_strip_length (int);
134 static bool arm_function_ok_for_sibcall (tree, tree);
135 static enum machine_mode arm_promote_function_mode (const_tree,
136 enum machine_mode, int *,
137 const_tree, int);
138 static bool arm_return_in_memory (const_tree, const_tree);
139 static rtx arm_function_value (const_tree, const_tree, bool);
140 static rtx arm_libcall_value (enum machine_mode, const_rtx);
142 static void arm_internal_label (FILE *, const char *, unsigned long);
143 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
144 tree);
145 static bool arm_have_conditional_execution (void);
146 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
147 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
148 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
149 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
150 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
151 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
152 static bool arm_rtx_costs (rtx, int, int, int *, bool);
153 static int arm_address_cost (rtx, bool);
154 static bool arm_memory_load_p (rtx);
155 static bool arm_cirrus_insn_p (rtx);
156 static void cirrus_reorg (rtx);
157 static void arm_init_builtins (void);
158 static void arm_init_iwmmxt_builtins (void);
159 static rtx safe_vector_operand (rtx, enum machine_mode);
160 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
161 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
162 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
163 static void emit_constant_insn (rtx cond, rtx pattern);
164 static rtx emit_set_insn (rtx, rtx);
165 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
166 tree, bool);
167 static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
168 const_tree, bool);
169 static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
170 const_tree, bool);
171 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
172 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
173 const_tree);
174 static int aapcs_select_return_coproc (const_tree, const_tree);
176 #ifdef OBJECT_FORMAT_ELF
177 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
178 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
179 #endif
180 #ifndef ARM_PE
181 static void arm_encode_section_info (tree, rtx, int);
182 #endif
184 static void arm_file_end (void);
185 static void arm_file_start (void);
187 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
188 tree, int *, int);
189 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
190 enum machine_mode, const_tree, bool);
191 static bool arm_promote_prototypes (const_tree);
192 static bool arm_default_short_enums (void);
193 static bool arm_align_anon_bitfield (void);
194 static bool arm_return_in_msb (const_tree);
195 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
196 static bool arm_return_in_memory (const_tree, const_tree);
197 #if ARM_UNWIND_INFO
198 static void arm_unwind_emit (FILE *, rtx);
199 static bool arm_output_ttype (rtx);
200 static void arm_asm_emit_except_personality (rtx);
201 static void arm_asm_init_sections (void);
202 #endif
203 static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
204 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
205 static rtx arm_dwarf_register_span (rtx);
207 static tree arm_cxx_guard_type (void);
208 static bool arm_cxx_guard_mask_bit (void);
209 static tree arm_get_cookie_size (tree);
210 static bool arm_cookie_has_size (void);
211 static bool arm_cxx_cdtor_returns_this (void);
212 static bool arm_cxx_key_method_may_be_inline (void);
213 static void arm_cxx_determine_class_data_visibility (tree);
214 static bool arm_cxx_class_data_always_comdat (void);
215 static bool arm_cxx_use_aeabi_atexit (void);
216 static void arm_init_libfuncs (void);
217 static tree arm_build_builtin_va_list (void);
218 static void arm_expand_builtin_va_start (tree, rtx);
219 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
220 static void arm_option_override (void);
221 static bool arm_handle_option (size_t, const char *, int);
222 static void arm_target_help (void);
223 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
224 static bool arm_cannot_copy_insn_p (rtx);
225 static bool arm_tls_symbol_p (rtx x);
226 static int arm_issue_rate (void);
227 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
228 static bool arm_output_addr_const_extra (FILE *, rtx);
229 static bool arm_allocate_stack_slots_for_args (void);
230 static const char *arm_invalid_parameter_type (const_tree t);
231 static const char *arm_invalid_return_type (const_tree t);
232 static tree arm_promoted_type (const_tree t);
233 static tree arm_convert_to_type (tree type, tree expr);
234 static bool arm_scalar_mode_supported_p (enum machine_mode);
235 static bool arm_frame_pointer_required (void);
236 static bool arm_can_eliminate (const int, const int);
237 static void arm_asm_trampoline_template (FILE *);
238 static void arm_trampoline_init (rtx, tree, rtx);
239 static rtx arm_trampoline_adjust_address (rtx);
240 static rtx arm_pic_static_addr (rtx orig, rtx reg);
241 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
242 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
243 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
244 static bool arm_class_likely_spilled_p (reg_class_t);
245 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
246 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
247 const_tree type,
248 int misalignment,
249 bool is_packed);
250 static void arm_conditional_register_usage (void);
251 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
254 /* Table of machine attributes. */
255 static const struct attribute_spec arm_attribute_table[] =
257 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
258 /* Function calls made to this symbol must be done indirectly, because
259 it may lie outside of the 26 bit addressing range of a normal function
260 call. */
261 { "long_call", 0, 0, false, true, true, NULL },
262 /* Whereas these functions are always known to reside within the 26 bit
263 addressing range. */
264 { "short_call", 0, 0, false, true, true, NULL },
265 /* Specify the procedure call conventions for a function. */
266 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
267 /* Interrupt Service Routines have special prologue and epilogue requirements. */
268 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
269 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
270 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
271 #ifdef ARM_PE
272 /* ARM/PE has three new attributes:
273 interfacearm - ?
274 dllexport - for exporting a function/variable that will live in a dll
275 dllimport - for importing a function/variable from a dll
277 Microsoft allows multiple declspecs in one __declspec, separating
278 them with spaces. We do NOT support this. Instead, use __declspec
279 multiple times.
281 { "dllimport", 0, 0, true, false, false, NULL },
282 { "dllexport", 0, 0, true, false, false, NULL },
283 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
284 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
285 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
286 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
287 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
288 #endif
289 { NULL, 0, 0, false, false, false, NULL }
292 /* Set default optimization options. */
293 static const struct default_options arm_option_optimization_table[] =
295 /* Enable section anchors by default at -O1 or higher. */
296 { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
297 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
298 { OPT_LEVELS_NONE, 0, NULL, 0 }
301 /* Initialize the GCC target structure. */
302 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
303 #undef TARGET_MERGE_DECL_ATTRIBUTES
304 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
305 #endif
307 #undef TARGET_LEGITIMIZE_ADDRESS
308 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
310 #undef TARGET_ATTRIBUTE_TABLE
311 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
313 #undef TARGET_ASM_FILE_START
314 #define TARGET_ASM_FILE_START arm_file_start
315 #undef TARGET_ASM_FILE_END
316 #define TARGET_ASM_FILE_END arm_file_end
318 #undef TARGET_ASM_ALIGNED_SI_OP
319 #define TARGET_ASM_ALIGNED_SI_OP NULL
320 #undef TARGET_ASM_INTEGER
321 #define TARGET_ASM_INTEGER arm_assemble_integer
323 #undef TARGET_PRINT_OPERAND
324 #define TARGET_PRINT_OPERAND arm_print_operand
325 #undef TARGET_PRINT_OPERAND_ADDRESS
326 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
327 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
328 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
330 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
331 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
333 #undef TARGET_ASM_FUNCTION_PROLOGUE
334 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
336 #undef TARGET_ASM_FUNCTION_EPILOGUE
337 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
339 #undef TARGET_DEFAULT_TARGET_FLAGS
340 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
341 #undef TARGET_HANDLE_OPTION
342 #define TARGET_HANDLE_OPTION arm_handle_option
343 #undef TARGET_HELP
344 #define TARGET_HELP arm_target_help
345 #undef TARGET_OPTION_OVERRIDE
346 #define TARGET_OPTION_OVERRIDE arm_option_override
347 #undef TARGET_OPTION_OPTIMIZATION_TABLE
348 #define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table
350 #undef TARGET_COMP_TYPE_ATTRIBUTES
351 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
353 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
354 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
356 #undef TARGET_SCHED_ADJUST_COST
357 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
359 #undef TARGET_ENCODE_SECTION_INFO
360 #ifdef ARM_PE
361 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
362 #else
363 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
364 #endif
366 #undef TARGET_STRIP_NAME_ENCODING
367 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
369 #undef TARGET_ASM_INTERNAL_LABEL
370 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
372 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
373 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
375 #undef TARGET_FUNCTION_VALUE
376 #define TARGET_FUNCTION_VALUE arm_function_value
378 #undef TARGET_LIBCALL_VALUE
379 #define TARGET_LIBCALL_VALUE arm_libcall_value
381 #undef TARGET_ASM_OUTPUT_MI_THUNK
382 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
383 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
384 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
386 #undef TARGET_RTX_COSTS
387 #define TARGET_RTX_COSTS arm_rtx_costs
388 #undef TARGET_ADDRESS_COST
389 #define TARGET_ADDRESS_COST arm_address_cost
391 #undef TARGET_SHIFT_TRUNCATION_MASK
392 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
393 #undef TARGET_VECTOR_MODE_SUPPORTED_P
394 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
395 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
396 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
398 #undef TARGET_MACHINE_DEPENDENT_REORG
399 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
401 #undef TARGET_INIT_BUILTINS
402 #define TARGET_INIT_BUILTINS arm_init_builtins
403 #undef TARGET_EXPAND_BUILTIN
404 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
406 #undef TARGET_INIT_LIBFUNCS
407 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
409 #undef TARGET_PROMOTE_FUNCTION_MODE
410 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
411 #undef TARGET_PROMOTE_PROTOTYPES
412 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
413 #undef TARGET_PASS_BY_REFERENCE
414 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
415 #undef TARGET_ARG_PARTIAL_BYTES
416 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
417 #undef TARGET_FUNCTION_ARG
418 #define TARGET_FUNCTION_ARG arm_function_arg
419 #undef TARGET_FUNCTION_ARG_ADVANCE
420 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
421 #undef TARGET_FUNCTION_ARG_BOUNDARY
422 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
424 #undef TARGET_SETUP_INCOMING_VARARGS
425 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
427 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
428 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
430 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
431 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
432 #undef TARGET_TRAMPOLINE_INIT
433 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
434 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
435 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
437 #undef TARGET_DEFAULT_SHORT_ENUMS
438 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
440 #undef TARGET_ALIGN_ANON_BITFIELD
441 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
443 #undef TARGET_NARROW_VOLATILE_BITFIELD
444 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
446 #undef TARGET_CXX_GUARD_TYPE
447 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
449 #undef TARGET_CXX_GUARD_MASK_BIT
450 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
452 #undef TARGET_CXX_GET_COOKIE_SIZE
453 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
455 #undef TARGET_CXX_COOKIE_HAS_SIZE
456 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
458 #undef TARGET_CXX_CDTOR_RETURNS_THIS
459 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
461 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
462 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
464 #undef TARGET_CXX_USE_AEABI_ATEXIT
465 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
467 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
468 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
469 arm_cxx_determine_class_data_visibility
471 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
472 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
474 #undef TARGET_RETURN_IN_MSB
475 #define TARGET_RETURN_IN_MSB arm_return_in_msb
477 #undef TARGET_RETURN_IN_MEMORY
478 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
480 #undef TARGET_MUST_PASS_IN_STACK
481 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
483 #if ARM_UNWIND_INFO
484 #undef TARGET_ASM_UNWIND_EMIT
485 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
487 /* EABI unwinding tables use a different format for the typeinfo tables. */
488 #undef TARGET_ASM_TTYPE
489 #define TARGET_ASM_TTYPE arm_output_ttype
491 #undef TARGET_ARM_EABI_UNWINDER
492 #define TARGET_ARM_EABI_UNWINDER true
494 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
495 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
497 #undef TARGET_ASM_INIT_SECTIONS
498 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
499 #endif /* ARM_UNWIND_INFO */
501 #undef TARGET_EXCEPT_UNWIND_INFO
502 #define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
504 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
505 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
507 #undef TARGET_DWARF_REGISTER_SPAN
508 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
510 #undef TARGET_CANNOT_COPY_INSN_P
511 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
513 #ifdef HAVE_AS_TLS
514 #undef TARGET_HAVE_TLS
515 #define TARGET_HAVE_TLS true
516 #endif
518 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
519 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
521 #undef TARGET_CANNOT_FORCE_CONST_MEM
522 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
524 #undef TARGET_MAX_ANCHOR_OFFSET
525 #define TARGET_MAX_ANCHOR_OFFSET 4095
527 /* The minimum is set such that the total size of the block
528 for a particular anchor is -4088 + 1 + 4095 bytes, which is
529 divisible by eight, ensuring natural spacing of anchors. */
530 #undef TARGET_MIN_ANCHOR_OFFSET
531 #define TARGET_MIN_ANCHOR_OFFSET -4088
533 #undef TARGET_SCHED_ISSUE_RATE
534 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
536 #undef TARGET_MANGLE_TYPE
537 #define TARGET_MANGLE_TYPE arm_mangle_type
539 #undef TARGET_BUILD_BUILTIN_VA_LIST
540 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
541 #undef TARGET_EXPAND_BUILTIN_VA_START
542 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
543 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
544 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
546 #ifdef HAVE_AS_TLS
547 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
548 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
549 #endif
551 #undef TARGET_LEGITIMATE_ADDRESS_P
552 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
554 #undef TARGET_INVALID_PARAMETER_TYPE
555 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
557 #undef TARGET_INVALID_RETURN_TYPE
558 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
560 #undef TARGET_PROMOTED_TYPE
561 #define TARGET_PROMOTED_TYPE arm_promoted_type
563 #undef TARGET_CONVERT_TO_TYPE
564 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
566 #undef TARGET_SCALAR_MODE_SUPPORTED_P
567 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
569 #undef TARGET_FRAME_POINTER_REQUIRED
570 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
572 #undef TARGET_CAN_ELIMINATE
573 #define TARGET_CAN_ELIMINATE arm_can_eliminate
575 #undef TARGET_CONDITIONAL_REGISTER_USAGE
576 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
578 #undef TARGET_CLASS_LIKELY_SPILLED_P
579 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
581 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
582 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
583 arm_vector_alignment_reachable
585 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
586 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
587 arm_builtin_support_vector_misalignment
589 #undef TARGET_PREFERRED_RENAME_CLASS
590 #define TARGET_PREFERRED_RENAME_CLASS \
591 arm_preferred_rename_class
593 struct gcc_target targetm = TARGET_INITIALIZER;
595 /* Obstack for minipool constant handling. */
596 static struct obstack minipool_obstack;
597 static char * minipool_startobj;
599 /* The maximum number of insns skipped which
600 will be conditionalised if possible. */
601 static int max_insns_skipped = 5;
603 extern FILE * asm_out_file;
605 /* True if we are currently building a constant table. */
606 int making_const_table;
608 /* The processor for which instructions should be scheduled. */
609 enum processor_type arm_tune = arm_none;
611 /* The current tuning set. */
612 const struct tune_params *current_tune;
614 /* Which floating point hardware to schedule for. */
615 int arm_fpu_attr;
617 /* Which floating popint hardware to use. */
618 const struct arm_fpu_desc *arm_fpu_desc;
620 /* Whether to use floating point hardware. */
621 enum float_abi_type arm_float_abi;
623 /* Which __fp16 format to use. */
624 enum arm_fp16_format_type arm_fp16_format;
626 /* Which ABI to use. */
627 enum arm_abi_type arm_abi;
629 /* Which thread pointer model to use. */
630 enum arm_tp_type target_thread_pointer = TP_AUTO;
632 /* Used to parse -mstructure_size_boundary command line option. */
633 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
635 /* Used for Thumb call_via trampolines. */
636 rtx thumb_call_via_label[14];
637 static int thumb_call_reg_needed;
639 /* Bit values used to identify processor capabilities. */
640 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
641 #define FL_ARCH3M (1 << 1) /* Extended multiply */
642 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
643 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
644 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
645 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
646 #define FL_THUMB (1 << 6) /* Thumb aware */
647 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
648 #define FL_STRONG (1 << 8) /* StrongARM */
649 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
650 #define FL_XSCALE (1 << 10) /* XScale */
651 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
652 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
653 media instructions. */
654 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
655 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
656 Note: ARM6 & 7 derivatives only. */
657 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
658 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
659 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
660 profile. */
661 #define FL_DIV (1 << 18) /* Hardware divide. */
662 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
663 #define FL_NEON (1 << 20) /* Neon instructions. */
664 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
665 architecture. */
666 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
668 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
670 /* Flags that only effect tuning, not available instructions. */
671 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
672 | FL_CO_PROC)
674 #define FL_FOR_ARCH2 FL_NOTM
675 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
676 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
677 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
678 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
679 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
680 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
681 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
682 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
683 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
684 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
685 #define FL_FOR_ARCH6J FL_FOR_ARCH6
686 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
687 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
688 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
689 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
690 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
691 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
692 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
693 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
694 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
695 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
697 /* The bits in this mask specify which
698 instructions we are allowed to generate. */
699 static unsigned long insn_flags = 0;
701 /* The bits in this mask specify which instruction scheduling options should
702 be used. */
703 static unsigned long tune_flags = 0;
705 /* The following are used in the arm.md file as equivalents to bits
706 in the above two flag variables. */
708 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
709 int arm_arch3m = 0;
711 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
712 int arm_arch4 = 0;
714 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
715 int arm_arch4t = 0;
717 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
718 int arm_arch5 = 0;
720 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
721 int arm_arch5e = 0;
723 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
724 int arm_arch6 = 0;
726 /* Nonzero if this chip supports the ARM 6K extensions. */
727 int arm_arch6k = 0;
729 /* Nonzero if this chip supports the ARM 7 extensions. */
730 int arm_arch7 = 0;
732 /* Nonzero if instructions not present in the 'M' profile can be used. */
733 int arm_arch_notm = 0;
735 /* Nonzero if instructions present in ARMv7E-M can be used. */
736 int arm_arch7em = 0;
738 /* Nonzero if this chip can benefit from load scheduling. */
739 int arm_ld_sched = 0;
741 /* Nonzero if this chip is a StrongARM. */
742 int arm_tune_strongarm = 0;
744 /* Nonzero if this chip is a Cirrus variant. */
745 int arm_arch_cirrus = 0;
747 /* Nonzero if this chip supports Intel Wireless MMX technology. */
748 int arm_arch_iwmmxt = 0;
750 /* Nonzero if this chip is an XScale. */
751 int arm_arch_xscale = 0;
753 /* Nonzero if tuning for XScale */
754 int arm_tune_xscale = 0;
756 /* Nonzero if we want to tune for stores that access the write-buffer.
757 This typically means an ARM6 or ARM7 with MMU or MPU. */
758 int arm_tune_wbuf = 0;
760 /* Nonzero if tuning for Cortex-A9. */
761 int arm_tune_cortex_a9 = 0;
763 /* Nonzero if generating Thumb instructions. */
764 int thumb_code = 0;
766 /* Nonzero if generating Thumb-1 instructions. */
767 int thumb1_code = 0;
769 /* Nonzero if we should define __THUMB_INTERWORK__ in the
770 preprocessor.
771 XXX This is a bit of a hack, it's intended to help work around
772 problems in GLD which doesn't understand that armv5t code is
773 interworking clean. */
774 int arm_cpp_interwork = 0;
776 /* Nonzero if chip supports Thumb 2. */
777 int arm_arch_thumb2;
779 /* Nonzero if chip supports integer division instruction. */
780 int arm_arch_hwdiv;
782 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
783 we must report the mode of the memory reference from
784 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
785 enum machine_mode output_memory_reference_mode;
787 /* The register number to be used for the PIC offset register. */
788 unsigned arm_pic_register = INVALID_REGNUM;
790 /* Set to 1 after arm_reorg has started. Reset to start at the start of
791 the next function. */
792 static int after_arm_reorg = 0;
794 enum arm_pcs arm_pcs_default;
796 /* For an explanation of these variables, see final_prescan_insn below. */
797 int arm_ccfsm_state;
798 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
799 enum arm_cond_code arm_current_cc;
801 rtx arm_target_insn;
802 int arm_target_label;
803 /* The number of conditionally executed insns, including the current insn. */
804 int arm_condexec_count = 0;
805 /* A bitmask specifying the patterns for the IT block.
806 Zero means do not output an IT block before this insn. */
807 int arm_condexec_mask = 0;
808 /* The number of bits used in arm_condexec_mask. */
809 int arm_condexec_masklen = 0;
811 /* The condition codes of the ARM, and the inverse function. */
812 static const char * const arm_condition_codes[] =
814 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
815 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
818 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
819 int arm_regs_in_sequence[] =
821 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
824 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
825 #define streq(string1, string2) (strcmp (string1, string2) == 0)
827 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
828 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
829 | (1 << PIC_OFFSET_TABLE_REGNUM)))
831 /* Initialization code. */
833 struct processors
835 const char *const name;
836 enum processor_type core;
837 const char *arch;
838 const unsigned long flags;
839 const struct tune_params *const tune;
843 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
844 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
845 prefetch_slots, \
846 l1_size, \
847 l1_line_size
849 const struct tune_params arm_slowmul_tune =
851 arm_slowmul_rtx_costs,
852 NULL,
854 ARM_PREFETCH_NOT_BENEFICIAL
857 const struct tune_params arm_fastmul_tune =
859 arm_fastmul_rtx_costs,
860 NULL,
862 ARM_PREFETCH_NOT_BENEFICIAL
865 const struct tune_params arm_xscale_tune =
867 arm_xscale_rtx_costs,
868 xscale_sched_adjust_cost,
870 ARM_PREFETCH_NOT_BENEFICIAL
873 const struct tune_params arm_9e_tune =
875 arm_9e_rtx_costs,
876 NULL,
878 ARM_PREFETCH_NOT_BENEFICIAL
881 const struct tune_params arm_cortex_a9_tune =
883 arm_9e_rtx_costs,
884 cortex_a9_sched_adjust_cost,
886 ARM_PREFETCH_BENEFICIAL(4,32,32)
890 /* Not all of these give usefully different compilation alternatives,
891 but there is no simple way of generalizing them. */
892 static const struct processors all_cores[] =
894 /* ARM Cores */
895 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
896 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
897 #include "arm-cores.def"
898 #undef ARM_CORE
899 {NULL, arm_none, NULL, 0, NULL}
902 static const struct processors all_architectures[] =
904 /* ARM Architectures */
905 /* We don't specify tuning costs here as it will be figured out
906 from the core. */
908 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
909 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
910 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
911 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
912 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
913 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
914 implementations that support it, so we will leave it out for now. */
915 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
916 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
917 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
918 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
919 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
920 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
921 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
922 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
923 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
924 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
925 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
926 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
927 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
928 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
929 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
930 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
931 {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
932 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
933 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
934 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
935 {NULL, arm_none, NULL, 0 , NULL}
939 /* These are populated as commandline arguments are processed, or NULL
940 if not specified. */
941 static const struct processors *arm_selected_arch;
942 static const struct processors *arm_selected_cpu;
943 static const struct processors *arm_selected_tune;
945 /* The name of the preprocessor macro to define for this architecture. */
947 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
949 /* Available values for -mfpu=. */
951 static const struct arm_fpu_desc all_fpus[] =
953 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
954 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
955 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
956 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
957 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
958 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
959 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
960 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
961 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
962 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
963 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
964 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
965 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
966 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
967 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
968 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
969 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
970 /* Compatibility aliases. */
971 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
975 struct float_abi
977 const char * name;
978 enum float_abi_type abi_type;
982 /* Available values for -mfloat-abi=. */
984 static const struct float_abi all_float_abis[] =
986 {"soft", ARM_FLOAT_ABI_SOFT},
987 {"softfp", ARM_FLOAT_ABI_SOFTFP},
988 {"hard", ARM_FLOAT_ABI_HARD}
992 struct fp16_format
994 const char *name;
995 enum arm_fp16_format_type fp16_format_type;
999 /* Available values for -mfp16-format=. */
1001 static const struct fp16_format all_fp16_formats[] =
1003 {"none", ARM_FP16_FORMAT_NONE},
1004 {"ieee", ARM_FP16_FORMAT_IEEE},
1005 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
1009 struct abi_name
1011 const char *name;
1012 enum arm_abi_type abi_type;
1016 /* Available values for -mabi=. */
1018 static const struct abi_name arm_all_abis[] =
1020 {"apcs-gnu", ARM_ABI_APCS},
1021 {"atpcs", ARM_ABI_ATPCS},
1022 {"aapcs", ARM_ABI_AAPCS},
1023 {"iwmmxt", ARM_ABI_IWMMXT},
1024 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
1027 /* Supported TLS relocations. */
1029 enum tls_reloc {
1030 TLS_GD32,
1031 TLS_LDM32,
1032 TLS_LDO32,
1033 TLS_IE32,
1034 TLS_LE32
1037 /* The maximum number of insns to be used when loading a constant. */
1038 inline static int
1039 arm_constant_limit (bool size_p)
1041 return size_p ? 1 : current_tune->constant_limit;
1044 /* Emit an insn that's a simple single-set. Both the operands must be known
1045 to be valid. */
1046 inline static rtx
1047 emit_set_insn (rtx x, rtx y)
1049 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1052 /* Return the number of bits set in VALUE. */
1053 static unsigned
1054 bit_count (unsigned long value)
1056 unsigned long count = 0;
1058 while (value)
1060 count++;
1061 value &= value - 1; /* Clear the least-significant set bit. */
1064 return count;
1067 /* Set up library functions unique to ARM. */
1069 static void
1070 arm_init_libfuncs (void)
1072 /* There are no special library functions unless we are using the
1073 ARM BPABI. */
1074 if (!TARGET_BPABI)
1075 return;
1077 /* The functions below are described in Section 4 of the "Run-Time
1078 ABI for the ARM architecture", Version 1.0. */
1080 /* Double-precision floating-point arithmetic. Table 2. */
1081 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1082 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1083 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1084 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1085 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1087 /* Double-precision comparisons. Table 3. */
1088 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1089 set_optab_libfunc (ne_optab, DFmode, NULL);
1090 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1091 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1092 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1093 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1094 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1096 /* Single-precision floating-point arithmetic. Table 4. */
1097 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1098 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1099 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1100 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1101 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1103 /* Single-precision comparisons. Table 5. */
1104 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1105 set_optab_libfunc (ne_optab, SFmode, NULL);
1106 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1107 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1108 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1109 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1110 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1112 /* Floating-point to integer conversions. Table 6. */
1113 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1114 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1115 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1116 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1117 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1118 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1119 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1120 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1122 /* Conversions between floating types. Table 7. */
1123 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1124 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1126 /* Integer to floating-point conversions. Table 8. */
1127 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1128 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1129 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1130 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1131 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1132 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1133 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1134 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1136 /* Long long. Table 9. */
1137 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1138 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1139 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1140 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1141 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1142 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1143 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1144 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1146 /* Integer (32/32->32) division. \S 4.3.1. */
1147 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1148 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1150 /* The divmod functions are designed so that they can be used for
1151 plain division, even though they return both the quotient and the
1152 remainder. The quotient is returned in the usual location (i.e.,
1153 r0 for SImode, {r0, r1} for DImode), just as would be expected
1154 for an ordinary division routine. Because the AAPCS calling
1155 conventions specify that all of { r0, r1, r2, r3 } are
1156 callee-saved registers, there is no need to tell the compiler
1157 explicitly that those registers are clobbered by these
1158 routines. */
1159 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1160 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1162 /* For SImode division the ABI provides div-without-mod routines,
1163 which are faster. */
1164 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1165 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1167 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1168 divmod libcalls instead. */
1169 set_optab_libfunc (smod_optab, DImode, NULL);
1170 set_optab_libfunc (umod_optab, DImode, NULL);
1171 set_optab_libfunc (smod_optab, SImode, NULL);
1172 set_optab_libfunc (umod_optab, SImode, NULL);
1174 /* Half-precision float operations. The compiler handles all operations
1175 with NULL libfuncs by converting the SFmode. */
1176 switch (arm_fp16_format)
1178 case ARM_FP16_FORMAT_IEEE:
1179 case ARM_FP16_FORMAT_ALTERNATIVE:
1181 /* Conversions. */
1182 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1183 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1184 ? "__gnu_f2h_ieee"
1185 : "__gnu_f2h_alternative"));
1186 set_conv_libfunc (sext_optab, SFmode, HFmode,
1187 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1188 ? "__gnu_h2f_ieee"
1189 : "__gnu_h2f_alternative"));
1191 /* Arithmetic. */
1192 set_optab_libfunc (add_optab, HFmode, NULL);
1193 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1194 set_optab_libfunc (smul_optab, HFmode, NULL);
1195 set_optab_libfunc (neg_optab, HFmode, NULL);
1196 set_optab_libfunc (sub_optab, HFmode, NULL);
1198 /* Comparisons. */
1199 set_optab_libfunc (eq_optab, HFmode, NULL);
1200 set_optab_libfunc (ne_optab, HFmode, NULL);
1201 set_optab_libfunc (lt_optab, HFmode, NULL);
1202 set_optab_libfunc (le_optab, HFmode, NULL);
1203 set_optab_libfunc (ge_optab, HFmode, NULL);
1204 set_optab_libfunc (gt_optab, HFmode, NULL);
1205 set_optab_libfunc (unord_optab, HFmode, NULL);
1206 break;
1208 default:
1209 break;
1212 if (TARGET_AAPCS_BASED)
1213 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1216 /* On AAPCS systems, this is the "struct __va_list". */
1217 static GTY(()) tree va_list_type;
1219 /* Return the type to use as __builtin_va_list. */
1220 static tree
1221 arm_build_builtin_va_list (void)
1223 tree va_list_name;
1224 tree ap_field;
1226 if (!TARGET_AAPCS_BASED)
1227 return std_build_builtin_va_list ();
1229 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1230 defined as:
1232 struct __va_list
1234 void *__ap;
1237 The C Library ABI further reinforces this definition in \S
1238 4.1.
1240 We must follow this definition exactly. The structure tag
1241 name is visible in C++ mangled names, and thus forms a part
1242 of the ABI. The field name may be used by people who
1243 #include <stdarg.h>. */
1244 /* Create the type. */
1245 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1246 /* Give it the required name. */
1247 va_list_name = build_decl (BUILTINS_LOCATION,
1248 TYPE_DECL,
1249 get_identifier ("__va_list"),
1250 va_list_type);
1251 DECL_ARTIFICIAL (va_list_name) = 1;
1252 TYPE_NAME (va_list_type) = va_list_name;
1253 TYPE_STUB_DECL (va_list_type) = va_list_name;
1254 /* Create the __ap field. */
1255 ap_field = build_decl (BUILTINS_LOCATION,
1256 FIELD_DECL,
1257 get_identifier ("__ap"),
1258 ptr_type_node);
1259 DECL_ARTIFICIAL (ap_field) = 1;
1260 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1261 TYPE_FIELDS (va_list_type) = ap_field;
1262 /* Compute its layout. */
1263 layout_type (va_list_type);
1265 return va_list_type;
1268 /* Return an expression of type "void *" pointing to the next
1269 available argument in a variable-argument list. VALIST is the
1270 user-level va_list object, of type __builtin_va_list. */
1271 static tree
1272 arm_extract_valist_ptr (tree valist)
1274 if (TREE_TYPE (valist) == error_mark_node)
1275 return error_mark_node;
1277 /* On an AAPCS target, the pointer is stored within "struct
1278 va_list". */
1279 if (TARGET_AAPCS_BASED)
1281 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1282 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1283 valist, ap_field, NULL_TREE);
1286 return valist;
1289 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1290 static void
1291 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1293 valist = arm_extract_valist_ptr (valist);
1294 std_expand_builtin_va_start (valist, nextarg);
1297 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1298 static tree
1299 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1300 gimple_seq *post_p)
1302 valist = arm_extract_valist_ptr (valist);
1303 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1306 /* Lookup NAME in SEL. */
1308 static const struct processors *
1309 arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
1311 if (!(name && *name))
1312 return NULL;
1314 for (; sel->name != NULL; sel++)
1316 if (streq (name, sel->name))
1317 return sel;
1320 error ("bad value (%s) for %s switch", name, desc);
1321 return NULL;
1324 /* Implement TARGET_HANDLE_OPTION. */
1326 static bool
1327 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1329 switch (code)
1331 case OPT_march_:
1332 arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
1333 return true;
1335 case OPT_mcpu_:
1336 arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
1337 return true;
1339 case OPT_mhard_float:
1340 target_float_abi_name = "hard";
1341 return true;
1343 case OPT_msoft_float:
1344 target_float_abi_name = "soft";
1345 return true;
1347 case OPT_mtune_:
1348 arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
1349 return true;
1351 default:
1352 return true;
1356 static void
1357 arm_target_help (void)
1359 int i;
1360 static int columns = 0;
1361 int remaining;
1363 /* If we have not done so already, obtain the desired maximum width of
1364 the output. Note - this is a duplication of the code at the start of
1365 gcc/opts.c:print_specific_help() - the two copies should probably be
1366 replaced by a single function. */
1367 if (columns == 0)
1369 const char *p;
1371 p = getenv ("COLUMNS");
1372 if (p != NULL)
1374 int value = atoi (p);
1376 if (value > 0)
1377 columns = value;
1380 if (columns == 0)
1381 /* Use a reasonable default. */
1382 columns = 80;
1385 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1387 /* The - 2 is because we know that the last entry in the array is NULL. */
1388 i = ARRAY_SIZE (all_cores) - 2;
1389 gcc_assert (i > 0);
1390 printf (" %s", all_cores[i].name);
1391 remaining = columns - (strlen (all_cores[i].name) + 4);
1392 gcc_assert (remaining >= 0);
1394 while (i--)
1396 int len = strlen (all_cores[i].name);
1398 if (remaining > len + 2)
1400 printf (", %s", all_cores[i].name);
1401 remaining -= len + 2;
1403 else
1405 if (remaining > 0)
1406 printf (",");
1407 printf ("\n %s", all_cores[i].name);
1408 remaining = columns - (len + 4);
1412 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1414 i = ARRAY_SIZE (all_architectures) - 2;
1415 gcc_assert (i > 0);
1417 printf (" %s", all_architectures[i].name);
1418 remaining = columns - (strlen (all_architectures[i].name) + 4);
1419 gcc_assert (remaining >= 0);
1421 while (i--)
1423 int len = strlen (all_architectures[i].name);
1425 if (remaining > len + 2)
1427 printf (", %s", all_architectures[i].name);
1428 remaining -= len + 2;
1430 else
1432 if (remaining > 0)
1433 printf (",");
1434 printf ("\n %s", all_architectures[i].name);
1435 remaining = columns - (len + 4);
1438 printf ("\n");
1442 /* Fix up any incompatible options that the user has specified. */
1443 static void
1444 arm_option_override (void)
1446 unsigned i;
1448 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1449 SUBTARGET_OVERRIDE_OPTIONS;
1450 #endif
1452 if (arm_selected_arch)
1454 if (arm_selected_cpu)
1456 /* Check for conflict between mcpu and march. */
1457 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1459 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1460 arm_selected_cpu->name, arm_selected_arch->name);
1461 /* -march wins for code generation.
1462 -mcpu wins for default tuning. */
1463 if (!arm_selected_tune)
1464 arm_selected_tune = arm_selected_cpu;
1466 arm_selected_cpu = arm_selected_arch;
1468 else
1469 /* -mcpu wins. */
1470 arm_selected_arch = NULL;
1472 else
1473 /* Pick a CPU based on the architecture. */
1474 arm_selected_cpu = arm_selected_arch;
1477 /* If the user did not specify a processor, choose one for them. */
1478 if (!arm_selected_cpu)
1480 const struct processors * sel;
1481 unsigned int sought;
1483 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1484 if (!arm_selected_cpu->name)
1486 #ifdef SUBTARGET_CPU_DEFAULT
1487 /* Use the subtarget default CPU if none was specified by
1488 configure. */
1489 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1490 #endif
1491 /* Default to ARM6. */
1492 if (!arm_selected_cpu->name)
1493 arm_selected_cpu = &all_cores[arm6];
1496 sel = arm_selected_cpu;
1497 insn_flags = sel->flags;
1499 /* Now check to see if the user has specified some command line
1500 switch that require certain abilities from the cpu. */
1501 sought = 0;
1503 if (TARGET_INTERWORK || TARGET_THUMB)
1505 sought |= (FL_THUMB | FL_MODE32);
1507 /* There are no ARM processors that support both APCS-26 and
1508 interworking. Therefore we force FL_MODE26 to be removed
1509 from insn_flags here (if it was set), so that the search
1510 below will always be able to find a compatible processor. */
1511 insn_flags &= ~FL_MODE26;
1514 if (sought != 0 && ((sought & insn_flags) != sought))
1516 /* Try to locate a CPU type that supports all of the abilities
1517 of the default CPU, plus the extra abilities requested by
1518 the user. */
1519 for (sel = all_cores; sel->name != NULL; sel++)
1520 if ((sel->flags & sought) == (sought | insn_flags))
1521 break;
1523 if (sel->name == NULL)
1525 unsigned current_bit_count = 0;
1526 const struct processors * best_fit = NULL;
1528 /* Ideally we would like to issue an error message here
1529 saying that it was not possible to find a CPU compatible
1530 with the default CPU, but which also supports the command
1531 line options specified by the programmer, and so they
1532 ought to use the -mcpu=<name> command line option to
1533 override the default CPU type.
1535 If we cannot find a cpu that has both the
1536 characteristics of the default cpu and the given
1537 command line options we scan the array again looking
1538 for a best match. */
1539 for (sel = all_cores; sel->name != NULL; sel++)
1540 if ((sel->flags & sought) == sought)
1542 unsigned count;
1544 count = bit_count (sel->flags & insn_flags);
1546 if (count >= current_bit_count)
1548 best_fit = sel;
1549 current_bit_count = count;
1553 gcc_assert (best_fit);
1554 sel = best_fit;
1557 arm_selected_cpu = sel;
1561 gcc_assert (arm_selected_cpu);
1562 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1563 if (!arm_selected_tune)
1564 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1566 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1567 insn_flags = arm_selected_cpu->flags;
1569 arm_tune = arm_selected_tune->core;
1570 tune_flags = arm_selected_tune->flags;
1571 current_tune = arm_selected_tune->tune;
1573 if (target_fp16_format_name)
1575 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1577 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1579 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1580 break;
1583 if (i == ARRAY_SIZE (all_fp16_formats))
1584 error ("invalid __fp16 format option: -mfp16-format=%s",
1585 target_fp16_format_name);
1587 else
1588 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1590 if (target_abi_name)
1592 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1594 if (streq (arm_all_abis[i].name, target_abi_name))
1596 arm_abi = arm_all_abis[i].abi_type;
1597 break;
1600 if (i == ARRAY_SIZE (arm_all_abis))
1601 error ("invalid ABI option: -mabi=%s", target_abi_name);
1603 else
1604 arm_abi = ARM_DEFAULT_ABI;
1606 /* Make sure that the processor choice does not conflict with any of the
1607 other command line choices. */
1608 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1609 error ("target CPU does not support ARM mode");
1611 /* BPABI targets use linker tricks to allow interworking on cores
1612 without thumb support. */
1613 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1615 warning (0, "target CPU does not support interworking" );
1616 target_flags &= ~MASK_INTERWORK;
1619 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1621 warning (0, "target CPU does not support THUMB instructions");
1622 target_flags &= ~MASK_THUMB;
1625 if (TARGET_APCS_FRAME && TARGET_THUMB)
1627 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1628 target_flags &= ~MASK_APCS_FRAME;
1631 /* Callee super interworking implies thumb interworking. Adding
1632 this to the flags here simplifies the logic elsewhere. */
1633 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1634 target_flags |= MASK_INTERWORK;
1636 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1637 from here where no function is being compiled currently. */
1638 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1639 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1641 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1642 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1644 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1646 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1647 target_flags |= MASK_APCS_FRAME;
1650 if (TARGET_POKE_FUNCTION_NAME)
1651 target_flags |= MASK_APCS_FRAME;
1653 if (TARGET_APCS_REENT && flag_pic)
1654 error ("-fpic and -mapcs-reent are incompatible");
1656 if (TARGET_APCS_REENT)
1657 warning (0, "APCS reentrant code not supported. Ignored");
1659 /* If this target is normally configured to use APCS frames, warn if they
1660 are turned off and debugging is turned on. */
1661 if (TARGET_ARM
1662 && write_symbols != NO_DEBUG
1663 && !TARGET_APCS_FRAME
1664 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1665 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1667 if (TARGET_APCS_FLOAT)
1668 warning (0, "passing floating point arguments in fp regs not yet supported");
1670 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1671 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1672 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1673 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1674 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1675 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1676 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1677 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1678 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1679 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1680 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1681 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1682 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1683 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1685 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1686 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1687 thumb_code = TARGET_ARM == 0;
1688 thumb1_code = TARGET_THUMB1 != 0;
1689 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1690 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1691 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1692 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1693 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1695 /* If we are not using the default (ARM mode) section anchor offset
1696 ranges, then set the correct ranges now. */
1697 if (TARGET_THUMB1)
1699 /* Thumb-1 LDR instructions cannot have negative offsets.
1700 Permissible positive offset ranges are 5-bit (for byte loads),
1701 6-bit (for halfword loads), or 7-bit (for word loads).
1702 Empirical results suggest a 7-bit anchor range gives the best
1703 overall code size. */
1704 targetm.min_anchor_offset = 0;
1705 targetm.max_anchor_offset = 127;
1707 else if (TARGET_THUMB2)
1709 /* The minimum is set such that the total size of the block
1710 for a particular anchor is 248 + 1 + 4095 bytes, which is
1711 divisible by eight, ensuring natural spacing of anchors. */
1712 targetm.min_anchor_offset = -248;
1713 targetm.max_anchor_offset = 4095;
1716 /* V5 code we generate is completely interworking capable, so we turn off
1717 TARGET_INTERWORK here to avoid many tests later on. */
1719 /* XXX However, we must pass the right pre-processor defines to CPP
1720 or GLD can get confused. This is a hack. */
1721 if (TARGET_INTERWORK)
1722 arm_cpp_interwork = 1;
1724 if (arm_arch5)
1725 target_flags &= ~MASK_INTERWORK;
1727 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1728 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1730 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1731 error ("iwmmxt abi requires an iwmmxt capable cpu");
1733 if (target_fpu_name == NULL && target_fpe_name != NULL)
1735 if (streq (target_fpe_name, "2"))
1736 target_fpu_name = "fpe2";
1737 else if (streq (target_fpe_name, "3"))
1738 target_fpu_name = "fpe3";
1739 else
1740 error ("invalid floating point emulation option: -mfpe=%s",
1741 target_fpe_name);
1744 if (target_fpu_name == NULL)
1746 #ifdef FPUTYPE_DEFAULT
1747 target_fpu_name = FPUTYPE_DEFAULT;
1748 #else
1749 if (arm_arch_cirrus)
1750 target_fpu_name = "maverick";
1751 else
1752 target_fpu_name = "fpe2";
1753 #endif
1756 arm_fpu_desc = NULL;
1757 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1759 if (streq (all_fpus[i].name, target_fpu_name))
1761 arm_fpu_desc = &all_fpus[i];
1762 break;
1766 if (!arm_fpu_desc)
1768 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1769 return;
1772 switch (arm_fpu_desc->model)
1774 case ARM_FP_MODEL_FPA:
1775 if (arm_fpu_desc->rev == 2)
1776 arm_fpu_attr = FPU_FPE2;
1777 else if (arm_fpu_desc->rev == 3)
1778 arm_fpu_attr = FPU_FPE3;
1779 else
1780 arm_fpu_attr = FPU_FPA;
1781 break;
1783 case ARM_FP_MODEL_MAVERICK:
1784 arm_fpu_attr = FPU_MAVERICK;
1785 break;
1787 case ARM_FP_MODEL_VFP:
1788 arm_fpu_attr = FPU_VFP;
1789 break;
1791 default:
1792 gcc_unreachable();
1795 if (target_float_abi_name != NULL)
1797 /* The user specified a FP ABI. */
1798 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1800 if (streq (all_float_abis[i].name, target_float_abi_name))
1802 arm_float_abi = all_float_abis[i].abi_type;
1803 break;
1806 if (i == ARRAY_SIZE (all_float_abis))
1807 error ("invalid floating point abi: -mfloat-abi=%s",
1808 target_float_abi_name);
1810 else
1811 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1813 if (TARGET_AAPCS_BASED
1814 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1815 error ("FPA is unsupported in the AAPCS");
1817 if (TARGET_AAPCS_BASED)
1819 if (TARGET_CALLER_INTERWORKING)
1820 error ("AAPCS does not support -mcaller-super-interworking");
1821 else
1822 if (TARGET_CALLEE_INTERWORKING)
1823 error ("AAPCS does not support -mcallee-super-interworking");
1826 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1827 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1828 will ever exist. GCC makes no attempt to support this combination. */
1829 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1830 sorry ("iWMMXt and hardware floating point");
1832 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1833 if (TARGET_THUMB2 && TARGET_IWMMXT)
1834 sorry ("Thumb-2 iWMMXt");
1836 /* __fp16 support currently assumes the core has ldrh. */
1837 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1838 sorry ("__fp16 and no ldrh");
1840 /* If soft-float is specified then don't use FPU. */
1841 if (TARGET_SOFT_FLOAT)
1842 arm_fpu_attr = FPU_NONE;
1844 if (TARGET_AAPCS_BASED)
1846 if (arm_abi == ARM_ABI_IWMMXT)
1847 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1848 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1849 && TARGET_HARD_FLOAT
1850 && TARGET_VFP)
1851 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1852 else
1853 arm_pcs_default = ARM_PCS_AAPCS;
1855 else
1857 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1858 sorry ("-mfloat-abi=hard and VFP");
1860 if (arm_abi == ARM_ABI_APCS)
1861 arm_pcs_default = ARM_PCS_APCS;
1862 else
1863 arm_pcs_default = ARM_PCS_ATPCS;
1866 /* For arm2/3 there is no need to do any scheduling if there is only
1867 a floating point emulator, or we are doing software floating-point. */
1868 if ((TARGET_SOFT_FLOAT
1869 || (TARGET_FPA && arm_fpu_desc->rev))
1870 && (tune_flags & FL_MODE32) == 0)
1871 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1873 if (target_thread_switch)
1875 if (strcmp (target_thread_switch, "soft") == 0)
1876 target_thread_pointer = TP_SOFT;
1877 else if (strcmp (target_thread_switch, "auto") == 0)
1878 target_thread_pointer = TP_AUTO;
1879 else if (strcmp (target_thread_switch, "cp15") == 0)
1880 target_thread_pointer = TP_CP15;
1881 else
1882 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1885 /* Use the cp15 method if it is available. */
1886 if (target_thread_pointer == TP_AUTO)
1888 if (arm_arch6k && !TARGET_THUMB1)
1889 target_thread_pointer = TP_CP15;
1890 else
1891 target_thread_pointer = TP_SOFT;
1894 if (TARGET_HARD_TP && TARGET_THUMB1)
1895 error ("can not use -mtp=cp15 with 16-bit Thumb");
1897 /* Override the default structure alignment for AAPCS ABI. */
1898 if (TARGET_AAPCS_BASED)
1899 arm_structure_size_boundary = 8;
1901 if (structure_size_string != NULL)
1903 int size = strtol (structure_size_string, NULL, 0);
1905 if (size == 8 || size == 32
1906 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1907 arm_structure_size_boundary = size;
1908 else
1909 warning (0, "structure size boundary can only be set to %s",
1910 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1913 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1915 error ("RTP PIC is incompatible with Thumb");
1916 flag_pic = 0;
1919 /* If stack checking is disabled, we can use r10 as the PIC register,
1920 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1921 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1923 if (TARGET_VXWORKS_RTP)
1924 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1925 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1928 if (flag_pic && TARGET_VXWORKS_RTP)
1929 arm_pic_register = 9;
1931 if (arm_pic_register_string != NULL)
1933 int pic_register = decode_reg_name (arm_pic_register_string);
1935 if (!flag_pic)
1936 warning (0, "-mpic-register= is useless without -fpic");
1938 /* Prevent the user from choosing an obviously stupid PIC register. */
1939 else if (pic_register < 0 || call_used_regs[pic_register]
1940 || pic_register == HARD_FRAME_POINTER_REGNUM
1941 || pic_register == STACK_POINTER_REGNUM
1942 || pic_register >= PC_REGNUM
1943 || (TARGET_VXWORKS_RTP
1944 && (unsigned int) pic_register != arm_pic_register))
1945 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1946 else
1947 arm_pic_register = pic_register;
1950 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1951 if (fix_cm3_ldrd == 2)
1953 if (arm_selected_cpu->core == cortexm3)
1954 fix_cm3_ldrd = 1;
1955 else
1956 fix_cm3_ldrd = 0;
1959 if (TARGET_THUMB1 && flag_schedule_insns)
1961 /* Don't warn since it's on by default in -O2. */
1962 flag_schedule_insns = 0;
1965 if (optimize_size)
1967 /* If optimizing for size, bump the number of instructions that we
1968 are prepared to conditionally execute (even on a StrongARM). */
1969 max_insns_skipped = 6;
1971 else
1973 /* StrongARM has early execution of branches, so a sequence
1974 that is worth skipping is shorter. */
1975 if (arm_tune_strongarm)
1976 max_insns_skipped = 3;
1979 /* Hot/Cold partitioning is not currently supported, since we can't
1980 handle literal pool placement in that case. */
1981 if (flag_reorder_blocks_and_partition)
1983 inform (input_location,
1984 "-freorder-blocks-and-partition not supported on this architecture");
1985 flag_reorder_blocks_and_partition = 0;
1986 flag_reorder_blocks = 1;
1989 if (flag_pic)
1990 /* Hoisting PIC address calculations more aggressively provides a small,
1991 but measurable, size reduction for PIC code. Therefore, we decrease
1992 the bar for unrestricted expression hoisting to the cost of PIC address
1993 calculation, which is 2 instructions. */
1994 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1995 global_options.x_param_values,
1996 global_options_set.x_param_values);
1998 /* ARM EABI defaults to strict volatile bitfields. */
1999 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
2000 flag_strict_volatile_bitfields = 1;
2002 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
2003 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
2004 if (flag_prefetch_loop_arrays < 0
2005 && HAVE_prefetch
2006 && optimize >= 3
2007 && current_tune->num_prefetch_slots > 0)
2008 flag_prefetch_loop_arrays = 1;
2010 /* Set up parameters to be used in prefetching algorithm. Do not override the
2011 defaults unless we are tuning for a core we have researched values for. */
2012 if (current_tune->num_prefetch_slots > 0)
2013 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2014 current_tune->num_prefetch_slots,
2015 global_options.x_param_values,
2016 global_options_set.x_param_values);
2017 if (current_tune->l1_cache_line_size >= 0)
2018 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2019 current_tune->l1_cache_line_size,
2020 global_options.x_param_values,
2021 global_options_set.x_param_values);
2022 if (current_tune->l1_cache_size >= 0)
2023 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2024 current_tune->l1_cache_size,
2025 global_options.x_param_values,
2026 global_options_set.x_param_values);
2028 /* Register global variables with the garbage collector. */
2029 arm_add_gc_roots ();
2032 static void
2033 arm_add_gc_roots (void)
2035 gcc_obstack_init(&minipool_obstack);
2036 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2039 /* A table of known ARM exception types.
2040 For use with the interrupt function attribute. */
2042 typedef struct
2044 const char *const arg;
2045 const unsigned long return_value;
2047 isr_attribute_arg;
2049 static const isr_attribute_arg isr_attribute_args [] =
2051 { "IRQ", ARM_FT_ISR },
2052 { "irq", ARM_FT_ISR },
2053 { "FIQ", ARM_FT_FIQ },
2054 { "fiq", ARM_FT_FIQ },
2055 { "ABORT", ARM_FT_ISR },
2056 { "abort", ARM_FT_ISR },
2057 { "ABORT", ARM_FT_ISR },
2058 { "abort", ARM_FT_ISR },
2059 { "UNDEF", ARM_FT_EXCEPTION },
2060 { "undef", ARM_FT_EXCEPTION },
2061 { "SWI", ARM_FT_EXCEPTION },
2062 { "swi", ARM_FT_EXCEPTION },
2063 { NULL, ARM_FT_NORMAL }
2066 /* Returns the (interrupt) function type of the current
2067 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2069 static unsigned long
2070 arm_isr_value (tree argument)
2072 const isr_attribute_arg * ptr;
2073 const char * arg;
2075 if (!arm_arch_notm)
2076 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2078 /* No argument - default to IRQ. */
2079 if (argument == NULL_TREE)
2080 return ARM_FT_ISR;
2082 /* Get the value of the argument. */
2083 if (TREE_VALUE (argument) == NULL_TREE
2084 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2085 return ARM_FT_UNKNOWN;
2087 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2089 /* Check it against the list of known arguments. */
2090 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2091 if (streq (arg, ptr->arg))
2092 return ptr->return_value;
2094 /* An unrecognized interrupt type. */
2095 return ARM_FT_UNKNOWN;
2098 /* Computes the type of the current function. */
2100 static unsigned long
2101 arm_compute_func_type (void)
2103 unsigned long type = ARM_FT_UNKNOWN;
2104 tree a;
2105 tree attr;
2107 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2109 /* Decide if the current function is volatile. Such functions
2110 never return, and many memory cycles can be saved by not storing
2111 register values that will never be needed again. This optimization
2112 was added to speed up context switching in a kernel application. */
2113 if (optimize > 0
2114 && (TREE_NOTHROW (current_function_decl)
2115 || !(flag_unwind_tables
2116 || (flag_exceptions
2117 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2118 && TREE_THIS_VOLATILE (current_function_decl))
2119 type |= ARM_FT_VOLATILE;
2121 if (cfun->static_chain_decl != NULL)
2122 type |= ARM_FT_NESTED;
2124 attr = DECL_ATTRIBUTES (current_function_decl);
2126 a = lookup_attribute ("naked", attr);
2127 if (a != NULL_TREE)
2128 type |= ARM_FT_NAKED;
2130 a = lookup_attribute ("isr", attr);
2131 if (a == NULL_TREE)
2132 a = lookup_attribute ("interrupt", attr);
2134 if (a == NULL_TREE)
2135 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2136 else
2137 type |= arm_isr_value (TREE_VALUE (a));
2139 return type;
2142 /* Returns the type of the current function. */
2144 unsigned long
2145 arm_current_func_type (void)
2147 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2148 cfun->machine->func_type = arm_compute_func_type ();
2150 return cfun->machine->func_type;
2153 bool
2154 arm_allocate_stack_slots_for_args (void)
2156 /* Naked functions should not allocate stack slots for arguments. */
2157 return !IS_NAKED (arm_current_func_type ());
2161 /* Output assembler code for a block containing the constant parts
2162 of a trampoline, leaving space for the variable parts.
2164 On the ARM, (if r8 is the static chain regnum, and remembering that
2165 referencing pc adds an offset of 8) the trampoline looks like:
2166 ldr r8, [pc, #0]
2167 ldr pc, [pc]
2168 .word static chain value
2169 .word function's address
2170 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2172 static void
2173 arm_asm_trampoline_template (FILE *f)
2175 if (TARGET_ARM)
2177 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2178 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2180 else if (TARGET_THUMB2)
2182 /* The Thumb-2 trampoline is similar to the arm implementation.
2183 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2184 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2185 STATIC_CHAIN_REGNUM, PC_REGNUM);
2186 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2188 else
2190 ASM_OUTPUT_ALIGN (f, 2);
2191 fprintf (f, "\t.code\t16\n");
2192 fprintf (f, ".Ltrampoline_start:\n");
2193 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2194 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2195 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2196 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2197 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2198 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2200 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2201 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2204 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2206 static void
2207 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2209 rtx fnaddr, mem, a_tramp;
2211 emit_block_move (m_tramp, assemble_trampoline_template (),
2212 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2214 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2215 emit_move_insn (mem, chain_value);
2217 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2218 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2219 emit_move_insn (mem, fnaddr);
2221 a_tramp = XEXP (m_tramp, 0);
2222 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2223 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2224 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2227 /* Thumb trampolines should be entered in thumb mode, so set
2228 the bottom bit of the address. */
2230 static rtx
2231 arm_trampoline_adjust_address (rtx addr)
2233 if (TARGET_THUMB)
2234 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2235 NULL, 0, OPTAB_LIB_WIDEN);
2236 return addr;
2239 /* Return 1 if it is possible to return using a single instruction.
2240 If SIBLING is non-null, this is a test for a return before a sibling
2241 call. SIBLING is the call insn, so we can examine its register usage. */
2244 use_return_insn (int iscond, rtx sibling)
2246 int regno;
2247 unsigned int func_type;
2248 unsigned long saved_int_regs;
2249 unsigned HOST_WIDE_INT stack_adjust;
2250 arm_stack_offsets *offsets;
2252 /* Never use a return instruction before reload has run. */
2253 if (!reload_completed)
2254 return 0;
2256 func_type = arm_current_func_type ();
2258 /* Naked, volatile and stack alignment functions need special
2259 consideration. */
2260 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2261 return 0;
2263 /* So do interrupt functions that use the frame pointer and Thumb
2264 interrupt functions. */
2265 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2266 return 0;
2268 offsets = arm_get_frame_offsets ();
2269 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2271 /* As do variadic functions. */
2272 if (crtl->args.pretend_args_size
2273 || cfun->machine->uses_anonymous_args
2274 /* Or if the function calls __builtin_eh_return () */
2275 || crtl->calls_eh_return
2276 /* Or if the function calls alloca */
2277 || cfun->calls_alloca
2278 /* Or if there is a stack adjustment. However, if the stack pointer
2279 is saved on the stack, we can use a pre-incrementing stack load. */
2280 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2281 && stack_adjust == 4)))
2282 return 0;
2284 saved_int_regs = offsets->saved_regs_mask;
2286 /* Unfortunately, the insn
2288 ldmib sp, {..., sp, ...}
2290 triggers a bug on most SA-110 based devices, such that the stack
2291 pointer won't be correctly restored if the instruction takes a
2292 page fault. We work around this problem by popping r3 along with
2293 the other registers, since that is never slower than executing
2294 another instruction.
2296 We test for !arm_arch5 here, because code for any architecture
2297 less than this could potentially be run on one of the buggy
2298 chips. */
2299 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2301 /* Validate that r3 is a call-clobbered register (always true in
2302 the default abi) ... */
2303 if (!call_used_regs[3])
2304 return 0;
2306 /* ... that it isn't being used for a return value ... */
2307 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2308 return 0;
2310 /* ... or for a tail-call argument ... */
2311 if (sibling)
2313 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2315 if (find_regno_fusage (sibling, USE, 3))
2316 return 0;
2319 /* ... and that there are no call-saved registers in r0-r2
2320 (always true in the default ABI). */
2321 if (saved_int_regs & 0x7)
2322 return 0;
2325 /* Can't be done if interworking with Thumb, and any registers have been
2326 stacked. */
2327 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2328 return 0;
2330 /* On StrongARM, conditional returns are expensive if they aren't
2331 taken and multiple registers have been stacked. */
2332 if (iscond && arm_tune_strongarm)
2334 /* Conditional return when just the LR is stored is a simple
2335 conditional-load instruction, that's not expensive. */
2336 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2337 return 0;
2339 if (flag_pic
2340 && arm_pic_register != INVALID_REGNUM
2341 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2342 return 0;
2345 /* If there are saved registers but the LR isn't saved, then we need
2346 two instructions for the return. */
2347 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2348 return 0;
2350 /* Can't be done if any of the FPA regs are pushed,
2351 since this also requires an insn. */
2352 if (TARGET_HARD_FLOAT && TARGET_FPA)
2353 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2354 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2355 return 0;
2357 /* Likewise VFP regs. */
2358 if (TARGET_HARD_FLOAT && TARGET_VFP)
2359 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2360 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2361 return 0;
2363 if (TARGET_REALLY_IWMMXT)
2364 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2365 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2366 return 0;
2368 return 1;
2371 /* Return TRUE if int I is a valid immediate ARM constant. */
2374 const_ok_for_arm (HOST_WIDE_INT i)
2376 int lowbit;
2378 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2379 be all zero, or all one. */
2380 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2381 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2382 != ((~(unsigned HOST_WIDE_INT) 0)
2383 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2384 return FALSE;
2386 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2388 /* Fast return for 0 and small values. We must do this for zero, since
2389 the code below can't handle that one case. */
2390 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2391 return TRUE;
2393 /* Get the number of trailing zeros. */
2394 lowbit = ffs((int) i) - 1;
2396 /* Only even shifts are allowed in ARM mode so round down to the
2397 nearest even number. */
2398 if (TARGET_ARM)
2399 lowbit &= ~1;
2401 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2402 return TRUE;
2404 if (TARGET_ARM)
2406 /* Allow rotated constants in ARM mode. */
2407 if (lowbit <= 4
2408 && ((i & ~0xc000003f) == 0
2409 || (i & ~0xf000000f) == 0
2410 || (i & ~0xfc000003) == 0))
2411 return TRUE;
2413 else
2415 HOST_WIDE_INT v;
2417 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2418 v = i & 0xff;
2419 v |= v << 16;
2420 if (i == v || i == (v | (v << 8)))
2421 return TRUE;
2423 /* Allow repeated pattern 0xXY00XY00. */
2424 v = i & 0xff00;
2425 v |= v << 16;
2426 if (i == v)
2427 return TRUE;
2430 return FALSE;
2433 /* Return true if I is a valid constant for the operation CODE. */
2434 static int
2435 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2437 if (const_ok_for_arm (i))
2438 return 1;
2440 switch (code)
2442 case PLUS:
2443 case COMPARE:
2444 case EQ:
2445 case NE:
2446 case GT:
2447 case LE:
2448 case LT:
2449 case GE:
2450 case GEU:
2451 case LTU:
2452 case GTU:
2453 case LEU:
2454 case UNORDERED:
2455 case ORDERED:
2456 case UNEQ:
2457 case UNGE:
2458 case UNLT:
2459 case UNGT:
2460 case UNLE:
2461 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2463 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2464 case XOR:
2465 return 0;
2467 case IOR:
2468 if (TARGET_THUMB2)
2469 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2470 return 0;
2472 case AND:
2473 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2475 default:
2476 gcc_unreachable ();
2480 /* Emit a sequence of insns to handle a large constant.
2481 CODE is the code of the operation required, it can be any of SET, PLUS,
2482 IOR, AND, XOR, MINUS;
2483 MODE is the mode in which the operation is being performed;
2484 VAL is the integer to operate on;
2485 SOURCE is the other operand (a register, or a null-pointer for SET);
2486 SUBTARGETS means it is safe to create scratch registers if that will
2487 either produce a simpler sequence, or we will want to cse the values.
2488 Return value is the number of insns emitted. */
2490 /* ??? Tweak this for thumb2. */
2492 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2493 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2495 rtx cond;
2497 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2498 cond = COND_EXEC_TEST (PATTERN (insn));
2499 else
2500 cond = NULL_RTX;
2502 if (subtargets || code == SET
2503 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2504 && REGNO (target) != REGNO (source)))
2506 /* After arm_reorg has been called, we can't fix up expensive
2507 constants by pushing them into memory so we must synthesize
2508 them in-line, regardless of the cost. This is only likely to
2509 be more costly on chips that have load delay slots and we are
2510 compiling without running the scheduler (so no splitting
2511 occurred before the final instruction emission).
2513 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2515 if (!after_arm_reorg
2516 && !cond
2517 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2518 1, 0)
2519 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2520 + (code != SET))))
2522 if (code == SET)
2524 /* Currently SET is the only monadic value for CODE, all
2525 the rest are diadic. */
2526 if (TARGET_USE_MOVT)
2527 arm_emit_movpair (target, GEN_INT (val));
2528 else
2529 emit_set_insn (target, GEN_INT (val));
2531 return 1;
2533 else
2535 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2537 if (TARGET_USE_MOVT)
2538 arm_emit_movpair (temp, GEN_INT (val));
2539 else
2540 emit_set_insn (temp, GEN_INT (val));
2542 /* For MINUS, the value is subtracted from, since we never
2543 have subtraction of a constant. */
2544 if (code == MINUS)
2545 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2546 else
2547 emit_set_insn (target,
2548 gen_rtx_fmt_ee (code, mode, source, temp));
2549 return 2;
2554 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2558 /* Return the number of instructions required to synthesize the given
2559 constant, if we start emitting them from bit-position I. */
2560 static int
2561 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2563 HOST_WIDE_INT temp1;
2564 int step_size = TARGET_ARM ? 2 : 1;
2565 int num_insns = 0;
2567 gcc_assert (TARGET_ARM || i == 0);
2571 int end;
2573 if (i <= 0)
2574 i += 32;
2575 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2577 end = i - 8;
2578 if (end < 0)
2579 end += 32;
2580 temp1 = remainder & ((0x0ff << end)
2581 | ((i < end) ? (0xff >> (32 - end)) : 0));
2582 remainder &= ~temp1;
2583 num_insns++;
2584 i -= 8 - step_size;
2586 i -= step_size;
2587 } while (remainder);
2588 return num_insns;
2591 static int
2592 find_best_start (unsigned HOST_WIDE_INT remainder)
2594 int best_consecutive_zeros = 0;
2595 int i;
2596 int best_start = 0;
2598 /* If we aren't targetting ARM, the best place to start is always at
2599 the bottom. */
2600 if (! TARGET_ARM)
2601 return 0;
2603 for (i = 0; i < 32; i += 2)
2605 int consecutive_zeros = 0;
2607 if (!(remainder & (3 << i)))
2609 while ((i < 32) && !(remainder & (3 << i)))
2611 consecutive_zeros += 2;
2612 i += 2;
2614 if (consecutive_zeros > best_consecutive_zeros)
2616 best_consecutive_zeros = consecutive_zeros;
2617 best_start = i - consecutive_zeros;
2619 i -= 2;
2623 /* So long as it won't require any more insns to do so, it's
2624 desirable to emit a small constant (in bits 0...9) in the last
2625 insn. This way there is more chance that it can be combined with
2626 a later addressing insn to form a pre-indexed load or store
2627 operation. Consider:
2629 *((volatile int *)0xe0000100) = 1;
2630 *((volatile int *)0xe0000110) = 2;
2632 We want this to wind up as:
2634 mov rA, #0xe0000000
2635 mov rB, #1
2636 str rB, [rA, #0x100]
2637 mov rB, #2
2638 str rB, [rA, #0x110]
2640 rather than having to synthesize both large constants from scratch.
2642 Therefore, we calculate how many insns would be required to emit
2643 the constant starting from `best_start', and also starting from
2644 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2645 yield a shorter sequence, we may as well use zero. */
2646 if (best_start != 0
2647 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2648 && (count_insns_for_constant (remainder, 0) <=
2649 count_insns_for_constant (remainder, best_start)))
2650 best_start = 0;
2652 return best_start;
2655 /* Emit an instruction with the indicated PATTERN. If COND is
2656 non-NULL, conditionalize the execution of the instruction on COND
2657 being true. */
2659 static void
2660 emit_constant_insn (rtx cond, rtx pattern)
2662 if (cond)
2663 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2664 emit_insn (pattern);
2667 /* As above, but extra parameter GENERATE which, if clear, suppresses
2668 RTL generation. */
2669 /* ??? This needs more work for thumb2. */
2671 static int
2672 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2673 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2674 int generate)
2676 int can_invert = 0;
2677 int can_negate = 0;
2678 int final_invert = 0;
2679 int can_negate_initial = 0;
2680 int i;
2681 int num_bits_set = 0;
2682 int set_sign_bit_copies = 0;
2683 int clear_sign_bit_copies = 0;
2684 int clear_zero_bit_copies = 0;
2685 int set_zero_bit_copies = 0;
2686 int insns = 0;
2687 unsigned HOST_WIDE_INT temp1, temp2;
2688 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2689 int step_size = TARGET_ARM ? 2 : 1;
2691 /* Find out which operations are safe for a given CODE. Also do a quick
2692 check for degenerate cases; these can occur when DImode operations
2693 are split. */
2694 switch (code)
2696 case SET:
2697 can_invert = 1;
2698 can_negate = 1;
2699 break;
2701 case PLUS:
2702 can_negate = 1;
2703 can_negate_initial = 1;
2704 break;
2706 case IOR:
2707 if (remainder == 0xffffffff)
2709 if (generate)
2710 emit_constant_insn (cond,
2711 gen_rtx_SET (VOIDmode, target,
2712 GEN_INT (ARM_SIGN_EXTEND (val))));
2713 return 1;
2716 if (remainder == 0)
2718 if (reload_completed && rtx_equal_p (target, source))
2719 return 0;
2721 if (generate)
2722 emit_constant_insn (cond,
2723 gen_rtx_SET (VOIDmode, target, source));
2724 return 1;
2727 if (TARGET_THUMB2)
2728 can_invert = 1;
2729 break;
2731 case AND:
2732 if (remainder == 0)
2734 if (generate)
2735 emit_constant_insn (cond,
2736 gen_rtx_SET (VOIDmode, target, const0_rtx));
2737 return 1;
2739 if (remainder == 0xffffffff)
2741 if (reload_completed && rtx_equal_p (target, source))
2742 return 0;
2743 if (generate)
2744 emit_constant_insn (cond,
2745 gen_rtx_SET (VOIDmode, target, source));
2746 return 1;
2748 can_invert = 1;
2749 break;
2751 case XOR:
2752 if (remainder == 0)
2754 if (reload_completed && rtx_equal_p (target, source))
2755 return 0;
2756 if (generate)
2757 emit_constant_insn (cond,
2758 gen_rtx_SET (VOIDmode, target, source));
2759 return 1;
2762 if (remainder == 0xffffffff)
2764 if (generate)
2765 emit_constant_insn (cond,
2766 gen_rtx_SET (VOIDmode, target,
2767 gen_rtx_NOT (mode, source)));
2768 return 1;
2770 break;
2772 case MINUS:
2773 /* We treat MINUS as (val - source), since (source - val) is always
2774 passed as (source + (-val)). */
2775 if (remainder == 0)
2777 if (generate)
2778 emit_constant_insn (cond,
2779 gen_rtx_SET (VOIDmode, target,
2780 gen_rtx_NEG (mode, source)));
2781 return 1;
2783 if (const_ok_for_arm (val))
2785 if (generate)
2786 emit_constant_insn (cond,
2787 gen_rtx_SET (VOIDmode, target,
2788 gen_rtx_MINUS (mode, GEN_INT (val),
2789 source)));
2790 return 1;
2792 can_negate = 1;
2794 break;
2796 default:
2797 gcc_unreachable ();
2800 /* If we can do it in one insn get out quickly. */
2801 if (const_ok_for_arm (val)
2802 || (can_negate_initial && const_ok_for_arm (-val))
2803 || (can_invert && const_ok_for_arm (~val)))
2805 if (generate)
2806 emit_constant_insn (cond,
2807 gen_rtx_SET (VOIDmode, target,
2808 (source
2809 ? gen_rtx_fmt_ee (code, mode, source,
2810 GEN_INT (val))
2811 : GEN_INT (val))));
2812 return 1;
2815 /* Calculate a few attributes that may be useful for specific
2816 optimizations. */
2817 /* Count number of leading zeros. */
2818 for (i = 31; i >= 0; i--)
2820 if ((remainder & (1 << i)) == 0)
2821 clear_sign_bit_copies++;
2822 else
2823 break;
2826 /* Count number of leading 1's. */
2827 for (i = 31; i >= 0; i--)
2829 if ((remainder & (1 << i)) != 0)
2830 set_sign_bit_copies++;
2831 else
2832 break;
2835 /* Count number of trailing zero's. */
2836 for (i = 0; i <= 31; i++)
2838 if ((remainder & (1 << i)) == 0)
2839 clear_zero_bit_copies++;
2840 else
2841 break;
2844 /* Count number of trailing 1's. */
2845 for (i = 0; i <= 31; i++)
2847 if ((remainder & (1 << i)) != 0)
2848 set_zero_bit_copies++;
2849 else
2850 break;
2853 switch (code)
2855 case SET:
2856 /* See if we can use movw. */
2857 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2859 if (generate)
2860 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2861 GEN_INT (val)));
2862 return 1;
2865 /* See if we can do this by sign_extending a constant that is known
2866 to be negative. This is a good, way of doing it, since the shift
2867 may well merge into a subsequent insn. */
2868 if (set_sign_bit_copies > 1)
2870 if (const_ok_for_arm
2871 (temp1 = ARM_SIGN_EXTEND (remainder
2872 << (set_sign_bit_copies - 1))))
2874 if (generate)
2876 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2877 emit_constant_insn (cond,
2878 gen_rtx_SET (VOIDmode, new_src,
2879 GEN_INT (temp1)));
2880 emit_constant_insn (cond,
2881 gen_ashrsi3 (target, new_src,
2882 GEN_INT (set_sign_bit_copies - 1)));
2884 return 2;
2886 /* For an inverted constant, we will need to set the low bits,
2887 these will be shifted out of harm's way. */
2888 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2889 if (const_ok_for_arm (~temp1))
2891 if (generate)
2893 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2894 emit_constant_insn (cond,
2895 gen_rtx_SET (VOIDmode, new_src,
2896 GEN_INT (temp1)));
2897 emit_constant_insn (cond,
2898 gen_ashrsi3 (target, new_src,
2899 GEN_INT (set_sign_bit_copies - 1)));
2901 return 2;
2905 /* See if we can calculate the value as the difference between two
2906 valid immediates. */
2907 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2909 int topshift = clear_sign_bit_copies & ~1;
2911 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2912 & (0xff000000 >> topshift));
2914 /* If temp1 is zero, then that means the 9 most significant
2915 bits of remainder were 1 and we've caused it to overflow.
2916 When topshift is 0 we don't need to do anything since we
2917 can borrow from 'bit 32'. */
2918 if (temp1 == 0 && topshift != 0)
2919 temp1 = 0x80000000 >> (topshift - 1);
2921 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2923 if (const_ok_for_arm (temp2))
2925 if (generate)
2927 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2928 emit_constant_insn (cond,
2929 gen_rtx_SET (VOIDmode, new_src,
2930 GEN_INT (temp1)));
2931 emit_constant_insn (cond,
2932 gen_addsi3 (target, new_src,
2933 GEN_INT (-temp2)));
2936 return 2;
2940 /* See if we can generate this by setting the bottom (or the top)
2941 16 bits, and then shifting these into the other half of the
2942 word. We only look for the simplest cases, to do more would cost
2943 too much. Be careful, however, not to generate this when the
2944 alternative would take fewer insns. */
2945 if (val & 0xffff0000)
2947 temp1 = remainder & 0xffff0000;
2948 temp2 = remainder & 0x0000ffff;
2950 /* Overlaps outside this range are best done using other methods. */
2951 for (i = 9; i < 24; i++)
2953 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2954 && !const_ok_for_arm (temp2))
2956 rtx new_src = (subtargets
2957 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2958 : target);
2959 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2960 source, subtargets, generate);
2961 source = new_src;
2962 if (generate)
2963 emit_constant_insn
2964 (cond,
2965 gen_rtx_SET
2966 (VOIDmode, target,
2967 gen_rtx_IOR (mode,
2968 gen_rtx_ASHIFT (mode, source,
2969 GEN_INT (i)),
2970 source)));
2971 return insns + 1;
2975 /* Don't duplicate cases already considered. */
2976 for (i = 17; i < 24; i++)
2978 if (((temp1 | (temp1 >> i)) == remainder)
2979 && !const_ok_for_arm (temp1))
2981 rtx new_src = (subtargets
2982 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2983 : target);
2984 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2985 source, subtargets, generate);
2986 source = new_src;
2987 if (generate)
2988 emit_constant_insn
2989 (cond,
2990 gen_rtx_SET (VOIDmode, target,
2991 gen_rtx_IOR
2992 (mode,
2993 gen_rtx_LSHIFTRT (mode, source,
2994 GEN_INT (i)),
2995 source)));
2996 return insns + 1;
3000 break;
3002 case IOR:
3003 case XOR:
3004 /* If we have IOR or XOR, and the constant can be loaded in a
3005 single instruction, and we can find a temporary to put it in,
3006 then this can be done in two instructions instead of 3-4. */
3007 if (subtargets
3008 /* TARGET can't be NULL if SUBTARGETS is 0 */
3009 || (reload_completed && !reg_mentioned_p (target, source)))
3011 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3013 if (generate)
3015 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3017 emit_constant_insn (cond,
3018 gen_rtx_SET (VOIDmode, sub,
3019 GEN_INT (val)));
3020 emit_constant_insn (cond,
3021 gen_rtx_SET (VOIDmode, target,
3022 gen_rtx_fmt_ee (code, mode,
3023 source, sub)));
3025 return 2;
3029 if (code == XOR)
3030 break;
3032 /* Convert.
3033 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
3034 and the remainder 0s for e.g. 0xfff00000)
3035 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3037 This can be done in 2 instructions by using shifts with mov or mvn.
3038 e.g. for
3039 x = x | 0xfff00000;
3040 we generate.
3041 mvn r0, r0, asl #12
3042 mvn r0, r0, lsr #12 */
3043 if (set_sign_bit_copies > 8
3044 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3046 if (generate)
3048 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3049 rtx shift = GEN_INT (set_sign_bit_copies);
3051 emit_constant_insn
3052 (cond,
3053 gen_rtx_SET (VOIDmode, sub,
3054 gen_rtx_NOT (mode,
3055 gen_rtx_ASHIFT (mode,
3056 source,
3057 shift))));
3058 emit_constant_insn
3059 (cond,
3060 gen_rtx_SET (VOIDmode, target,
3061 gen_rtx_NOT (mode,
3062 gen_rtx_LSHIFTRT (mode, sub,
3063 shift))));
3065 return 2;
3068 /* Convert
3069 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3071 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3073 For eg. r0 = r0 | 0xfff
3074 mvn r0, r0, lsr #12
3075 mvn r0, r0, asl #12
3078 if (set_zero_bit_copies > 8
3079 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3081 if (generate)
3083 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3084 rtx shift = GEN_INT (set_zero_bit_copies);
3086 emit_constant_insn
3087 (cond,
3088 gen_rtx_SET (VOIDmode, sub,
3089 gen_rtx_NOT (mode,
3090 gen_rtx_LSHIFTRT (mode,
3091 source,
3092 shift))));
3093 emit_constant_insn
3094 (cond,
3095 gen_rtx_SET (VOIDmode, target,
3096 gen_rtx_NOT (mode,
3097 gen_rtx_ASHIFT (mode, sub,
3098 shift))));
3100 return 2;
3103 /* This will never be reached for Thumb2 because orn is a valid
3104 instruction. This is for Thumb1 and the ARM 32 bit cases.
3106 x = y | constant (such that ~constant is a valid constant)
3107 Transform this to
3108 x = ~(~y & ~constant).
3110 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3112 if (generate)
3114 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3115 emit_constant_insn (cond,
3116 gen_rtx_SET (VOIDmode, sub,
3117 gen_rtx_NOT (mode, source)));
3118 source = sub;
3119 if (subtargets)
3120 sub = gen_reg_rtx (mode);
3121 emit_constant_insn (cond,
3122 gen_rtx_SET (VOIDmode, sub,
3123 gen_rtx_AND (mode, source,
3124 GEN_INT (temp1))));
3125 emit_constant_insn (cond,
3126 gen_rtx_SET (VOIDmode, target,
3127 gen_rtx_NOT (mode, sub)));
3129 return 3;
3131 break;
3133 case AND:
3134 /* See if two shifts will do 2 or more insn's worth of work. */
3135 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3137 HOST_WIDE_INT shift_mask = ((0xffffffff
3138 << (32 - clear_sign_bit_copies))
3139 & 0xffffffff);
3141 if ((remainder | shift_mask) != 0xffffffff)
3143 if (generate)
3145 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3146 insns = arm_gen_constant (AND, mode, cond,
3147 remainder | shift_mask,
3148 new_src, source, subtargets, 1);
3149 source = new_src;
3151 else
3153 rtx targ = subtargets ? NULL_RTX : target;
3154 insns = arm_gen_constant (AND, mode, cond,
3155 remainder | shift_mask,
3156 targ, source, subtargets, 0);
3160 if (generate)
3162 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3163 rtx shift = GEN_INT (clear_sign_bit_copies);
3165 emit_insn (gen_ashlsi3 (new_src, source, shift));
3166 emit_insn (gen_lshrsi3 (target, new_src, shift));
3169 return insns + 2;
3172 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3174 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3176 if ((remainder | shift_mask) != 0xffffffff)
3178 if (generate)
3180 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3182 insns = arm_gen_constant (AND, mode, cond,
3183 remainder | shift_mask,
3184 new_src, source, subtargets, 1);
3185 source = new_src;
3187 else
3189 rtx targ = subtargets ? NULL_RTX : target;
3191 insns = arm_gen_constant (AND, mode, cond,
3192 remainder | shift_mask,
3193 targ, source, subtargets, 0);
3197 if (generate)
3199 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3200 rtx shift = GEN_INT (clear_zero_bit_copies);
3202 emit_insn (gen_lshrsi3 (new_src, source, shift));
3203 emit_insn (gen_ashlsi3 (target, new_src, shift));
3206 return insns + 2;
3209 break;
3211 default:
3212 break;
3215 for (i = 0; i < 32; i++)
3216 if (remainder & (1 << i))
3217 num_bits_set++;
3219 if ((code == AND)
3220 || (code != IOR && can_invert && num_bits_set > 16))
3221 remainder ^= 0xffffffff;
3222 else if (code == PLUS && num_bits_set > 16)
3223 remainder = (-remainder) & 0xffffffff;
3225 /* For XOR, if more than half the bits are set and there's a sequence
3226 of more than 8 consecutive ones in the pattern then we can XOR by the
3227 inverted constant and then invert the final result; this may save an
3228 instruction and might also lead to the final mvn being merged with
3229 some other operation. */
3230 else if (code == XOR && num_bits_set > 16
3231 && (count_insns_for_constant (remainder ^ 0xffffffff,
3232 find_best_start
3233 (remainder ^ 0xffffffff))
3234 < count_insns_for_constant (remainder,
3235 find_best_start (remainder))))
3237 remainder ^= 0xffffffff;
3238 final_invert = 1;
3240 else
3242 can_invert = 0;
3243 can_negate = 0;
3246 /* Now try and find a way of doing the job in either two or three
3247 instructions.
3248 We start by looking for the largest block of zeros that are aligned on
3249 a 2-bit boundary, we then fill up the temps, wrapping around to the
3250 top of the word when we drop off the bottom.
3251 In the worst case this code should produce no more than four insns.
3252 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3253 best place to start. */
3255 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3256 the same. */
3258 /* Now start emitting the insns. */
3259 i = find_best_start (remainder);
3262 int end;
3264 if (i <= 0)
3265 i += 32;
3266 if (remainder & (3 << (i - 2)))
3268 end = i - 8;
3269 if (end < 0)
3270 end += 32;
3271 temp1 = remainder & ((0x0ff << end)
3272 | ((i < end) ? (0xff >> (32 - end)) : 0));
3273 remainder &= ~temp1;
3275 if (generate)
3277 rtx new_src, temp1_rtx;
3279 if (code == SET || code == MINUS)
3281 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3282 if (can_invert && code != MINUS)
3283 temp1 = ~temp1;
3285 else
3287 if ((final_invert || remainder) && subtargets)
3288 new_src = gen_reg_rtx (mode);
3289 else
3290 new_src = target;
3291 if (can_invert)
3292 temp1 = ~temp1;
3293 else if (can_negate)
3294 temp1 = -temp1;
3297 temp1 = trunc_int_for_mode (temp1, mode);
3298 temp1_rtx = GEN_INT (temp1);
3300 if (code == SET)
3302 else if (code == MINUS)
3303 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3304 else
3305 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3307 emit_constant_insn (cond,
3308 gen_rtx_SET (VOIDmode, new_src,
3309 temp1_rtx));
3310 source = new_src;
3313 if (code == SET)
3315 can_invert = 0;
3316 code = PLUS;
3318 else if (code == MINUS)
3319 code = PLUS;
3321 insns++;
3322 i -= 8 - step_size;
3324 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3325 shifts. */
3326 i -= step_size;
3328 while (remainder);
3331 if (final_invert)
3333 if (generate)
3334 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3335 gen_rtx_NOT (mode, source)));
3336 insns++;
3339 return insns;
3342 /* Canonicalize a comparison so that we are more likely to recognize it.
3343 This can be done for a few constant compares, where we can make the
3344 immediate value easier to load. */
3346 enum rtx_code
3347 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3349 enum machine_mode mode;
3350 unsigned HOST_WIDE_INT i, maxval;
3352 mode = GET_MODE (*op0);
3353 if (mode == VOIDmode)
3354 mode = GET_MODE (*op1);
3356 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3358 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3359 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3360 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3361 for GTU/LEU in Thumb mode. */
3362 if (mode == DImode)
3364 rtx tem;
3366 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3367 available. */
3368 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3369 return code;
3371 if (code == GT || code == LE
3372 || (!TARGET_ARM && (code == GTU || code == LEU)))
3374 /* Missing comparison. First try to use an available
3375 comparison. */
3376 if (GET_CODE (*op1) == CONST_INT)
3378 i = INTVAL (*op1);
3379 switch (code)
3381 case GT:
3382 case LE:
3383 if (i != maxval
3384 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3386 *op1 = GEN_INT (i + 1);
3387 return code == GT ? GE : LT;
3389 break;
3390 case GTU:
3391 case LEU:
3392 if (i != ~((unsigned HOST_WIDE_INT) 0)
3393 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3395 *op1 = GEN_INT (i + 1);
3396 return code == GTU ? GEU : LTU;
3398 break;
3399 default:
3400 gcc_unreachable ();
3404 /* If that did not work, reverse the condition. */
3405 tem = *op0;
3406 *op0 = *op1;
3407 *op1 = tem;
3408 return swap_condition (code);
3411 return code;
3414 /* Comparisons smaller than DImode. Only adjust comparisons against
3415 an out-of-range constant. */
3416 if (GET_CODE (*op1) != CONST_INT
3417 || const_ok_for_arm (INTVAL (*op1))
3418 || const_ok_for_arm (- INTVAL (*op1)))
3419 return code;
3421 i = INTVAL (*op1);
3423 switch (code)
3425 case EQ:
3426 case NE:
3427 return code;
3429 case GT:
3430 case LE:
3431 if (i != maxval
3432 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3434 *op1 = GEN_INT (i + 1);
3435 return code == GT ? GE : LT;
3437 break;
3439 case GE:
3440 case LT:
3441 if (i != ~maxval
3442 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3444 *op1 = GEN_INT (i - 1);
3445 return code == GE ? GT : LE;
3447 break;
3449 case GTU:
3450 case LEU:
3451 if (i != ~((unsigned HOST_WIDE_INT) 0)
3452 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3454 *op1 = GEN_INT (i + 1);
3455 return code == GTU ? GEU : LTU;
3457 break;
3459 case GEU:
3460 case LTU:
3461 if (i != 0
3462 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3464 *op1 = GEN_INT (i - 1);
3465 return code == GEU ? GTU : LEU;
3467 break;
3469 default:
3470 gcc_unreachable ();
3473 return code;
3477 /* Define how to find the value returned by a function. */
3479 static rtx
3480 arm_function_value(const_tree type, const_tree func,
3481 bool outgoing ATTRIBUTE_UNUSED)
3483 enum machine_mode mode;
3484 int unsignedp ATTRIBUTE_UNUSED;
3485 rtx r ATTRIBUTE_UNUSED;
3487 mode = TYPE_MODE (type);
3489 if (TARGET_AAPCS_BASED)
3490 return aapcs_allocate_return_reg (mode, type, func);
3492 /* Promote integer types. */
3493 if (INTEGRAL_TYPE_P (type))
3494 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3496 /* Promotes small structs returned in a register to full-word size
3497 for big-endian AAPCS. */
3498 if (arm_return_in_msb (type))
3500 HOST_WIDE_INT size = int_size_in_bytes (type);
3501 if (size % UNITS_PER_WORD != 0)
3503 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3504 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3508 return LIBCALL_VALUE (mode);
3511 static int
3512 libcall_eq (const void *p1, const void *p2)
3514 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3517 static hashval_t
3518 libcall_hash (const void *p1)
3520 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3523 static void
3524 add_libcall (htab_t htab, rtx libcall)
3526 *htab_find_slot (htab, libcall, INSERT) = libcall;
3529 static bool
3530 arm_libcall_uses_aapcs_base (const_rtx libcall)
3532 static bool init_done = false;
3533 static htab_t libcall_htab;
3535 if (!init_done)
3537 init_done = true;
3539 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3540 NULL);
3541 add_libcall (libcall_htab,
3542 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3543 add_libcall (libcall_htab,
3544 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3545 add_libcall (libcall_htab,
3546 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3547 add_libcall (libcall_htab,
3548 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3550 add_libcall (libcall_htab,
3551 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3552 add_libcall (libcall_htab,
3553 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3554 add_libcall (libcall_htab,
3555 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3556 add_libcall (libcall_htab,
3557 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3559 add_libcall (libcall_htab,
3560 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3561 add_libcall (libcall_htab,
3562 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3563 add_libcall (libcall_htab,
3564 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3565 add_libcall (libcall_htab,
3566 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3567 add_libcall (libcall_htab,
3568 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3569 add_libcall (libcall_htab,
3570 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3573 return libcall && htab_find (libcall_htab, libcall) != NULL;
3577 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3579 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3580 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3582 /* The following libcalls return their result in integer registers,
3583 even though they return a floating point value. */
3584 if (arm_libcall_uses_aapcs_base (libcall))
3585 return gen_rtx_REG (mode, ARG_REGISTER(1));
3589 return LIBCALL_VALUE (mode);
3592 /* Determine the amount of memory needed to store the possible return
3593 registers of an untyped call. */
3595 arm_apply_result_size (void)
3597 int size = 16;
3599 if (TARGET_32BIT)
3601 if (TARGET_HARD_FLOAT_ABI)
3603 if (TARGET_VFP)
3604 size += 32;
3605 if (TARGET_FPA)
3606 size += 12;
3607 if (TARGET_MAVERICK)
3608 size += 8;
3610 if (TARGET_IWMMXT_ABI)
3611 size += 8;
3614 return size;
3617 /* Decide whether TYPE should be returned in memory (true)
3618 or in a register (false). FNTYPE is the type of the function making
3619 the call. */
3620 static bool
3621 arm_return_in_memory (const_tree type, const_tree fntype)
3623 HOST_WIDE_INT size;
3625 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3627 if (TARGET_AAPCS_BASED)
3629 /* Simple, non-aggregate types (ie not including vectors and
3630 complex) are always returned in a register (or registers).
3631 We don't care about which register here, so we can short-cut
3632 some of the detail. */
3633 if (!AGGREGATE_TYPE_P (type)
3634 && TREE_CODE (type) != VECTOR_TYPE
3635 && TREE_CODE (type) != COMPLEX_TYPE)
3636 return false;
3638 /* Any return value that is no larger than one word can be
3639 returned in r0. */
3640 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3641 return false;
3643 /* Check any available co-processors to see if they accept the
3644 type as a register candidate (VFP, for example, can return
3645 some aggregates in consecutive registers). These aren't
3646 available if the call is variadic. */
3647 if (aapcs_select_return_coproc (type, fntype) >= 0)
3648 return false;
3650 /* Vector values should be returned using ARM registers, not
3651 memory (unless they're over 16 bytes, which will break since
3652 we only have four call-clobbered registers to play with). */
3653 if (TREE_CODE (type) == VECTOR_TYPE)
3654 return (size < 0 || size > (4 * UNITS_PER_WORD));
3656 /* The rest go in memory. */
3657 return true;
3660 if (TREE_CODE (type) == VECTOR_TYPE)
3661 return (size < 0 || size > (4 * UNITS_PER_WORD));
3663 if (!AGGREGATE_TYPE_P (type) &&
3664 (TREE_CODE (type) != VECTOR_TYPE))
3665 /* All simple types are returned in registers. */
3666 return false;
3668 if (arm_abi != ARM_ABI_APCS)
3670 /* ATPCS and later return aggregate types in memory only if they are
3671 larger than a word (or are variable size). */
3672 return (size < 0 || size > UNITS_PER_WORD);
3675 /* For the arm-wince targets we choose to be compatible with Microsoft's
3676 ARM and Thumb compilers, which always return aggregates in memory. */
3677 #ifndef ARM_WINCE
3678 /* All structures/unions bigger than one word are returned in memory.
3679 Also catch the case where int_size_in_bytes returns -1. In this case
3680 the aggregate is either huge or of variable size, and in either case
3681 we will want to return it via memory and not in a register. */
3682 if (size < 0 || size > UNITS_PER_WORD)
3683 return true;
3685 if (TREE_CODE (type) == RECORD_TYPE)
3687 tree field;
3689 /* For a struct the APCS says that we only return in a register
3690 if the type is 'integer like' and every addressable element
3691 has an offset of zero. For practical purposes this means
3692 that the structure can have at most one non bit-field element
3693 and that this element must be the first one in the structure. */
3695 /* Find the first field, ignoring non FIELD_DECL things which will
3696 have been created by C++. */
3697 for (field = TYPE_FIELDS (type);
3698 field && TREE_CODE (field) != FIELD_DECL;
3699 field = DECL_CHAIN (field))
3700 continue;
3702 if (field == NULL)
3703 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3705 /* Check that the first field is valid for returning in a register. */
3707 /* ... Floats are not allowed */
3708 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3709 return true;
3711 /* ... Aggregates that are not themselves valid for returning in
3712 a register are not allowed. */
3713 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3714 return true;
3716 /* Now check the remaining fields, if any. Only bitfields are allowed,
3717 since they are not addressable. */
3718 for (field = DECL_CHAIN (field);
3719 field;
3720 field = DECL_CHAIN (field))
3722 if (TREE_CODE (field) != FIELD_DECL)
3723 continue;
3725 if (!DECL_BIT_FIELD_TYPE (field))
3726 return true;
3729 return false;
3732 if (TREE_CODE (type) == UNION_TYPE)
3734 tree field;
3736 /* Unions can be returned in registers if every element is
3737 integral, or can be returned in an integer register. */
3738 for (field = TYPE_FIELDS (type);
3739 field;
3740 field = DECL_CHAIN (field))
3742 if (TREE_CODE (field) != FIELD_DECL)
3743 continue;
3745 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3746 return true;
3748 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3749 return true;
3752 return false;
3754 #endif /* not ARM_WINCE */
3756 /* Return all other types in memory. */
3757 return true;
3760 /* Indicate whether or not words of a double are in big-endian order. */
3763 arm_float_words_big_endian (void)
3765 if (TARGET_MAVERICK)
3766 return 0;
3768 /* For FPA, float words are always big-endian. For VFP, floats words
3769 follow the memory system mode. */
3771 if (TARGET_FPA)
3773 return 1;
3776 if (TARGET_VFP)
3777 return (TARGET_BIG_END ? 1 : 0);
3779 return 1;
3782 const struct pcs_attribute_arg
3784 const char *arg;
3785 enum arm_pcs value;
3786 } pcs_attribute_args[] =
3788 {"aapcs", ARM_PCS_AAPCS},
3789 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3790 #if 0
3791 /* We could recognize these, but changes would be needed elsewhere
3792 * to implement them. */
3793 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3794 {"atpcs", ARM_PCS_ATPCS},
3795 {"apcs", ARM_PCS_APCS},
3796 #endif
3797 {NULL, ARM_PCS_UNKNOWN}
3800 static enum arm_pcs
3801 arm_pcs_from_attribute (tree attr)
3803 const struct pcs_attribute_arg *ptr;
3804 const char *arg;
3806 /* Get the value of the argument. */
3807 if (TREE_VALUE (attr) == NULL_TREE
3808 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3809 return ARM_PCS_UNKNOWN;
3811 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3813 /* Check it against the list of known arguments. */
3814 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3815 if (streq (arg, ptr->arg))
3816 return ptr->value;
3818 /* An unrecognized interrupt type. */
3819 return ARM_PCS_UNKNOWN;
3822 /* Get the PCS variant to use for this call. TYPE is the function's type
3823 specification, DECL is the specific declartion. DECL may be null if
3824 the call could be indirect or if this is a library call. */
3825 static enum arm_pcs
3826 arm_get_pcs_model (const_tree type, const_tree decl)
3828 bool user_convention = false;
3829 enum arm_pcs user_pcs = arm_pcs_default;
3830 tree attr;
3832 gcc_assert (type);
3834 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3835 if (attr)
3837 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3838 user_convention = true;
3841 if (TARGET_AAPCS_BASED)
3843 /* Detect varargs functions. These always use the base rules
3844 (no argument is ever a candidate for a co-processor
3845 register). */
3846 bool base_rules = stdarg_p (type);
3848 if (user_convention)
3850 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3851 sorry ("non-AAPCS derived PCS variant");
3852 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3853 error ("variadic functions must use the base AAPCS variant");
3856 if (base_rules)
3857 return ARM_PCS_AAPCS;
3858 else if (user_convention)
3859 return user_pcs;
3860 else if (decl && flag_unit_at_a_time)
3862 /* Local functions never leak outside this compilation unit,
3863 so we are free to use whatever conventions are
3864 appropriate. */
3865 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3866 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3867 if (i && i->local)
3868 return ARM_PCS_AAPCS_LOCAL;
3871 else if (user_convention && user_pcs != arm_pcs_default)
3872 sorry ("PCS variant");
3874 /* For everything else we use the target's default. */
3875 return arm_pcs_default;
3879 static void
3880 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3881 const_tree fntype ATTRIBUTE_UNUSED,
3882 rtx libcall ATTRIBUTE_UNUSED,
3883 const_tree fndecl ATTRIBUTE_UNUSED)
3885 /* Record the unallocated VFP registers. */
3886 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3887 pcum->aapcs_vfp_reg_alloc = 0;
3890 /* Walk down the type tree of TYPE counting consecutive base elements.
3891 If *MODEP is VOIDmode, then set it to the first valid floating point
3892 type. If a non-floating point type is found, or if a floating point
3893 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3894 otherwise return the count in the sub-tree. */
3895 static int
3896 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3898 enum machine_mode mode;
3899 HOST_WIDE_INT size;
3901 switch (TREE_CODE (type))
3903 case REAL_TYPE:
3904 mode = TYPE_MODE (type);
3905 if (mode != DFmode && mode != SFmode)
3906 return -1;
3908 if (*modep == VOIDmode)
3909 *modep = mode;
3911 if (*modep == mode)
3912 return 1;
3914 break;
3916 case COMPLEX_TYPE:
3917 mode = TYPE_MODE (TREE_TYPE (type));
3918 if (mode != DFmode && mode != SFmode)
3919 return -1;
3921 if (*modep == VOIDmode)
3922 *modep = mode;
3924 if (*modep == mode)
3925 return 2;
3927 break;
3929 case VECTOR_TYPE:
3930 /* Use V2SImode and V4SImode as representatives of all 64-bit
3931 and 128-bit vector types, whether or not those modes are
3932 supported with the present options. */
3933 size = int_size_in_bytes (type);
3934 switch (size)
3936 case 8:
3937 mode = V2SImode;
3938 break;
3939 case 16:
3940 mode = V4SImode;
3941 break;
3942 default:
3943 return -1;
3946 if (*modep == VOIDmode)
3947 *modep = mode;
3949 /* Vector modes are considered to be opaque: two vectors are
3950 equivalent for the purposes of being homogeneous aggregates
3951 if they are the same size. */
3952 if (*modep == mode)
3953 return 1;
3955 break;
3957 case ARRAY_TYPE:
3959 int count;
3960 tree index = TYPE_DOMAIN (type);
3962 /* Can't handle incomplete types. */
3963 if (!COMPLETE_TYPE_P(type))
3964 return -1;
3966 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3967 if (count == -1
3968 || !index
3969 || !TYPE_MAX_VALUE (index)
3970 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3971 || !TYPE_MIN_VALUE (index)
3972 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3973 || count < 0)
3974 return -1;
3976 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3977 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3979 /* There must be no padding. */
3980 if (!host_integerp (TYPE_SIZE (type), 1)
3981 || (tree_low_cst (TYPE_SIZE (type), 1)
3982 != count * GET_MODE_BITSIZE (*modep)))
3983 return -1;
3985 return count;
3988 case RECORD_TYPE:
3990 int count = 0;
3991 int sub_count;
3992 tree field;
3994 /* Can't handle incomplete types. */
3995 if (!COMPLETE_TYPE_P(type))
3996 return -1;
3998 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4000 if (TREE_CODE (field) != FIELD_DECL)
4001 continue;
4003 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4004 if (sub_count < 0)
4005 return -1;
4006 count += sub_count;
4009 /* There must be no padding. */
4010 if (!host_integerp (TYPE_SIZE (type), 1)
4011 || (tree_low_cst (TYPE_SIZE (type), 1)
4012 != count * GET_MODE_BITSIZE (*modep)))
4013 return -1;
4015 return count;
4018 case UNION_TYPE:
4019 case QUAL_UNION_TYPE:
4021 /* These aren't very interesting except in a degenerate case. */
4022 int count = 0;
4023 int sub_count;
4024 tree field;
4026 /* Can't handle incomplete types. */
4027 if (!COMPLETE_TYPE_P(type))
4028 return -1;
4030 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4032 if (TREE_CODE (field) != FIELD_DECL)
4033 continue;
4035 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4036 if (sub_count < 0)
4037 return -1;
4038 count = count > sub_count ? count : sub_count;
4041 /* There must be no padding. */
4042 if (!host_integerp (TYPE_SIZE (type), 1)
4043 || (tree_low_cst (TYPE_SIZE (type), 1)
4044 != count * GET_MODE_BITSIZE (*modep)))
4045 return -1;
4047 return count;
4050 default:
4051 break;
4054 return -1;
4057 /* Return true if PCS_VARIANT should use VFP registers. */
4058 static bool
4059 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4061 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4063 static bool seen_thumb1_vfp = false;
4065 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4067 sorry ("Thumb-1 hard-float VFP ABI");
4068 /* sorry() is not immediately fatal, so only display this once. */
4069 seen_thumb1_vfp = true;
4072 return true;
4075 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4076 return false;
4078 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4079 (TARGET_VFP_DOUBLE || !is_double));
4082 static bool
4083 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4084 enum machine_mode mode, const_tree type,
4085 enum machine_mode *base_mode, int *count)
4087 enum machine_mode new_mode = VOIDmode;
4089 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4090 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4091 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4093 *count = 1;
4094 new_mode = mode;
4096 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4098 *count = 2;
4099 new_mode = (mode == DCmode ? DFmode : SFmode);
4101 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4103 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4105 if (ag_count > 0 && ag_count <= 4)
4106 *count = ag_count;
4107 else
4108 return false;
4110 else
4111 return false;
4114 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4115 return false;
4117 *base_mode = new_mode;
4118 return true;
4121 static bool
4122 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4123 enum machine_mode mode, const_tree type)
4125 int count ATTRIBUTE_UNUSED;
4126 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4128 if (!use_vfp_abi (pcs_variant, false))
4129 return false;
4130 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4131 &ag_mode, &count);
4134 static bool
4135 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4136 const_tree type)
4138 if (!use_vfp_abi (pcum->pcs_variant, false))
4139 return false;
4141 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4142 &pcum->aapcs_vfp_rmode,
4143 &pcum->aapcs_vfp_rcount);
4146 static bool
4147 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4148 const_tree type ATTRIBUTE_UNUSED)
4150 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4151 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4152 int regno;
4154 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4155 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4157 pcum->aapcs_vfp_reg_alloc = mask << regno;
4158 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4160 int i;
4161 int rcount = pcum->aapcs_vfp_rcount;
4162 int rshift = shift;
4163 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4164 rtx par;
4165 if (!TARGET_NEON)
4167 /* Avoid using unsupported vector modes. */
4168 if (rmode == V2SImode)
4169 rmode = DImode;
4170 else if (rmode == V4SImode)
4172 rmode = DImode;
4173 rcount *= 2;
4174 rshift /= 2;
4177 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4178 for (i = 0; i < rcount; i++)
4180 rtx tmp = gen_rtx_REG (rmode,
4181 FIRST_VFP_REGNUM + regno + i * rshift);
4182 tmp = gen_rtx_EXPR_LIST
4183 (VOIDmode, tmp,
4184 GEN_INT (i * GET_MODE_SIZE (rmode)));
4185 XVECEXP (par, 0, i) = tmp;
4188 pcum->aapcs_reg = par;
4190 else
4191 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4192 return true;
4194 return false;
4197 static rtx
4198 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4199 enum machine_mode mode,
4200 const_tree type ATTRIBUTE_UNUSED)
4202 if (!use_vfp_abi (pcs_variant, false))
4203 return false;
4205 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4207 int count;
4208 enum machine_mode ag_mode;
4209 int i;
4210 rtx par;
4211 int shift;
4213 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4214 &ag_mode, &count);
4216 if (!TARGET_NEON)
4218 if (ag_mode == V2SImode)
4219 ag_mode = DImode;
4220 else if (ag_mode == V4SImode)
4222 ag_mode = DImode;
4223 count *= 2;
4226 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4227 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4228 for (i = 0; i < count; i++)
4230 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4231 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4232 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4233 XVECEXP (par, 0, i) = tmp;
4236 return par;
4239 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4242 static void
4243 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4244 enum machine_mode mode ATTRIBUTE_UNUSED,
4245 const_tree type ATTRIBUTE_UNUSED)
4247 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4248 pcum->aapcs_vfp_reg_alloc = 0;
4249 return;
4252 #define AAPCS_CP(X) \
4254 aapcs_ ## X ## _cum_init, \
4255 aapcs_ ## X ## _is_call_candidate, \
4256 aapcs_ ## X ## _allocate, \
4257 aapcs_ ## X ## _is_return_candidate, \
4258 aapcs_ ## X ## _allocate_return_reg, \
4259 aapcs_ ## X ## _advance \
4262 /* Table of co-processors that can be used to pass arguments in
4263 registers. Idealy no arugment should be a candidate for more than
4264 one co-processor table entry, but the table is processed in order
4265 and stops after the first match. If that entry then fails to put
4266 the argument into a co-processor register, the argument will go on
4267 the stack. */
4268 static struct
4270 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4271 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4273 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4274 BLKmode) is a candidate for this co-processor's registers; this
4275 function should ignore any position-dependent state in
4276 CUMULATIVE_ARGS and only use call-type dependent information. */
4277 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4279 /* Return true if the argument does get a co-processor register; it
4280 should set aapcs_reg to an RTX of the register allocated as is
4281 required for a return from FUNCTION_ARG. */
4282 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4284 /* Return true if a result of mode MODE (or type TYPE if MODE is
4285 BLKmode) is can be returned in this co-processor's registers. */
4286 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4288 /* Allocate and return an RTX element to hold the return type of a
4289 call, this routine must not fail and will only be called if
4290 is_return_candidate returned true with the same parameters. */
4291 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4293 /* Finish processing this argument and prepare to start processing
4294 the next one. */
4295 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4296 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4298 AAPCS_CP(vfp)
4301 #undef AAPCS_CP
4303 static int
4304 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4305 const_tree type)
4307 int i;
4309 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4310 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4311 return i;
4313 return -1;
4316 static int
4317 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4319 /* We aren't passed a decl, so we can't check that a call is local.
4320 However, it isn't clear that that would be a win anyway, since it
4321 might limit some tail-calling opportunities. */
4322 enum arm_pcs pcs_variant;
4324 if (fntype)
4326 const_tree fndecl = NULL_TREE;
4328 if (TREE_CODE (fntype) == FUNCTION_DECL)
4330 fndecl = fntype;
4331 fntype = TREE_TYPE (fntype);
4334 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4336 else
4337 pcs_variant = arm_pcs_default;
4339 if (pcs_variant != ARM_PCS_AAPCS)
4341 int i;
4343 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4344 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4345 TYPE_MODE (type),
4346 type))
4347 return i;
4349 return -1;
4352 static rtx
4353 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4354 const_tree fntype)
4356 /* We aren't passed a decl, so we can't check that a call is local.
4357 However, it isn't clear that that would be a win anyway, since it
4358 might limit some tail-calling opportunities. */
4359 enum arm_pcs pcs_variant;
4360 int unsignedp ATTRIBUTE_UNUSED;
4362 if (fntype)
4364 const_tree fndecl = NULL_TREE;
4366 if (TREE_CODE (fntype) == FUNCTION_DECL)
4368 fndecl = fntype;
4369 fntype = TREE_TYPE (fntype);
4372 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4374 else
4375 pcs_variant = arm_pcs_default;
4377 /* Promote integer types. */
4378 if (type && INTEGRAL_TYPE_P (type))
4379 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4381 if (pcs_variant != ARM_PCS_AAPCS)
4383 int i;
4385 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4386 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4387 type))
4388 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4389 mode, type);
4392 /* Promotes small structs returned in a register to full-word size
4393 for big-endian AAPCS. */
4394 if (type && arm_return_in_msb (type))
4396 HOST_WIDE_INT size = int_size_in_bytes (type);
4397 if (size % UNITS_PER_WORD != 0)
4399 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4400 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4404 return gen_rtx_REG (mode, R0_REGNUM);
4408 aapcs_libcall_value (enum machine_mode mode)
4410 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4413 /* Lay out a function argument using the AAPCS rules. The rule
4414 numbers referred to here are those in the AAPCS. */
4415 static void
4416 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4417 const_tree type, bool named)
4419 int nregs, nregs2;
4420 int ncrn;
4422 /* We only need to do this once per argument. */
4423 if (pcum->aapcs_arg_processed)
4424 return;
4426 pcum->aapcs_arg_processed = true;
4428 /* Special case: if named is false then we are handling an incoming
4429 anonymous argument which is on the stack. */
4430 if (!named)
4431 return;
4433 /* Is this a potential co-processor register candidate? */
4434 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4436 int slot = aapcs_select_call_coproc (pcum, mode, type);
4437 pcum->aapcs_cprc_slot = slot;
4439 /* We don't have to apply any of the rules from part B of the
4440 preparation phase, these are handled elsewhere in the
4441 compiler. */
4443 if (slot >= 0)
4445 /* A Co-processor register candidate goes either in its own
4446 class of registers or on the stack. */
4447 if (!pcum->aapcs_cprc_failed[slot])
4449 /* C1.cp - Try to allocate the argument to co-processor
4450 registers. */
4451 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4452 return;
4454 /* C2.cp - Put the argument on the stack and note that we
4455 can't assign any more candidates in this slot. We also
4456 need to note that we have allocated stack space, so that
4457 we won't later try to split a non-cprc candidate between
4458 core registers and the stack. */
4459 pcum->aapcs_cprc_failed[slot] = true;
4460 pcum->can_split = false;
4463 /* We didn't get a register, so this argument goes on the
4464 stack. */
4465 gcc_assert (pcum->can_split == false);
4466 return;
4470 /* C3 - For double-word aligned arguments, round the NCRN up to the
4471 next even number. */
4472 ncrn = pcum->aapcs_ncrn;
4473 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4474 ncrn++;
4476 nregs = ARM_NUM_REGS2(mode, type);
4478 /* Sigh, this test should really assert that nregs > 0, but a GCC
4479 extension allows empty structs and then gives them empty size; it
4480 then allows such a structure to be passed by value. For some of
4481 the code below we have to pretend that such an argument has
4482 non-zero size so that we 'locate' it correctly either in
4483 registers or on the stack. */
4484 gcc_assert (nregs >= 0);
4486 nregs2 = nregs ? nregs : 1;
4488 /* C4 - Argument fits entirely in core registers. */
4489 if (ncrn + nregs2 <= NUM_ARG_REGS)
4491 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4492 pcum->aapcs_next_ncrn = ncrn + nregs;
4493 return;
4496 /* C5 - Some core registers left and there are no arguments already
4497 on the stack: split this argument between the remaining core
4498 registers and the stack. */
4499 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4501 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4502 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4503 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4504 return;
4507 /* C6 - NCRN is set to 4. */
4508 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4510 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
4511 return;
4514 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4515 for a call to a function whose data type is FNTYPE.
4516 For a library call, FNTYPE is NULL. */
4517 void
4518 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4519 rtx libname,
4520 tree fndecl ATTRIBUTE_UNUSED)
4522 /* Long call handling. */
4523 if (fntype)
4524 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4525 else
4526 pcum->pcs_variant = arm_pcs_default;
4528 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4530 if (arm_libcall_uses_aapcs_base (libname))
4531 pcum->pcs_variant = ARM_PCS_AAPCS;
4533 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4534 pcum->aapcs_reg = NULL_RTX;
4535 pcum->aapcs_partial = 0;
4536 pcum->aapcs_arg_processed = false;
4537 pcum->aapcs_cprc_slot = -1;
4538 pcum->can_split = true;
4540 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4542 int i;
4544 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4546 pcum->aapcs_cprc_failed[i] = false;
4547 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4550 return;
4553 /* Legacy ABIs */
4555 /* On the ARM, the offset starts at 0. */
4556 pcum->nregs = 0;
4557 pcum->iwmmxt_nregs = 0;
4558 pcum->can_split = true;
4560 /* Varargs vectors are treated the same as long long.
4561 named_count avoids having to change the way arm handles 'named' */
4562 pcum->named_count = 0;
4563 pcum->nargs = 0;
4565 if (TARGET_REALLY_IWMMXT && fntype)
4567 tree fn_arg;
4569 for (fn_arg = TYPE_ARG_TYPES (fntype);
4570 fn_arg;
4571 fn_arg = TREE_CHAIN (fn_arg))
4572 pcum->named_count += 1;
4574 if (! pcum->named_count)
4575 pcum->named_count = INT_MAX;
4580 /* Return true if mode/type need doubleword alignment. */
4581 static bool
4582 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4584 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4585 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4589 /* Determine where to put an argument to a function.
4590 Value is zero to push the argument on the stack,
4591 or a hard register in which to store the argument.
4593 MODE is the argument's machine mode.
4594 TYPE is the data type of the argument (as a tree).
4595 This is null for libcalls where that information may
4596 not be available.
4597 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4598 the preceding args and about the function being called.
4599 NAMED is nonzero if this argument is a named parameter
4600 (otherwise it is an extra parameter matching an ellipsis).
4602 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4603 other arguments are passed on the stack. If (NAMED == 0) (which happens
4604 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4605 defined), say it is passed in the stack (function_prologue will
4606 indeed make it pass in the stack if necessary). */
4608 static rtx
4609 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4610 const_tree type, bool named)
4612 int nregs;
4614 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4615 a call insn (op3 of a call_value insn). */
4616 if (mode == VOIDmode)
4617 return const0_rtx;
4619 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4621 aapcs_layout_arg (pcum, mode, type, named);
4622 return pcum->aapcs_reg;
4625 /* Varargs vectors are treated the same as long long.
4626 named_count avoids having to change the way arm handles 'named' */
4627 if (TARGET_IWMMXT_ABI
4628 && arm_vector_mode_supported_p (mode)
4629 && pcum->named_count > pcum->nargs + 1)
4631 if (pcum->iwmmxt_nregs <= 9)
4632 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4633 else
4635 pcum->can_split = false;
4636 return NULL_RTX;
4640 /* Put doubleword aligned quantities in even register pairs. */
4641 if (pcum->nregs & 1
4642 && ARM_DOUBLEWORD_ALIGN
4643 && arm_needs_doubleword_align (mode, type))
4644 pcum->nregs++;
4646 /* Only allow splitting an arg between regs and memory if all preceding
4647 args were allocated to regs. For args passed by reference we only count
4648 the reference pointer. */
4649 if (pcum->can_split)
4650 nregs = 1;
4651 else
4652 nregs = ARM_NUM_REGS2 (mode, type);
4654 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4655 return NULL_RTX;
4657 return gen_rtx_REG (mode, pcum->nregs);
4660 static unsigned int
4661 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4663 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4664 ? DOUBLEWORD_ALIGNMENT
4665 : PARM_BOUNDARY);
4668 static int
4669 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4670 tree type, bool named)
4672 int nregs = pcum->nregs;
4674 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4676 aapcs_layout_arg (pcum, mode, type, named);
4677 return pcum->aapcs_partial;
4680 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4681 return 0;
4683 if (NUM_ARG_REGS > nregs
4684 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4685 && pcum->can_split)
4686 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4688 return 0;
4691 /* Update the data in PCUM to advance over an argument
4692 of mode MODE and data type TYPE.
4693 (TYPE is null for libcalls where that information may not be available.) */
4695 static void
4696 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4697 const_tree type, bool named)
4699 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4701 aapcs_layout_arg (pcum, mode, type, named);
4703 if (pcum->aapcs_cprc_slot >= 0)
4705 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4706 type);
4707 pcum->aapcs_cprc_slot = -1;
4710 /* Generic stuff. */
4711 pcum->aapcs_arg_processed = false;
4712 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4713 pcum->aapcs_reg = NULL_RTX;
4714 pcum->aapcs_partial = 0;
4716 else
4718 pcum->nargs += 1;
4719 if (arm_vector_mode_supported_p (mode)
4720 && pcum->named_count > pcum->nargs
4721 && TARGET_IWMMXT_ABI)
4722 pcum->iwmmxt_nregs += 1;
4723 else
4724 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4728 /* Variable sized types are passed by reference. This is a GCC
4729 extension to the ARM ABI. */
4731 static bool
4732 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4733 enum machine_mode mode ATTRIBUTE_UNUSED,
4734 const_tree type, bool named ATTRIBUTE_UNUSED)
4736 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4739 /* Encode the current state of the #pragma [no_]long_calls. */
4740 typedef enum
4742 OFF, /* No #pragma [no_]long_calls is in effect. */
4743 LONG, /* #pragma long_calls is in effect. */
4744 SHORT /* #pragma no_long_calls is in effect. */
4745 } arm_pragma_enum;
4747 static arm_pragma_enum arm_pragma_long_calls = OFF;
4749 void
4750 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4752 arm_pragma_long_calls = LONG;
4755 void
4756 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4758 arm_pragma_long_calls = SHORT;
4761 void
4762 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4764 arm_pragma_long_calls = OFF;
4767 /* Handle an attribute requiring a FUNCTION_DECL;
4768 arguments as in struct attribute_spec.handler. */
4769 static tree
4770 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4771 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4773 if (TREE_CODE (*node) != FUNCTION_DECL)
4775 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4776 name);
4777 *no_add_attrs = true;
4780 return NULL_TREE;
4783 /* Handle an "interrupt" or "isr" attribute;
4784 arguments as in struct attribute_spec.handler. */
4785 static tree
4786 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4787 bool *no_add_attrs)
4789 if (DECL_P (*node))
4791 if (TREE_CODE (*node) != FUNCTION_DECL)
4793 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4794 name);
4795 *no_add_attrs = true;
4797 /* FIXME: the argument if any is checked for type attributes;
4798 should it be checked for decl ones? */
4800 else
4802 if (TREE_CODE (*node) == FUNCTION_TYPE
4803 || TREE_CODE (*node) == METHOD_TYPE)
4805 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4807 warning (OPT_Wattributes, "%qE attribute ignored",
4808 name);
4809 *no_add_attrs = true;
4812 else if (TREE_CODE (*node) == POINTER_TYPE
4813 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4814 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4815 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4817 *node = build_variant_type_copy (*node);
4818 TREE_TYPE (*node) = build_type_attribute_variant
4819 (TREE_TYPE (*node),
4820 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4821 *no_add_attrs = true;
4823 else
4825 /* Possibly pass this attribute on from the type to a decl. */
4826 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4827 | (int) ATTR_FLAG_FUNCTION_NEXT
4828 | (int) ATTR_FLAG_ARRAY_NEXT))
4830 *no_add_attrs = true;
4831 return tree_cons (name, args, NULL_TREE);
4833 else
4835 warning (OPT_Wattributes, "%qE attribute ignored",
4836 name);
4841 return NULL_TREE;
4844 /* Handle a "pcs" attribute; arguments as in struct
4845 attribute_spec.handler. */
4846 static tree
4847 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4848 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4850 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4852 warning (OPT_Wattributes, "%qE attribute ignored", name);
4853 *no_add_attrs = true;
4855 return NULL_TREE;
4858 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4859 /* Handle the "notshared" attribute. This attribute is another way of
4860 requesting hidden visibility. ARM's compiler supports
4861 "__declspec(notshared)"; we support the same thing via an
4862 attribute. */
4864 static tree
4865 arm_handle_notshared_attribute (tree *node,
4866 tree name ATTRIBUTE_UNUSED,
4867 tree args ATTRIBUTE_UNUSED,
4868 int flags ATTRIBUTE_UNUSED,
4869 bool *no_add_attrs)
4871 tree decl = TYPE_NAME (*node);
4873 if (decl)
4875 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4876 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4877 *no_add_attrs = false;
4879 return NULL_TREE;
4881 #endif
4883 /* Return 0 if the attributes for two types are incompatible, 1 if they
4884 are compatible, and 2 if they are nearly compatible (which causes a
4885 warning to be generated). */
4886 static int
4887 arm_comp_type_attributes (const_tree type1, const_tree type2)
4889 int l1, l2, s1, s2;
4891 /* Check for mismatch of non-default calling convention. */
4892 if (TREE_CODE (type1) != FUNCTION_TYPE)
4893 return 1;
4895 /* Check for mismatched call attributes. */
4896 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4897 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4898 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4899 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4901 /* Only bother to check if an attribute is defined. */
4902 if (l1 | l2 | s1 | s2)
4904 /* If one type has an attribute, the other must have the same attribute. */
4905 if ((l1 != l2) || (s1 != s2))
4906 return 0;
4908 /* Disallow mixed attributes. */
4909 if ((l1 & s2) || (l2 & s1))
4910 return 0;
4913 /* Check for mismatched ISR attribute. */
4914 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4915 if (! l1)
4916 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4917 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4918 if (! l2)
4919 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4920 if (l1 != l2)
4921 return 0;
4923 return 1;
4926 /* Assigns default attributes to newly defined type. This is used to
4927 set short_call/long_call attributes for function types of
4928 functions defined inside corresponding #pragma scopes. */
4929 static void
4930 arm_set_default_type_attributes (tree type)
4932 /* Add __attribute__ ((long_call)) to all functions, when
4933 inside #pragma long_calls or __attribute__ ((short_call)),
4934 when inside #pragma no_long_calls. */
4935 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4937 tree type_attr_list, attr_name;
4938 type_attr_list = TYPE_ATTRIBUTES (type);
4940 if (arm_pragma_long_calls == LONG)
4941 attr_name = get_identifier ("long_call");
4942 else if (arm_pragma_long_calls == SHORT)
4943 attr_name = get_identifier ("short_call");
4944 else
4945 return;
4947 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4948 TYPE_ATTRIBUTES (type) = type_attr_list;
4952 /* Return true if DECL is known to be linked into section SECTION. */
4954 static bool
4955 arm_function_in_section_p (tree decl, section *section)
4957 /* We can only be certain about functions defined in the same
4958 compilation unit. */
4959 if (!TREE_STATIC (decl))
4960 return false;
4962 /* Make sure that SYMBOL always binds to the definition in this
4963 compilation unit. */
4964 if (!targetm.binds_local_p (decl))
4965 return false;
4967 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4968 if (!DECL_SECTION_NAME (decl))
4970 /* Make sure that we will not create a unique section for DECL. */
4971 if (flag_function_sections || DECL_ONE_ONLY (decl))
4972 return false;
4975 return function_section (decl) == section;
4978 /* Return nonzero if a 32-bit "long_call" should be generated for
4979 a call from the current function to DECL. We generate a long_call
4980 if the function:
4982 a. has an __attribute__((long call))
4983 or b. is within the scope of a #pragma long_calls
4984 or c. the -mlong-calls command line switch has been specified
4986 However we do not generate a long call if the function:
4988 d. has an __attribute__ ((short_call))
4989 or e. is inside the scope of a #pragma no_long_calls
4990 or f. is defined in the same section as the current function. */
4992 bool
4993 arm_is_long_call_p (tree decl)
4995 tree attrs;
4997 if (!decl)
4998 return TARGET_LONG_CALLS;
5000 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5001 if (lookup_attribute ("short_call", attrs))
5002 return false;
5004 /* For "f", be conservative, and only cater for cases in which the
5005 whole of the current function is placed in the same section. */
5006 if (!flag_reorder_blocks_and_partition
5007 && TREE_CODE (decl) == FUNCTION_DECL
5008 && arm_function_in_section_p (decl, current_function_section ()))
5009 return false;
5011 if (lookup_attribute ("long_call", attrs))
5012 return true;
5014 return TARGET_LONG_CALLS;
5017 /* Return nonzero if it is ok to make a tail-call to DECL. */
5018 static bool
5019 arm_function_ok_for_sibcall (tree decl, tree exp)
5021 unsigned long func_type;
5023 if (cfun->machine->sibcall_blocked)
5024 return false;
5026 /* Never tailcall something for which we have no decl, or if we
5027 are generating code for Thumb-1. */
5028 if (decl == NULL || TARGET_THUMB1)
5029 return false;
5031 /* The PIC register is live on entry to VxWorks PLT entries, so we
5032 must make the call before restoring the PIC register. */
5033 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5034 return false;
5036 /* Cannot tail-call to long calls, since these are out of range of
5037 a branch instruction. */
5038 if (arm_is_long_call_p (decl))
5039 return false;
5041 /* If we are interworking and the function is not declared static
5042 then we can't tail-call it unless we know that it exists in this
5043 compilation unit (since it might be a Thumb routine). */
5044 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5045 return false;
5047 func_type = arm_current_func_type ();
5048 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5049 if (IS_INTERRUPT (func_type))
5050 return false;
5052 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5054 /* Check that the return value locations are the same. For
5055 example that we aren't returning a value from the sibling in
5056 a VFP register but then need to transfer it to a core
5057 register. */
5058 rtx a, b;
5060 a = arm_function_value (TREE_TYPE (exp), decl, false);
5061 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5062 cfun->decl, false);
5063 if (!rtx_equal_p (a, b))
5064 return false;
5067 /* Never tailcall if function may be called with a misaligned SP. */
5068 if (IS_STACKALIGN (func_type))
5069 return false;
5071 /* Everything else is ok. */
5072 return true;
5076 /* Addressing mode support functions. */
5078 /* Return nonzero if X is a legitimate immediate operand when compiling
5079 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5081 legitimate_pic_operand_p (rtx x)
5083 if (GET_CODE (x) == SYMBOL_REF
5084 || (GET_CODE (x) == CONST
5085 && GET_CODE (XEXP (x, 0)) == PLUS
5086 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5087 return 0;
5089 return 1;
5092 /* Record that the current function needs a PIC register. Initialize
5093 cfun->machine->pic_reg if we have not already done so. */
5095 static void
5096 require_pic_register (void)
5098 /* A lot of the logic here is made obscure by the fact that this
5099 routine gets called as part of the rtx cost estimation process.
5100 We don't want those calls to affect any assumptions about the real
5101 function; and further, we can't call entry_of_function() until we
5102 start the real expansion process. */
5103 if (!crtl->uses_pic_offset_table)
5105 gcc_assert (can_create_pseudo_p ());
5106 if (arm_pic_register != INVALID_REGNUM)
5108 if (!cfun->machine->pic_reg)
5109 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5111 /* Play games to avoid marking the function as needing pic
5112 if we are being called as part of the cost-estimation
5113 process. */
5114 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5115 crtl->uses_pic_offset_table = 1;
5117 else
5119 rtx seq;
5121 if (!cfun->machine->pic_reg)
5122 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5124 /* Play games to avoid marking the function as needing pic
5125 if we are being called as part of the cost-estimation
5126 process. */
5127 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5129 crtl->uses_pic_offset_table = 1;
5130 start_sequence ();
5132 arm_load_pic_register (0UL);
5134 seq = get_insns ();
5135 end_sequence ();
5136 /* We can be called during expansion of PHI nodes, where
5137 we can't yet emit instructions directly in the final
5138 insn stream. Queue the insns on the entry edge, they will
5139 be committed after everything else is expanded. */
5140 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5147 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5149 if (GET_CODE (orig) == SYMBOL_REF
5150 || GET_CODE (orig) == LABEL_REF)
5152 rtx insn;
5154 if (reg == 0)
5156 gcc_assert (can_create_pseudo_p ());
5157 reg = gen_reg_rtx (Pmode);
5160 /* VxWorks does not impose a fixed gap between segments; the run-time
5161 gap can be different from the object-file gap. We therefore can't
5162 use GOTOFF unless we are absolutely sure that the symbol is in the
5163 same segment as the GOT. Unfortunately, the flexibility of linker
5164 scripts means that we can't be sure of that in general, so assume
5165 that GOTOFF is never valid on VxWorks. */
5166 if ((GET_CODE (orig) == LABEL_REF
5167 || (GET_CODE (orig) == SYMBOL_REF &&
5168 SYMBOL_REF_LOCAL_P (orig)))
5169 && NEED_GOT_RELOC
5170 && !TARGET_VXWORKS_RTP)
5171 insn = arm_pic_static_addr (orig, reg);
5172 else
5174 rtx pat;
5175 rtx mem;
5177 /* If this function doesn't have a pic register, create one now. */
5178 require_pic_register ();
5180 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5182 /* Make the MEM as close to a constant as possible. */
5183 mem = SET_SRC (pat);
5184 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5185 MEM_READONLY_P (mem) = 1;
5186 MEM_NOTRAP_P (mem) = 1;
5188 insn = emit_insn (pat);
5191 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5192 by loop. */
5193 set_unique_reg_note (insn, REG_EQUAL, orig);
5195 return reg;
5197 else if (GET_CODE (orig) == CONST)
5199 rtx base, offset;
5201 if (GET_CODE (XEXP (orig, 0)) == PLUS
5202 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5203 return orig;
5205 /* Handle the case where we have: const (UNSPEC_TLS). */
5206 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5207 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5208 return orig;
5210 /* Handle the case where we have:
5211 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5212 CONST_INT. */
5213 if (GET_CODE (XEXP (orig, 0)) == PLUS
5214 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5215 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5217 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5218 return orig;
5221 if (reg == 0)
5223 gcc_assert (can_create_pseudo_p ());
5224 reg = gen_reg_rtx (Pmode);
5227 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5229 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5230 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5231 base == reg ? 0 : reg);
5233 if (GET_CODE (offset) == CONST_INT)
5235 /* The base register doesn't really matter, we only want to
5236 test the index for the appropriate mode. */
5237 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5239 gcc_assert (can_create_pseudo_p ());
5240 offset = force_reg (Pmode, offset);
5243 if (GET_CODE (offset) == CONST_INT)
5244 return plus_constant (base, INTVAL (offset));
5247 if (GET_MODE_SIZE (mode) > 4
5248 && (GET_MODE_CLASS (mode) == MODE_INT
5249 || TARGET_SOFT_FLOAT))
5251 emit_insn (gen_addsi3 (reg, base, offset));
5252 return reg;
5255 return gen_rtx_PLUS (Pmode, base, offset);
5258 return orig;
5262 /* Find a spare register to use during the prolog of a function. */
5264 static int
5265 thumb_find_work_register (unsigned long pushed_regs_mask)
5267 int reg;
5269 /* Check the argument registers first as these are call-used. The
5270 register allocation order means that sometimes r3 might be used
5271 but earlier argument registers might not, so check them all. */
5272 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5273 if (!df_regs_ever_live_p (reg))
5274 return reg;
5276 /* Before going on to check the call-saved registers we can try a couple
5277 more ways of deducing that r3 is available. The first is when we are
5278 pushing anonymous arguments onto the stack and we have less than 4
5279 registers worth of fixed arguments(*). In this case r3 will be part of
5280 the variable argument list and so we can be sure that it will be
5281 pushed right at the start of the function. Hence it will be available
5282 for the rest of the prologue.
5283 (*): ie crtl->args.pretend_args_size is greater than 0. */
5284 if (cfun->machine->uses_anonymous_args
5285 && crtl->args.pretend_args_size > 0)
5286 return LAST_ARG_REGNUM;
5288 /* The other case is when we have fixed arguments but less than 4 registers
5289 worth. In this case r3 might be used in the body of the function, but
5290 it is not being used to convey an argument into the function. In theory
5291 we could just check crtl->args.size to see how many bytes are
5292 being passed in argument registers, but it seems that it is unreliable.
5293 Sometimes it will have the value 0 when in fact arguments are being
5294 passed. (See testcase execute/20021111-1.c for an example). So we also
5295 check the args_info.nregs field as well. The problem with this field is
5296 that it makes no allowances for arguments that are passed to the
5297 function but which are not used. Hence we could miss an opportunity
5298 when a function has an unused argument in r3. But it is better to be
5299 safe than to be sorry. */
5300 if (! cfun->machine->uses_anonymous_args
5301 && crtl->args.size >= 0
5302 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5303 && crtl->args.info.nregs < 4)
5304 return LAST_ARG_REGNUM;
5306 /* Otherwise look for a call-saved register that is going to be pushed. */
5307 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5308 if (pushed_regs_mask & (1 << reg))
5309 return reg;
5311 if (TARGET_THUMB2)
5313 /* Thumb-2 can use high regs. */
5314 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5315 if (pushed_regs_mask & (1 << reg))
5316 return reg;
5318 /* Something went wrong - thumb_compute_save_reg_mask()
5319 should have arranged for a suitable register to be pushed. */
5320 gcc_unreachable ();
5323 static GTY(()) int pic_labelno;
5325 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5326 low register. */
5328 void
5329 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5331 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5333 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5334 return;
5336 gcc_assert (flag_pic);
5338 pic_reg = cfun->machine->pic_reg;
5339 if (TARGET_VXWORKS_RTP)
5341 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5342 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5343 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5345 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5347 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5348 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5350 else
5352 /* We use an UNSPEC rather than a LABEL_REF because this label
5353 never appears in the code stream. */
5355 labelno = GEN_INT (pic_labelno++);
5356 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5357 l1 = gen_rtx_CONST (VOIDmode, l1);
5359 /* On the ARM the PC register contains 'dot + 8' at the time of the
5360 addition, on the Thumb it is 'dot + 4'. */
5361 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5362 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5363 UNSPEC_GOTSYM_OFF);
5364 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5366 if (TARGET_32BIT)
5368 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5369 if (TARGET_ARM)
5370 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5371 else
5372 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5374 else /* TARGET_THUMB1 */
5376 if (arm_pic_register != INVALID_REGNUM
5377 && REGNO (pic_reg) > LAST_LO_REGNUM)
5379 /* We will have pushed the pic register, so we should always be
5380 able to find a work register. */
5381 pic_tmp = gen_rtx_REG (SImode,
5382 thumb_find_work_register (saved_regs));
5383 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5384 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5386 else
5387 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5388 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5392 /* Need to emit this whether or not we obey regdecls,
5393 since setjmp/longjmp can cause life info to screw up. */
5394 emit_use (pic_reg);
5397 /* Generate code to load the address of a static var when flag_pic is set. */
5398 static rtx
5399 arm_pic_static_addr (rtx orig, rtx reg)
5401 rtx l1, labelno, offset_rtx, insn;
5403 gcc_assert (flag_pic);
5405 /* We use an UNSPEC rather than a LABEL_REF because this label
5406 never appears in the code stream. */
5407 labelno = GEN_INT (pic_labelno++);
5408 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5409 l1 = gen_rtx_CONST (VOIDmode, l1);
5411 /* On the ARM the PC register contains 'dot + 8' at the time of the
5412 addition, on the Thumb it is 'dot + 4'. */
5413 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5414 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5415 UNSPEC_SYMBOL_OFFSET);
5416 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5418 if (TARGET_32BIT)
5420 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5421 if (TARGET_ARM)
5422 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5423 else
5424 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5426 else /* TARGET_THUMB1 */
5428 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5429 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5432 return insn;
5435 /* Return nonzero if X is valid as an ARM state addressing register. */
5436 static int
5437 arm_address_register_rtx_p (rtx x, int strict_p)
5439 int regno;
5441 if (GET_CODE (x) != REG)
5442 return 0;
5444 regno = REGNO (x);
5446 if (strict_p)
5447 return ARM_REGNO_OK_FOR_BASE_P (regno);
5449 return (regno <= LAST_ARM_REGNUM
5450 || regno >= FIRST_PSEUDO_REGISTER
5451 || regno == FRAME_POINTER_REGNUM
5452 || regno == ARG_POINTER_REGNUM);
5455 /* Return TRUE if this rtx is the difference of a symbol and a label,
5456 and will reduce to a PC-relative relocation in the object file.
5457 Expressions like this can be left alone when generating PIC, rather
5458 than forced through the GOT. */
5459 static int
5460 pcrel_constant_p (rtx x)
5462 if (GET_CODE (x) == MINUS)
5463 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5465 return FALSE;
5468 /* Return true if X will surely end up in an index register after next
5469 splitting pass. */
5470 static bool
5471 will_be_in_index_register (const_rtx x)
5473 /* arm.md: calculate_pic_address will split this into a register. */
5474 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5477 /* Return nonzero if X is a valid ARM state address operand. */
5479 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5480 int strict_p)
5482 bool use_ldrd;
5483 enum rtx_code code = GET_CODE (x);
5485 if (arm_address_register_rtx_p (x, strict_p))
5486 return 1;
5488 use_ldrd = (TARGET_LDRD
5489 && (mode == DImode
5490 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5492 if (code == POST_INC || code == PRE_DEC
5493 || ((code == PRE_INC || code == POST_DEC)
5494 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5495 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5497 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5498 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5499 && GET_CODE (XEXP (x, 1)) == PLUS
5500 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5502 rtx addend = XEXP (XEXP (x, 1), 1);
5504 /* Don't allow ldrd post increment by register because it's hard
5505 to fixup invalid register choices. */
5506 if (use_ldrd
5507 && GET_CODE (x) == POST_MODIFY
5508 && GET_CODE (addend) == REG)
5509 return 0;
5511 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5512 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5515 /* After reload constants split into minipools will have addresses
5516 from a LABEL_REF. */
5517 else if (reload_completed
5518 && (code == LABEL_REF
5519 || (code == CONST
5520 && GET_CODE (XEXP (x, 0)) == PLUS
5521 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5522 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5523 return 1;
5525 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5526 return 0;
5528 else if (code == PLUS)
5530 rtx xop0 = XEXP (x, 0);
5531 rtx xop1 = XEXP (x, 1);
5533 return ((arm_address_register_rtx_p (xop0, strict_p)
5534 && ((GET_CODE(xop1) == CONST_INT
5535 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5536 || (!strict_p && will_be_in_index_register (xop1))))
5537 || (arm_address_register_rtx_p (xop1, strict_p)
5538 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5541 #if 0
5542 /* Reload currently can't handle MINUS, so disable this for now */
5543 else if (GET_CODE (x) == MINUS)
5545 rtx xop0 = XEXP (x, 0);
5546 rtx xop1 = XEXP (x, 1);
5548 return (arm_address_register_rtx_p (xop0, strict_p)
5549 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5551 #endif
5553 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5554 && code == SYMBOL_REF
5555 && CONSTANT_POOL_ADDRESS_P (x)
5556 && ! (flag_pic
5557 && symbol_mentioned_p (get_pool_constant (x))
5558 && ! pcrel_constant_p (get_pool_constant (x))))
5559 return 1;
5561 return 0;
5564 /* Return nonzero if X is a valid Thumb-2 address operand. */
5565 static int
5566 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5568 bool use_ldrd;
5569 enum rtx_code code = GET_CODE (x);
5571 if (arm_address_register_rtx_p (x, strict_p))
5572 return 1;
5574 use_ldrd = (TARGET_LDRD
5575 && (mode == DImode
5576 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5578 if (code == POST_INC || code == PRE_DEC
5579 || ((code == PRE_INC || code == POST_DEC)
5580 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5581 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5583 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5584 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5585 && GET_CODE (XEXP (x, 1)) == PLUS
5586 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5588 /* Thumb-2 only has autoincrement by constant. */
5589 rtx addend = XEXP (XEXP (x, 1), 1);
5590 HOST_WIDE_INT offset;
5592 if (GET_CODE (addend) != CONST_INT)
5593 return 0;
5595 offset = INTVAL(addend);
5596 if (GET_MODE_SIZE (mode) <= 4)
5597 return (offset > -256 && offset < 256);
5599 return (use_ldrd && offset > -1024 && offset < 1024
5600 && (offset & 3) == 0);
5603 /* After reload constants split into minipools will have addresses
5604 from a LABEL_REF. */
5605 else if (reload_completed
5606 && (code == LABEL_REF
5607 || (code == CONST
5608 && GET_CODE (XEXP (x, 0)) == PLUS
5609 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5610 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5611 return 1;
5613 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5614 return 0;
5616 else if (code == PLUS)
5618 rtx xop0 = XEXP (x, 0);
5619 rtx xop1 = XEXP (x, 1);
5621 return ((arm_address_register_rtx_p (xop0, strict_p)
5622 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5623 || (!strict_p && will_be_in_index_register (xop1))))
5624 || (arm_address_register_rtx_p (xop1, strict_p)
5625 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5628 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5629 && code == SYMBOL_REF
5630 && CONSTANT_POOL_ADDRESS_P (x)
5631 && ! (flag_pic
5632 && symbol_mentioned_p (get_pool_constant (x))
5633 && ! pcrel_constant_p (get_pool_constant (x))))
5634 return 1;
5636 return 0;
5639 /* Return nonzero if INDEX is valid for an address index operand in
5640 ARM state. */
5641 static int
5642 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5643 int strict_p)
5645 HOST_WIDE_INT range;
5646 enum rtx_code code = GET_CODE (index);
5648 /* Standard coprocessor addressing modes. */
5649 if (TARGET_HARD_FLOAT
5650 && (TARGET_FPA || TARGET_MAVERICK)
5651 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5652 || (TARGET_MAVERICK && mode == DImode)))
5653 return (code == CONST_INT && INTVAL (index) < 1024
5654 && INTVAL (index) > -1024
5655 && (INTVAL (index) & 3) == 0);
5657 /* For quad modes, we restrict the constant offset to be slightly less
5658 than what the instruction format permits. We do this because for
5659 quad mode moves, we will actually decompose them into two separate
5660 double-mode reads or writes. INDEX must therefore be a valid
5661 (double-mode) offset and so should INDEX+8. */
5662 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5663 return (code == CONST_INT
5664 && INTVAL (index) < 1016
5665 && INTVAL (index) > -1024
5666 && (INTVAL (index) & 3) == 0);
5668 /* We have no such constraint on double mode offsets, so we permit the
5669 full range of the instruction format. */
5670 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5671 return (code == CONST_INT
5672 && INTVAL (index) < 1024
5673 && INTVAL (index) > -1024
5674 && (INTVAL (index) & 3) == 0);
5676 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5677 return (code == CONST_INT
5678 && INTVAL (index) < 1024
5679 && INTVAL (index) > -1024
5680 && (INTVAL (index) & 3) == 0);
5682 if (arm_address_register_rtx_p (index, strict_p)
5683 && (GET_MODE_SIZE (mode) <= 4))
5684 return 1;
5686 if (mode == DImode || mode == DFmode)
5688 if (code == CONST_INT)
5690 HOST_WIDE_INT val = INTVAL (index);
5692 if (TARGET_LDRD)
5693 return val > -256 && val < 256;
5694 else
5695 return val > -4096 && val < 4092;
5698 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5701 if (GET_MODE_SIZE (mode) <= 4
5702 && ! (arm_arch4
5703 && (mode == HImode
5704 || mode == HFmode
5705 || (mode == QImode && outer == SIGN_EXTEND))))
5707 if (code == MULT)
5709 rtx xiop0 = XEXP (index, 0);
5710 rtx xiop1 = XEXP (index, 1);
5712 return ((arm_address_register_rtx_p (xiop0, strict_p)
5713 && power_of_two_operand (xiop1, SImode))
5714 || (arm_address_register_rtx_p (xiop1, strict_p)
5715 && power_of_two_operand (xiop0, SImode)));
5717 else if (code == LSHIFTRT || code == ASHIFTRT
5718 || code == ASHIFT || code == ROTATERT)
5720 rtx op = XEXP (index, 1);
5722 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5723 && GET_CODE (op) == CONST_INT
5724 && INTVAL (op) > 0
5725 && INTVAL (op) <= 31);
5729 /* For ARM v4 we may be doing a sign-extend operation during the
5730 load. */
5731 if (arm_arch4)
5733 if (mode == HImode
5734 || mode == HFmode
5735 || (outer == SIGN_EXTEND && mode == QImode))
5736 range = 256;
5737 else
5738 range = 4096;
5740 else
5741 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5743 return (code == CONST_INT
5744 && INTVAL (index) < range
5745 && INTVAL (index) > -range);
5748 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5749 index operand. i.e. 1, 2, 4 or 8. */
5750 static bool
5751 thumb2_index_mul_operand (rtx op)
5753 HOST_WIDE_INT val;
5755 if (GET_CODE(op) != CONST_INT)
5756 return false;
5758 val = INTVAL(op);
5759 return (val == 1 || val == 2 || val == 4 || val == 8);
5762 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5763 static int
5764 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5766 enum rtx_code code = GET_CODE (index);
5768 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5769 /* Standard coprocessor addressing modes. */
5770 if (TARGET_HARD_FLOAT
5771 && (TARGET_FPA || TARGET_MAVERICK)
5772 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5773 || (TARGET_MAVERICK && mode == DImode)))
5774 return (code == CONST_INT && INTVAL (index) < 1024
5775 && INTVAL (index) > -1024
5776 && (INTVAL (index) & 3) == 0);
5778 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5780 /* For DImode assume values will usually live in core regs
5781 and only allow LDRD addressing modes. */
5782 if (!TARGET_LDRD || mode != DImode)
5783 return (code == CONST_INT
5784 && INTVAL (index) < 1024
5785 && INTVAL (index) > -1024
5786 && (INTVAL (index) & 3) == 0);
5789 /* For quad modes, we restrict the constant offset to be slightly less
5790 than what the instruction format permits. We do this because for
5791 quad mode moves, we will actually decompose them into two separate
5792 double-mode reads or writes. INDEX must therefore be a valid
5793 (double-mode) offset and so should INDEX+8. */
5794 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5795 return (code == CONST_INT
5796 && INTVAL (index) < 1016
5797 && INTVAL (index) > -1024
5798 && (INTVAL (index) & 3) == 0);
5800 /* We have no such constraint on double mode offsets, so we permit the
5801 full range of the instruction format. */
5802 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5803 return (code == CONST_INT
5804 && INTVAL (index) < 1024
5805 && INTVAL (index) > -1024
5806 && (INTVAL (index) & 3) == 0);
5808 if (arm_address_register_rtx_p (index, strict_p)
5809 && (GET_MODE_SIZE (mode) <= 4))
5810 return 1;
5812 if (mode == DImode || mode == DFmode)
5814 if (code == CONST_INT)
5816 HOST_WIDE_INT val = INTVAL (index);
5817 /* ??? Can we assume ldrd for thumb2? */
5818 /* Thumb-2 ldrd only has reg+const addressing modes. */
5819 /* ldrd supports offsets of +-1020.
5820 However the ldr fallback does not. */
5821 return val > -256 && val < 256 && (val & 3) == 0;
5823 else
5824 return 0;
5827 if (code == MULT)
5829 rtx xiop0 = XEXP (index, 0);
5830 rtx xiop1 = XEXP (index, 1);
5832 return ((arm_address_register_rtx_p (xiop0, strict_p)
5833 && thumb2_index_mul_operand (xiop1))
5834 || (arm_address_register_rtx_p (xiop1, strict_p)
5835 && thumb2_index_mul_operand (xiop0)));
5837 else if (code == ASHIFT)
5839 rtx op = XEXP (index, 1);
5841 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5842 && GET_CODE (op) == CONST_INT
5843 && INTVAL (op) > 0
5844 && INTVAL (op) <= 3);
5847 return (code == CONST_INT
5848 && INTVAL (index) < 4096
5849 && INTVAL (index) > -256);
5852 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5853 static int
5854 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5856 int regno;
5858 if (GET_CODE (x) != REG)
5859 return 0;
5861 regno = REGNO (x);
5863 if (strict_p)
5864 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5866 return (regno <= LAST_LO_REGNUM
5867 || regno > LAST_VIRTUAL_REGISTER
5868 || regno == FRAME_POINTER_REGNUM
5869 || (GET_MODE_SIZE (mode) >= 4
5870 && (regno == STACK_POINTER_REGNUM
5871 || regno >= FIRST_PSEUDO_REGISTER
5872 || x == hard_frame_pointer_rtx
5873 || x == arg_pointer_rtx)));
5876 /* Return nonzero if x is a legitimate index register. This is the case
5877 for any base register that can access a QImode object. */
5878 inline static int
5879 thumb1_index_register_rtx_p (rtx x, int strict_p)
5881 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5884 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5886 The AP may be eliminated to either the SP or the FP, so we use the
5887 least common denominator, e.g. SImode, and offsets from 0 to 64.
5889 ??? Verify whether the above is the right approach.
5891 ??? Also, the FP may be eliminated to the SP, so perhaps that
5892 needs special handling also.
5894 ??? Look at how the mips16 port solves this problem. It probably uses
5895 better ways to solve some of these problems.
5897 Although it is not incorrect, we don't accept QImode and HImode
5898 addresses based on the frame pointer or arg pointer until the
5899 reload pass starts. This is so that eliminating such addresses
5900 into stack based ones won't produce impossible code. */
5901 static int
5902 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5904 /* ??? Not clear if this is right. Experiment. */
5905 if (GET_MODE_SIZE (mode) < 4
5906 && !(reload_in_progress || reload_completed)
5907 && (reg_mentioned_p (frame_pointer_rtx, x)
5908 || reg_mentioned_p (arg_pointer_rtx, x)
5909 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5910 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5911 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5912 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5913 return 0;
5915 /* Accept any base register. SP only in SImode or larger. */
5916 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5917 return 1;
5919 /* This is PC relative data before arm_reorg runs. */
5920 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5921 && GET_CODE (x) == SYMBOL_REF
5922 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5923 return 1;
5925 /* This is PC relative data after arm_reorg runs. */
5926 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5927 && reload_completed
5928 && (GET_CODE (x) == LABEL_REF
5929 || (GET_CODE (x) == CONST
5930 && GET_CODE (XEXP (x, 0)) == PLUS
5931 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5932 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5933 return 1;
5935 /* Post-inc indexing only supported for SImode and larger. */
5936 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5937 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5938 return 1;
5940 else if (GET_CODE (x) == PLUS)
5942 /* REG+REG address can be any two index registers. */
5943 /* We disallow FRAME+REG addressing since we know that FRAME
5944 will be replaced with STACK, and SP relative addressing only
5945 permits SP+OFFSET. */
5946 if (GET_MODE_SIZE (mode) <= 4
5947 && XEXP (x, 0) != frame_pointer_rtx
5948 && XEXP (x, 1) != frame_pointer_rtx
5949 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5950 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5951 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5952 return 1;
5954 /* REG+const has 5-7 bit offset for non-SP registers. */
5955 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5956 || XEXP (x, 0) == arg_pointer_rtx)
5957 && GET_CODE (XEXP (x, 1)) == CONST_INT
5958 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5959 return 1;
5961 /* REG+const has 10-bit offset for SP, but only SImode and
5962 larger is supported. */
5963 /* ??? Should probably check for DI/DFmode overflow here
5964 just like GO_IF_LEGITIMATE_OFFSET does. */
5965 else if (GET_CODE (XEXP (x, 0)) == REG
5966 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5967 && GET_MODE_SIZE (mode) >= 4
5968 && GET_CODE (XEXP (x, 1)) == CONST_INT
5969 && INTVAL (XEXP (x, 1)) >= 0
5970 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5971 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5972 return 1;
5974 else if (GET_CODE (XEXP (x, 0)) == REG
5975 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5976 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5977 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5978 && REGNO (XEXP (x, 0))
5979 <= LAST_VIRTUAL_POINTER_REGISTER))
5980 && GET_MODE_SIZE (mode) >= 4
5981 && GET_CODE (XEXP (x, 1)) == CONST_INT
5982 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5983 return 1;
5986 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5987 && GET_MODE_SIZE (mode) == 4
5988 && GET_CODE (x) == SYMBOL_REF
5989 && CONSTANT_POOL_ADDRESS_P (x)
5990 && ! (flag_pic
5991 && symbol_mentioned_p (get_pool_constant (x))
5992 && ! pcrel_constant_p (get_pool_constant (x))))
5993 return 1;
5995 return 0;
5998 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5999 instruction of mode MODE. */
6001 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6003 switch (GET_MODE_SIZE (mode))
6005 case 1:
6006 return val >= 0 && val < 32;
6008 case 2:
6009 return val >= 0 && val < 64 && (val & 1) == 0;
6011 default:
6012 return (val >= 0
6013 && (val + GET_MODE_SIZE (mode)) <= 128
6014 && (val & 3) == 0);
6018 bool
6019 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6021 if (TARGET_ARM)
6022 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6023 else if (TARGET_THUMB2)
6024 return thumb2_legitimate_address_p (mode, x, strict_p);
6025 else /* if (TARGET_THUMB1) */
6026 return thumb1_legitimate_address_p (mode, x, strict_p);
6029 /* Build the SYMBOL_REF for __tls_get_addr. */
6031 static GTY(()) rtx tls_get_addr_libfunc;
6033 static rtx
6034 get_tls_get_addr (void)
6036 if (!tls_get_addr_libfunc)
6037 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6038 return tls_get_addr_libfunc;
6041 static rtx
6042 arm_load_tp (rtx target)
6044 if (!target)
6045 target = gen_reg_rtx (SImode);
6047 if (TARGET_HARD_TP)
6049 /* Can return in any reg. */
6050 emit_insn (gen_load_tp_hard (target));
6052 else
6054 /* Always returned in r0. Immediately copy the result into a pseudo,
6055 otherwise other uses of r0 (e.g. setting up function arguments) may
6056 clobber the value. */
6058 rtx tmp;
6060 emit_insn (gen_load_tp_soft ());
6062 tmp = gen_rtx_REG (SImode, 0);
6063 emit_move_insn (target, tmp);
6065 return target;
6068 static rtx
6069 load_tls_operand (rtx x, rtx reg)
6071 rtx tmp;
6073 if (reg == NULL_RTX)
6074 reg = gen_reg_rtx (SImode);
6076 tmp = gen_rtx_CONST (SImode, x);
6078 emit_move_insn (reg, tmp);
6080 return reg;
6083 static rtx
6084 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6086 rtx insns, label, labelno, sum;
6088 start_sequence ();
6090 labelno = GEN_INT (pic_labelno++);
6091 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6092 label = gen_rtx_CONST (VOIDmode, label);
6094 sum = gen_rtx_UNSPEC (Pmode,
6095 gen_rtvec (4, x, GEN_INT (reloc), label,
6096 GEN_INT (TARGET_ARM ? 8 : 4)),
6097 UNSPEC_TLS);
6098 reg = load_tls_operand (sum, reg);
6100 if (TARGET_ARM)
6101 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6102 else if (TARGET_THUMB2)
6103 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6104 else /* TARGET_THUMB1 */
6105 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6107 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
6108 Pmode, 1, reg, Pmode);
6110 insns = get_insns ();
6111 end_sequence ();
6113 return insns;
6117 legitimize_tls_address (rtx x, rtx reg)
6119 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6120 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6122 switch (model)
6124 case TLS_MODEL_GLOBAL_DYNAMIC:
6125 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6126 dest = gen_reg_rtx (Pmode);
6127 emit_libcall_block (insns, dest, ret, x);
6128 return dest;
6130 case TLS_MODEL_LOCAL_DYNAMIC:
6131 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6133 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6134 share the LDM result with other LD model accesses. */
6135 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6136 UNSPEC_TLS);
6137 dest = gen_reg_rtx (Pmode);
6138 emit_libcall_block (insns, dest, ret, eqv);
6140 /* Load the addend. */
6141 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
6142 UNSPEC_TLS);
6143 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6144 return gen_rtx_PLUS (Pmode, dest, addend);
6146 case TLS_MODEL_INITIAL_EXEC:
6147 labelno = GEN_INT (pic_labelno++);
6148 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6149 label = gen_rtx_CONST (VOIDmode, label);
6150 sum = gen_rtx_UNSPEC (Pmode,
6151 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6152 GEN_INT (TARGET_ARM ? 8 : 4)),
6153 UNSPEC_TLS);
6154 reg = load_tls_operand (sum, reg);
6156 if (TARGET_ARM)
6157 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6158 else if (TARGET_THUMB2)
6159 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6160 else
6162 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6163 emit_move_insn (reg, gen_const_mem (SImode, reg));
6166 tp = arm_load_tp (NULL_RTX);
6168 return gen_rtx_PLUS (Pmode, tp, reg);
6170 case TLS_MODEL_LOCAL_EXEC:
6171 tp = arm_load_tp (NULL_RTX);
6173 reg = gen_rtx_UNSPEC (Pmode,
6174 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6175 UNSPEC_TLS);
6176 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6178 return gen_rtx_PLUS (Pmode, tp, reg);
6180 default:
6181 abort ();
6185 /* Try machine-dependent ways of modifying an illegitimate address
6186 to be legitimate. If we find one, return the new, valid address. */
6188 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6190 if (!TARGET_ARM)
6192 /* TODO: legitimize_address for Thumb2. */
6193 if (TARGET_THUMB2)
6194 return x;
6195 return thumb_legitimize_address (x, orig_x, mode);
6198 if (arm_tls_symbol_p (x))
6199 return legitimize_tls_address (x, NULL_RTX);
6201 if (GET_CODE (x) == PLUS)
6203 rtx xop0 = XEXP (x, 0);
6204 rtx xop1 = XEXP (x, 1);
6206 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6207 xop0 = force_reg (SImode, xop0);
6209 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6210 xop1 = force_reg (SImode, xop1);
6212 if (ARM_BASE_REGISTER_RTX_P (xop0)
6213 && GET_CODE (xop1) == CONST_INT)
6215 HOST_WIDE_INT n, low_n;
6216 rtx base_reg, val;
6217 n = INTVAL (xop1);
6219 /* VFP addressing modes actually allow greater offsets, but for
6220 now we just stick with the lowest common denominator. */
6221 if (mode == DImode
6222 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6224 low_n = n & 0x0f;
6225 n &= ~0x0f;
6226 if (low_n > 4)
6228 n += 16;
6229 low_n -= 16;
6232 else
6234 low_n = ((mode) == TImode ? 0
6235 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6236 n -= low_n;
6239 base_reg = gen_reg_rtx (SImode);
6240 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6241 emit_move_insn (base_reg, val);
6242 x = plus_constant (base_reg, low_n);
6244 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6245 x = gen_rtx_PLUS (SImode, xop0, xop1);
6248 /* XXX We don't allow MINUS any more -- see comment in
6249 arm_legitimate_address_outer_p (). */
6250 else if (GET_CODE (x) == MINUS)
6252 rtx xop0 = XEXP (x, 0);
6253 rtx xop1 = XEXP (x, 1);
6255 if (CONSTANT_P (xop0))
6256 xop0 = force_reg (SImode, xop0);
6258 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6259 xop1 = force_reg (SImode, xop1);
6261 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6262 x = gen_rtx_MINUS (SImode, xop0, xop1);
6265 /* Make sure to take full advantage of the pre-indexed addressing mode
6266 with absolute addresses which often allows for the base register to
6267 be factorized for multiple adjacent memory references, and it might
6268 even allows for the mini pool to be avoided entirely. */
6269 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6271 unsigned int bits;
6272 HOST_WIDE_INT mask, base, index;
6273 rtx base_reg;
6275 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6276 use a 8-bit index. So let's use a 12-bit index for SImode only and
6277 hope that arm_gen_constant will enable ldrb to use more bits. */
6278 bits = (mode == SImode) ? 12 : 8;
6279 mask = (1 << bits) - 1;
6280 base = INTVAL (x) & ~mask;
6281 index = INTVAL (x) & mask;
6282 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6284 /* It'll most probably be more efficient to generate the base
6285 with more bits set and use a negative index instead. */
6286 base |= mask;
6287 index -= mask;
6289 base_reg = force_reg (SImode, GEN_INT (base));
6290 x = plus_constant (base_reg, index);
6293 if (flag_pic)
6295 /* We need to find and carefully transform any SYMBOL and LABEL
6296 references; so go back to the original address expression. */
6297 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6299 if (new_x != orig_x)
6300 x = new_x;
6303 return x;
6307 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6308 to be legitimate. If we find one, return the new, valid address. */
6310 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6312 if (arm_tls_symbol_p (x))
6313 return legitimize_tls_address (x, NULL_RTX);
6315 if (GET_CODE (x) == PLUS
6316 && GET_CODE (XEXP (x, 1)) == CONST_INT
6317 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6318 || INTVAL (XEXP (x, 1)) < 0))
6320 rtx xop0 = XEXP (x, 0);
6321 rtx xop1 = XEXP (x, 1);
6322 HOST_WIDE_INT offset = INTVAL (xop1);
6324 /* Try and fold the offset into a biasing of the base register and
6325 then offsetting that. Don't do this when optimizing for space
6326 since it can cause too many CSEs. */
6327 if (optimize_size && offset >= 0
6328 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6330 HOST_WIDE_INT delta;
6332 if (offset >= 256)
6333 delta = offset - (256 - GET_MODE_SIZE (mode));
6334 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6335 delta = 31 * GET_MODE_SIZE (mode);
6336 else
6337 delta = offset & (~31 * GET_MODE_SIZE (mode));
6339 xop0 = force_operand (plus_constant (xop0, offset - delta),
6340 NULL_RTX);
6341 x = plus_constant (xop0, delta);
6343 else if (offset < 0 && offset > -256)
6344 /* Small negative offsets are best done with a subtract before the
6345 dereference, forcing these into a register normally takes two
6346 instructions. */
6347 x = force_operand (x, NULL_RTX);
6348 else
6350 /* For the remaining cases, force the constant into a register. */
6351 xop1 = force_reg (SImode, xop1);
6352 x = gen_rtx_PLUS (SImode, xop0, xop1);
6355 else if (GET_CODE (x) == PLUS
6356 && s_register_operand (XEXP (x, 1), SImode)
6357 && !s_register_operand (XEXP (x, 0), SImode))
6359 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6361 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6364 if (flag_pic)
6366 /* We need to find and carefully transform any SYMBOL and LABEL
6367 references; so go back to the original address expression. */
6368 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6370 if (new_x != orig_x)
6371 x = new_x;
6374 return x;
6378 thumb_legitimize_reload_address (rtx *x_p,
6379 enum machine_mode mode,
6380 int opnum, int type,
6381 int ind_levels ATTRIBUTE_UNUSED)
6383 rtx x = *x_p;
6385 if (GET_CODE (x) == PLUS
6386 && GET_MODE_SIZE (mode) < 4
6387 && REG_P (XEXP (x, 0))
6388 && XEXP (x, 0) == stack_pointer_rtx
6389 && GET_CODE (XEXP (x, 1)) == CONST_INT
6390 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6392 rtx orig_x = x;
6394 x = copy_rtx (x);
6395 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6396 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6397 return x;
6400 /* If both registers are hi-regs, then it's better to reload the
6401 entire expression rather than each register individually. That
6402 only requires one reload register rather than two. */
6403 if (GET_CODE (x) == PLUS
6404 && REG_P (XEXP (x, 0))
6405 && REG_P (XEXP (x, 1))
6406 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6407 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6409 rtx orig_x = x;
6411 x = copy_rtx (x);
6412 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6413 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6414 return x;
6417 return NULL;
6420 /* Test for various thread-local symbols. */
6422 /* Return TRUE if X is a thread-local symbol. */
6424 static bool
6425 arm_tls_symbol_p (rtx x)
6427 if (! TARGET_HAVE_TLS)
6428 return false;
6430 if (GET_CODE (x) != SYMBOL_REF)
6431 return false;
6433 return SYMBOL_REF_TLS_MODEL (x) != 0;
6436 /* Helper for arm_tls_referenced_p. */
6438 static int
6439 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6441 if (GET_CODE (*x) == SYMBOL_REF)
6442 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6444 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6445 TLS offsets, not real symbol references. */
6446 if (GET_CODE (*x) == UNSPEC
6447 && XINT (*x, 1) == UNSPEC_TLS)
6448 return -1;
6450 return 0;
6453 /* Return TRUE if X contains any TLS symbol references. */
6455 bool
6456 arm_tls_referenced_p (rtx x)
6458 if (! TARGET_HAVE_TLS)
6459 return false;
6461 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6464 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6466 bool
6467 arm_cannot_force_const_mem (rtx x)
6469 rtx base, offset;
6471 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6473 split_const (x, &base, &offset);
6474 if (GET_CODE (base) == SYMBOL_REF
6475 && !offset_within_block_p (base, INTVAL (offset)))
6476 return true;
6478 return arm_tls_referenced_p (x);
6481 #define REG_OR_SUBREG_REG(X) \
6482 (GET_CODE (X) == REG \
6483 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6485 #define REG_OR_SUBREG_RTX(X) \
6486 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6488 static inline int
6489 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6491 enum machine_mode mode = GET_MODE (x);
6492 int total;
6494 switch (code)
6496 case ASHIFT:
6497 case ASHIFTRT:
6498 case LSHIFTRT:
6499 case ROTATERT:
6500 case PLUS:
6501 case MINUS:
6502 case COMPARE:
6503 case NEG:
6504 case NOT:
6505 return COSTS_N_INSNS (1);
6507 case MULT:
6508 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6510 int cycles = 0;
6511 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6513 while (i)
6515 i >>= 2;
6516 cycles++;
6518 return COSTS_N_INSNS (2) + cycles;
6520 return COSTS_N_INSNS (1) + 16;
6522 case SET:
6523 return (COSTS_N_INSNS (1)
6524 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6525 + GET_CODE (SET_DEST (x)) == MEM));
6527 case CONST_INT:
6528 if (outer == SET)
6530 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6531 return 0;
6532 if (thumb_shiftable_const (INTVAL (x)))
6533 return COSTS_N_INSNS (2);
6534 return COSTS_N_INSNS (3);
6536 else if ((outer == PLUS || outer == COMPARE)
6537 && INTVAL (x) < 256 && INTVAL (x) > -256)
6538 return 0;
6539 else if ((outer == IOR || outer == XOR || outer == AND)
6540 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6541 return COSTS_N_INSNS (1);
6542 else if (outer == AND)
6544 int i;
6545 /* This duplicates the tests in the andsi3 expander. */
6546 for (i = 9; i <= 31; i++)
6547 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6548 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6549 return COSTS_N_INSNS (2);
6551 else if (outer == ASHIFT || outer == ASHIFTRT
6552 || outer == LSHIFTRT)
6553 return 0;
6554 return COSTS_N_INSNS (2);
6556 case CONST:
6557 case CONST_DOUBLE:
6558 case LABEL_REF:
6559 case SYMBOL_REF:
6560 return COSTS_N_INSNS (3);
6562 case UDIV:
6563 case UMOD:
6564 case DIV:
6565 case MOD:
6566 return 100;
6568 case TRUNCATE:
6569 return 99;
6571 case AND:
6572 case XOR:
6573 case IOR:
6574 /* XXX guess. */
6575 return 8;
6577 case MEM:
6578 /* XXX another guess. */
6579 /* Memory costs quite a lot for the first word, but subsequent words
6580 load at the equivalent of a single insn each. */
6581 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6582 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6583 ? 4 : 0));
6585 case IF_THEN_ELSE:
6586 /* XXX a guess. */
6587 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6588 return 14;
6589 return 2;
6591 case SIGN_EXTEND:
6592 case ZERO_EXTEND:
6593 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6594 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6596 if (mode == SImode)
6597 return total;
6599 if (arm_arch6)
6600 return total + COSTS_N_INSNS (1);
6602 /* Assume a two-shift sequence. Increase the cost slightly so
6603 we prefer actual shifts over an extend operation. */
6604 return total + 1 + COSTS_N_INSNS (2);
6606 default:
6607 return 99;
6611 static inline bool
6612 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6614 enum machine_mode mode = GET_MODE (x);
6615 enum rtx_code subcode;
6616 rtx operand;
6617 enum rtx_code code = GET_CODE (x);
6618 *total = 0;
6620 switch (code)
6622 case MEM:
6623 /* Memory costs quite a lot for the first word, but subsequent words
6624 load at the equivalent of a single insn each. */
6625 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6626 return true;
6628 case DIV:
6629 case MOD:
6630 case UDIV:
6631 case UMOD:
6632 if (TARGET_HARD_FLOAT && mode == SFmode)
6633 *total = COSTS_N_INSNS (2);
6634 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6635 *total = COSTS_N_INSNS (4);
6636 else
6637 *total = COSTS_N_INSNS (20);
6638 return false;
6640 case ROTATE:
6641 if (GET_CODE (XEXP (x, 1)) == REG)
6642 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6643 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6644 *total = rtx_cost (XEXP (x, 1), code, speed);
6646 /* Fall through */
6647 case ROTATERT:
6648 if (mode != SImode)
6650 *total += COSTS_N_INSNS (4);
6651 return true;
6654 /* Fall through */
6655 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6656 *total += rtx_cost (XEXP (x, 0), code, speed);
6657 if (mode == DImode)
6659 *total += COSTS_N_INSNS (3);
6660 return true;
6663 *total += COSTS_N_INSNS (1);
6664 /* Increase the cost of complex shifts because they aren't any faster,
6665 and reduce dual issue opportunities. */
6666 if (arm_tune_cortex_a9
6667 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6668 ++*total;
6670 return true;
6672 case MINUS:
6673 if (mode == DImode)
6675 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6676 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6677 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6679 *total += rtx_cost (XEXP (x, 1), code, speed);
6680 return true;
6683 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6684 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6686 *total += rtx_cost (XEXP (x, 0), code, speed);
6687 return true;
6690 return false;
6693 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6695 if (TARGET_HARD_FLOAT
6696 && (mode == SFmode
6697 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6699 *total = COSTS_N_INSNS (1);
6700 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6701 && arm_const_double_rtx (XEXP (x, 0)))
6703 *total += rtx_cost (XEXP (x, 1), code, speed);
6704 return true;
6707 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6708 && arm_const_double_rtx (XEXP (x, 1)))
6710 *total += rtx_cost (XEXP (x, 0), code, speed);
6711 return true;
6714 return false;
6716 *total = COSTS_N_INSNS (20);
6717 return false;
6720 *total = COSTS_N_INSNS (1);
6721 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6722 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6724 *total += rtx_cost (XEXP (x, 1), code, speed);
6725 return true;
6728 subcode = GET_CODE (XEXP (x, 1));
6729 if (subcode == ASHIFT || subcode == ASHIFTRT
6730 || subcode == LSHIFTRT
6731 || subcode == ROTATE || subcode == ROTATERT)
6733 *total += rtx_cost (XEXP (x, 0), code, speed);
6734 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6735 return true;
6738 /* A shift as a part of RSB costs no more than RSB itself. */
6739 if (GET_CODE (XEXP (x, 0)) == MULT
6740 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6742 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6743 *total += rtx_cost (XEXP (x, 1), code, speed);
6744 return true;
6747 if (subcode == MULT
6748 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6750 *total += rtx_cost (XEXP (x, 0), code, speed);
6751 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6752 return true;
6755 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6756 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6758 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6759 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6760 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6761 *total += COSTS_N_INSNS (1);
6763 return true;
6766 /* Fall through */
6768 case PLUS:
6769 if (code == PLUS && arm_arch6 && mode == SImode
6770 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6771 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6773 *total = COSTS_N_INSNS (1);
6774 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6775 speed);
6776 *total += rtx_cost (XEXP (x, 1), code, speed);
6777 return true;
6780 /* MLA: All arguments must be registers. We filter out
6781 multiplication by a power of two, so that we fall down into
6782 the code below. */
6783 if (GET_CODE (XEXP (x, 0)) == MULT
6784 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6786 /* The cost comes from the cost of the multiply. */
6787 return false;
6790 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6792 if (TARGET_HARD_FLOAT
6793 && (mode == SFmode
6794 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6796 *total = COSTS_N_INSNS (1);
6797 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6798 && arm_const_double_rtx (XEXP (x, 1)))
6800 *total += rtx_cost (XEXP (x, 0), code, speed);
6801 return true;
6804 return false;
6807 *total = COSTS_N_INSNS (20);
6808 return false;
6811 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6812 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6814 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6815 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6816 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6817 *total += COSTS_N_INSNS (1);
6818 return true;
6821 /* Fall through */
6823 case AND: case XOR: case IOR:
6825 /* Normally the frame registers will be spilt into reg+const during
6826 reload, so it is a bad idea to combine them with other instructions,
6827 since then they might not be moved outside of loops. As a compromise
6828 we allow integration with ops that have a constant as their second
6829 operand. */
6830 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6831 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6832 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6833 *total = COSTS_N_INSNS (1);
6835 if (mode == DImode)
6837 *total += COSTS_N_INSNS (2);
6838 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6839 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6841 *total += rtx_cost (XEXP (x, 0), code, speed);
6842 return true;
6845 return false;
6848 *total += COSTS_N_INSNS (1);
6849 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6850 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6852 *total += rtx_cost (XEXP (x, 0), code, speed);
6853 return true;
6855 subcode = GET_CODE (XEXP (x, 0));
6856 if (subcode == ASHIFT || subcode == ASHIFTRT
6857 || subcode == LSHIFTRT
6858 || subcode == ROTATE || subcode == ROTATERT)
6860 *total += rtx_cost (XEXP (x, 1), code, speed);
6861 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6862 return true;
6865 if (subcode == MULT
6866 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6868 *total += rtx_cost (XEXP (x, 1), code, speed);
6869 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6870 return true;
6873 if (subcode == UMIN || subcode == UMAX
6874 || subcode == SMIN || subcode == SMAX)
6876 *total = COSTS_N_INSNS (3);
6877 return true;
6880 return false;
6882 case MULT:
6883 /* This should have been handled by the CPU specific routines. */
6884 gcc_unreachable ();
6886 case TRUNCATE:
6887 if (arm_arch3m && mode == SImode
6888 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6889 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6890 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6891 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6892 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6893 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6895 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6896 return true;
6898 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6899 return false;
6901 case NEG:
6902 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6904 if (TARGET_HARD_FLOAT
6905 && (mode == SFmode
6906 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6908 *total = COSTS_N_INSNS (1);
6909 return false;
6911 *total = COSTS_N_INSNS (2);
6912 return false;
6915 /* Fall through */
6916 case NOT:
6917 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6918 if (mode == SImode && code == NOT)
6920 subcode = GET_CODE (XEXP (x, 0));
6921 if (subcode == ASHIFT || subcode == ASHIFTRT
6922 || subcode == LSHIFTRT
6923 || subcode == ROTATE || subcode == ROTATERT
6924 || (subcode == MULT
6925 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6927 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6928 /* Register shifts cost an extra cycle. */
6929 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6930 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6931 subcode, speed);
6932 return true;
6936 return false;
6938 case IF_THEN_ELSE:
6939 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6941 *total = COSTS_N_INSNS (4);
6942 return true;
6945 operand = XEXP (x, 0);
6947 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6948 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6949 && GET_CODE (XEXP (operand, 0)) == REG
6950 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6951 *total += COSTS_N_INSNS (1);
6952 *total += (rtx_cost (XEXP (x, 1), code, speed)
6953 + rtx_cost (XEXP (x, 2), code, speed));
6954 return true;
6956 case NE:
6957 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6959 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6960 return true;
6962 goto scc_insn;
6964 case GE:
6965 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6966 && mode == SImode && XEXP (x, 1) == const0_rtx)
6968 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6969 return true;
6971 goto scc_insn;
6973 case LT:
6974 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6975 && mode == SImode && XEXP (x, 1) == const0_rtx)
6977 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6978 return true;
6980 goto scc_insn;
6982 case EQ:
6983 case GT:
6984 case LE:
6985 case GEU:
6986 case LTU:
6987 case GTU:
6988 case LEU:
6989 case UNORDERED:
6990 case ORDERED:
6991 case UNEQ:
6992 case UNGE:
6993 case UNLT:
6994 case UNGT:
6995 case UNLE:
6996 scc_insn:
6997 /* SCC insns. In the case where the comparison has already been
6998 performed, then they cost 2 instructions. Otherwise they need
6999 an additional comparison before them. */
7000 *total = COSTS_N_INSNS (2);
7001 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7003 return true;
7006 /* Fall through */
7007 case COMPARE:
7008 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7010 *total = 0;
7011 return true;
7014 *total += COSTS_N_INSNS (1);
7015 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7016 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7018 *total += rtx_cost (XEXP (x, 0), code, speed);
7019 return true;
7022 subcode = GET_CODE (XEXP (x, 0));
7023 if (subcode == ASHIFT || subcode == ASHIFTRT
7024 || subcode == LSHIFTRT
7025 || subcode == ROTATE || subcode == ROTATERT)
7027 *total += rtx_cost (XEXP (x, 1), code, speed);
7028 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7029 return true;
7032 if (subcode == MULT
7033 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7035 *total += rtx_cost (XEXP (x, 1), code, speed);
7036 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7037 return true;
7040 return false;
7042 case UMIN:
7043 case UMAX:
7044 case SMIN:
7045 case SMAX:
7046 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7047 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7048 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7049 *total += rtx_cost (XEXP (x, 1), code, speed);
7050 return true;
7052 case ABS:
7053 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7055 if (TARGET_HARD_FLOAT
7056 && (mode == SFmode
7057 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7059 *total = COSTS_N_INSNS (1);
7060 return false;
7062 *total = COSTS_N_INSNS (20);
7063 return false;
7065 *total = COSTS_N_INSNS (1);
7066 if (mode == DImode)
7067 *total += COSTS_N_INSNS (3);
7068 return false;
7070 case SIGN_EXTEND:
7071 case ZERO_EXTEND:
7072 *total = 0;
7073 if (GET_MODE_CLASS (mode) == MODE_INT)
7075 rtx op = XEXP (x, 0);
7076 enum machine_mode opmode = GET_MODE (op);
7078 if (mode == DImode)
7079 *total += COSTS_N_INSNS (1);
7081 if (opmode != SImode)
7083 if (MEM_P (op))
7085 /* If !arm_arch4, we use one of the extendhisi2_mem
7086 or movhi_bytes patterns for HImode. For a QImode
7087 sign extension, we first zero-extend from memory
7088 and then perform a shift sequence. */
7089 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7090 *total += COSTS_N_INSNS (2);
7092 else if (arm_arch6)
7093 *total += COSTS_N_INSNS (1);
7095 /* We don't have the necessary insn, so we need to perform some
7096 other operation. */
7097 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7098 /* An and with constant 255. */
7099 *total += COSTS_N_INSNS (1);
7100 else
7101 /* A shift sequence. Increase costs slightly to avoid
7102 combining two shifts into an extend operation. */
7103 *total += COSTS_N_INSNS (2) + 1;
7106 return false;
7109 switch (GET_MODE (XEXP (x, 0)))
7111 case V8QImode:
7112 case V4HImode:
7113 case V2SImode:
7114 case V4QImode:
7115 case V2HImode:
7116 *total = COSTS_N_INSNS (1);
7117 return false;
7119 default:
7120 gcc_unreachable ();
7122 gcc_unreachable ();
7124 case ZERO_EXTRACT:
7125 case SIGN_EXTRACT:
7126 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7127 return true;
7129 case CONST_INT:
7130 if (const_ok_for_arm (INTVAL (x))
7131 || const_ok_for_arm (~INTVAL (x)))
7132 *total = COSTS_N_INSNS (1);
7133 else
7134 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7135 INTVAL (x), NULL_RTX,
7136 NULL_RTX, 0, 0));
7137 return true;
7139 case CONST:
7140 case LABEL_REF:
7141 case SYMBOL_REF:
7142 *total = COSTS_N_INSNS (3);
7143 return true;
7145 case HIGH:
7146 *total = COSTS_N_INSNS (1);
7147 return true;
7149 case LO_SUM:
7150 *total = COSTS_N_INSNS (1);
7151 *total += rtx_cost (XEXP (x, 0), code, speed);
7152 return true;
7154 case CONST_DOUBLE:
7155 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7156 && (mode == SFmode || !TARGET_VFP_SINGLE))
7157 *total = COSTS_N_INSNS (1);
7158 else
7159 *total = COSTS_N_INSNS (4);
7160 return true;
7162 default:
7163 *total = COSTS_N_INSNS (4);
7164 return false;
7168 /* Estimates the size cost of thumb1 instructions.
7169 For now most of the code is copied from thumb1_rtx_costs. We need more
7170 fine grain tuning when we have more related test cases. */
7171 static inline int
7172 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7174 enum machine_mode mode = GET_MODE (x);
7176 switch (code)
7178 case ASHIFT:
7179 case ASHIFTRT:
7180 case LSHIFTRT:
7181 case ROTATERT:
7182 case PLUS:
7183 case MINUS:
7184 case COMPARE:
7185 case NEG:
7186 case NOT:
7187 return COSTS_N_INSNS (1);
7189 case MULT:
7190 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7192 /* Thumb1 mul instruction can't operate on const. We must Load it
7193 into a register first. */
7194 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7195 return COSTS_N_INSNS (1) + const_size;
7197 return COSTS_N_INSNS (1);
7199 case SET:
7200 return (COSTS_N_INSNS (1)
7201 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7202 + GET_CODE (SET_DEST (x)) == MEM));
7204 case CONST_INT:
7205 if (outer == SET)
7207 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7208 return COSTS_N_INSNS (1);
7209 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7210 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7211 return COSTS_N_INSNS (2);
7212 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7213 if (thumb_shiftable_const (INTVAL (x)))
7214 return COSTS_N_INSNS (2);
7215 return COSTS_N_INSNS (3);
7217 else if ((outer == PLUS || outer == COMPARE)
7218 && INTVAL (x) < 256 && INTVAL (x) > -256)
7219 return 0;
7220 else if ((outer == IOR || outer == XOR || outer == AND)
7221 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7222 return COSTS_N_INSNS (1);
7223 else if (outer == AND)
7225 int i;
7226 /* This duplicates the tests in the andsi3 expander. */
7227 for (i = 9; i <= 31; i++)
7228 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7229 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7230 return COSTS_N_INSNS (2);
7232 else if (outer == ASHIFT || outer == ASHIFTRT
7233 || outer == LSHIFTRT)
7234 return 0;
7235 return COSTS_N_INSNS (2);
7237 case CONST:
7238 case CONST_DOUBLE:
7239 case LABEL_REF:
7240 case SYMBOL_REF:
7241 return COSTS_N_INSNS (3);
7243 case UDIV:
7244 case UMOD:
7245 case DIV:
7246 case MOD:
7247 return 100;
7249 case TRUNCATE:
7250 return 99;
7252 case AND:
7253 case XOR:
7254 case IOR:
7255 /* XXX guess. */
7256 return 8;
7258 case MEM:
7259 /* XXX another guess. */
7260 /* Memory costs quite a lot for the first word, but subsequent words
7261 load at the equivalent of a single insn each. */
7262 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7263 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7264 ? 4 : 0));
7266 case IF_THEN_ELSE:
7267 /* XXX a guess. */
7268 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7269 return 14;
7270 return 2;
7272 case ZERO_EXTEND:
7273 /* XXX still guessing. */
7274 switch (GET_MODE (XEXP (x, 0)))
7276 case QImode:
7277 return (1 + (mode == DImode ? 4 : 0)
7278 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7280 case HImode:
7281 return (4 + (mode == DImode ? 4 : 0)
7282 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7284 case SImode:
7285 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7287 default:
7288 return 99;
7291 default:
7292 return 99;
7296 /* RTX costs when optimizing for size. */
7297 static bool
7298 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7299 int *total)
7301 enum machine_mode mode = GET_MODE (x);
7302 if (TARGET_THUMB1)
7304 *total = thumb1_size_rtx_costs (x, code, outer_code);
7305 return true;
7308 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7309 switch (code)
7311 case MEM:
7312 /* A memory access costs 1 insn if the mode is small, or the address is
7313 a single register, otherwise it costs one insn per word. */
7314 if (REG_P (XEXP (x, 0)))
7315 *total = COSTS_N_INSNS (1);
7316 else if (flag_pic
7317 && GET_CODE (XEXP (x, 0)) == PLUS
7318 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7319 /* This will be split into two instructions.
7320 See arm.md:calculate_pic_address. */
7321 *total = COSTS_N_INSNS (2);
7322 else
7323 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7324 return true;
7326 case DIV:
7327 case MOD:
7328 case UDIV:
7329 case UMOD:
7330 /* Needs a libcall, so it costs about this. */
7331 *total = COSTS_N_INSNS (2);
7332 return false;
7334 case ROTATE:
7335 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7337 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7338 return true;
7340 /* Fall through */
7341 case ROTATERT:
7342 case ASHIFT:
7343 case LSHIFTRT:
7344 case ASHIFTRT:
7345 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7347 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7348 return true;
7350 else if (mode == SImode)
7352 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7353 /* Slightly disparage register shifts, but not by much. */
7354 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7355 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7356 return true;
7359 /* Needs a libcall. */
7360 *total = COSTS_N_INSNS (2);
7361 return false;
7363 case MINUS:
7364 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7365 && (mode == SFmode || !TARGET_VFP_SINGLE))
7367 *total = COSTS_N_INSNS (1);
7368 return false;
7371 if (mode == SImode)
7373 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7374 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7376 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7377 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7378 || subcode1 == ROTATE || subcode1 == ROTATERT
7379 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7380 || subcode1 == ASHIFTRT)
7382 /* It's just the cost of the two operands. */
7383 *total = 0;
7384 return false;
7387 *total = COSTS_N_INSNS (1);
7388 return false;
7391 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7392 return false;
7394 case PLUS:
7395 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7396 && (mode == SFmode || !TARGET_VFP_SINGLE))
7398 *total = COSTS_N_INSNS (1);
7399 return false;
7402 /* A shift as a part of ADD costs nothing. */
7403 if (GET_CODE (XEXP (x, 0)) == MULT
7404 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7406 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7407 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7408 *total += rtx_cost (XEXP (x, 1), code, false);
7409 return true;
7412 /* Fall through */
7413 case AND: case XOR: case IOR:
7414 if (mode == SImode)
7416 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7418 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7419 || subcode == LSHIFTRT || subcode == ASHIFTRT
7420 || (code == AND && subcode == NOT))
7422 /* It's just the cost of the two operands. */
7423 *total = 0;
7424 return false;
7428 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7429 return false;
7431 case MULT:
7432 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7433 return false;
7435 case NEG:
7436 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7437 && (mode == SFmode || !TARGET_VFP_SINGLE))
7439 *total = COSTS_N_INSNS (1);
7440 return false;
7443 /* Fall through */
7444 case NOT:
7445 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7447 return false;
7449 case IF_THEN_ELSE:
7450 *total = 0;
7451 return false;
7453 case COMPARE:
7454 if (cc_register (XEXP (x, 0), VOIDmode))
7455 * total = 0;
7456 else
7457 *total = COSTS_N_INSNS (1);
7458 return false;
7460 case ABS:
7461 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7462 && (mode == SFmode || !TARGET_VFP_SINGLE))
7463 *total = COSTS_N_INSNS (1);
7464 else
7465 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7466 return false;
7468 case SIGN_EXTEND:
7469 case ZERO_EXTEND:
7470 return arm_rtx_costs_1 (x, outer_code, total, 0);
7472 case CONST_INT:
7473 if (const_ok_for_arm (INTVAL (x)))
7474 /* A multiplication by a constant requires another instruction
7475 to load the constant to a register. */
7476 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7477 ? 1 : 0);
7478 else if (const_ok_for_arm (~INTVAL (x)))
7479 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7480 else if (const_ok_for_arm (-INTVAL (x)))
7482 if (outer_code == COMPARE || outer_code == PLUS
7483 || outer_code == MINUS)
7484 *total = 0;
7485 else
7486 *total = COSTS_N_INSNS (1);
7488 else
7489 *total = COSTS_N_INSNS (2);
7490 return true;
7492 case CONST:
7493 case LABEL_REF:
7494 case SYMBOL_REF:
7495 *total = COSTS_N_INSNS (2);
7496 return true;
7498 case CONST_DOUBLE:
7499 *total = COSTS_N_INSNS (4);
7500 return true;
7502 case HIGH:
7503 case LO_SUM:
7504 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7505 cost of these slightly. */
7506 *total = COSTS_N_INSNS (1) + 1;
7507 return true;
7509 default:
7510 if (mode != VOIDmode)
7511 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7512 else
7513 *total = COSTS_N_INSNS (4); /* How knows? */
7514 return false;
7518 /* RTX costs when optimizing for size. */
7519 static bool
7520 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7521 bool speed)
7523 if (!speed)
7524 return arm_size_rtx_costs (x, (enum rtx_code) code,
7525 (enum rtx_code) outer_code, total);
7526 else
7527 return current_tune->rtx_costs (x, (enum rtx_code) code,
7528 (enum rtx_code) outer_code,
7529 total, speed);
7532 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7533 supported on any "slowmul" cores, so it can be ignored. */
7535 static bool
7536 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7537 int *total, bool speed)
7539 enum machine_mode mode = GET_MODE (x);
7541 if (TARGET_THUMB)
7543 *total = thumb1_rtx_costs (x, code, outer_code);
7544 return true;
7547 switch (code)
7549 case MULT:
7550 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7551 || mode == DImode)
7553 *total = COSTS_N_INSNS (20);
7554 return false;
7557 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7559 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7560 & (unsigned HOST_WIDE_INT) 0xffffffff);
7561 int cost, const_ok = const_ok_for_arm (i);
7562 int j, booth_unit_size;
7564 /* Tune as appropriate. */
7565 cost = const_ok ? 4 : 8;
7566 booth_unit_size = 2;
7567 for (j = 0; i && j < 32; j += booth_unit_size)
7569 i >>= booth_unit_size;
7570 cost++;
7573 *total = COSTS_N_INSNS (cost);
7574 *total += rtx_cost (XEXP (x, 0), code, speed);
7575 return true;
7578 *total = COSTS_N_INSNS (20);
7579 return false;
7581 default:
7582 return arm_rtx_costs_1 (x, outer_code, total, speed);;
7587 /* RTX cost for cores with a fast multiply unit (M variants). */
7589 static bool
7590 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7591 int *total, bool speed)
7593 enum machine_mode mode = GET_MODE (x);
7595 if (TARGET_THUMB1)
7597 *total = thumb1_rtx_costs (x, code, outer_code);
7598 return true;
7601 /* ??? should thumb2 use different costs? */
7602 switch (code)
7604 case MULT:
7605 /* There is no point basing this on the tuning, since it is always the
7606 fast variant if it exists at all. */
7607 if (mode == DImode
7608 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7609 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7610 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7612 *total = COSTS_N_INSNS(2);
7613 return false;
7617 if (mode == DImode)
7619 *total = COSTS_N_INSNS (5);
7620 return false;
7623 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7625 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7626 & (unsigned HOST_WIDE_INT) 0xffffffff);
7627 int cost, const_ok = const_ok_for_arm (i);
7628 int j, booth_unit_size;
7630 /* Tune as appropriate. */
7631 cost = const_ok ? 4 : 8;
7632 booth_unit_size = 8;
7633 for (j = 0; i && j < 32; j += booth_unit_size)
7635 i >>= booth_unit_size;
7636 cost++;
7639 *total = COSTS_N_INSNS(cost);
7640 return false;
7643 if (mode == SImode)
7645 *total = COSTS_N_INSNS (4);
7646 return false;
7649 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7651 if (TARGET_HARD_FLOAT
7652 && (mode == SFmode
7653 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7655 *total = COSTS_N_INSNS (1);
7656 return false;
7660 /* Requires a lib call */
7661 *total = COSTS_N_INSNS (20);
7662 return false;
7664 default:
7665 return arm_rtx_costs_1 (x, outer_code, total, speed);
7670 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7671 so it can be ignored. */
7673 static bool
7674 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7675 int *total, bool speed)
7677 enum machine_mode mode = GET_MODE (x);
7679 if (TARGET_THUMB)
7681 *total = thumb1_rtx_costs (x, code, outer_code);
7682 return true;
7685 switch (code)
7687 case COMPARE:
7688 if (GET_CODE (XEXP (x, 0)) != MULT)
7689 return arm_rtx_costs_1 (x, outer_code, total, speed);
7691 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7692 will stall until the multiplication is complete. */
7693 *total = COSTS_N_INSNS (3);
7694 return false;
7696 case MULT:
7697 /* There is no point basing this on the tuning, since it is always the
7698 fast variant if it exists at all. */
7699 if (mode == DImode
7700 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7701 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7702 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7704 *total = COSTS_N_INSNS (2);
7705 return false;
7709 if (mode == DImode)
7711 *total = COSTS_N_INSNS (5);
7712 return false;
7715 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7717 /* If operand 1 is a constant we can more accurately
7718 calculate the cost of the multiply. The multiplier can
7719 retire 15 bits on the first cycle and a further 12 on the
7720 second. We do, of course, have to load the constant into
7721 a register first. */
7722 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7723 /* There's a general overhead of one cycle. */
7724 int cost = 1;
7725 unsigned HOST_WIDE_INT masked_const;
7727 if (i & 0x80000000)
7728 i = ~i;
7730 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7732 masked_const = i & 0xffff8000;
7733 if (masked_const != 0)
7735 cost++;
7736 masked_const = i & 0xf8000000;
7737 if (masked_const != 0)
7738 cost++;
7740 *total = COSTS_N_INSNS (cost);
7741 return false;
7744 if (mode == SImode)
7746 *total = COSTS_N_INSNS (3);
7747 return false;
7750 /* Requires a lib call */
7751 *total = COSTS_N_INSNS (20);
7752 return false;
7754 default:
7755 return arm_rtx_costs_1 (x, outer_code, total, speed);
7760 /* RTX costs for 9e (and later) cores. */
7762 static bool
7763 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7764 int *total, bool speed)
7766 enum machine_mode mode = GET_MODE (x);
7768 if (TARGET_THUMB1)
7770 switch (code)
7772 case MULT:
7773 *total = COSTS_N_INSNS (3);
7774 return true;
7776 default:
7777 *total = thumb1_rtx_costs (x, code, outer_code);
7778 return true;
7782 switch (code)
7784 case MULT:
7785 /* There is no point basing this on the tuning, since it is always the
7786 fast variant if it exists at all. */
7787 if (mode == DImode
7788 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7789 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7790 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7792 *total = COSTS_N_INSNS (2);
7793 return false;
7797 if (mode == DImode)
7799 *total = COSTS_N_INSNS (5);
7800 return false;
7803 if (mode == SImode)
7805 *total = COSTS_N_INSNS (2);
7806 return false;
7809 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7811 if (TARGET_HARD_FLOAT
7812 && (mode == SFmode
7813 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7815 *total = COSTS_N_INSNS (1);
7816 return false;
7820 *total = COSTS_N_INSNS (20);
7821 return false;
7823 default:
7824 return arm_rtx_costs_1 (x, outer_code, total, speed);
7827 /* All address computations that can be done are free, but rtx cost returns
7828 the same for practically all of them. So we weight the different types
7829 of address here in the order (most pref first):
7830 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7831 static inline int
7832 arm_arm_address_cost (rtx x)
7834 enum rtx_code c = GET_CODE (x);
7836 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7837 return 0;
7838 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7839 return 10;
7841 if (c == PLUS)
7843 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7844 return 2;
7846 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7847 return 3;
7849 return 4;
7852 return 6;
7855 static inline int
7856 arm_thumb_address_cost (rtx x)
7858 enum rtx_code c = GET_CODE (x);
7860 if (c == REG)
7861 return 1;
7862 if (c == PLUS
7863 && GET_CODE (XEXP (x, 0)) == REG
7864 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7865 return 1;
7867 return 2;
7870 static int
7871 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7873 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7876 /* Adjust cost hook for XScale. */
7877 static bool
7878 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7880 /* Some true dependencies can have a higher cost depending
7881 on precisely how certain input operands are used. */
7882 if (REG_NOTE_KIND(link) == 0
7883 && recog_memoized (insn) >= 0
7884 && recog_memoized (dep) >= 0)
7886 int shift_opnum = get_attr_shift (insn);
7887 enum attr_type attr_type = get_attr_type (dep);
7889 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7890 operand for INSN. If we have a shifted input operand and the
7891 instruction we depend on is another ALU instruction, then we may
7892 have to account for an additional stall. */
7893 if (shift_opnum != 0
7894 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7896 rtx shifted_operand;
7897 int opno;
7899 /* Get the shifted operand. */
7900 extract_insn (insn);
7901 shifted_operand = recog_data.operand[shift_opnum];
7903 /* Iterate over all the operands in DEP. If we write an operand
7904 that overlaps with SHIFTED_OPERAND, then we have increase the
7905 cost of this dependency. */
7906 extract_insn (dep);
7907 preprocess_constraints ();
7908 for (opno = 0; opno < recog_data.n_operands; opno++)
7910 /* We can ignore strict inputs. */
7911 if (recog_data.operand_type[opno] == OP_IN)
7912 continue;
7914 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7915 shifted_operand))
7917 *cost = 2;
7918 return false;
7923 return true;
7926 /* Adjust cost hook for Cortex A9. */
7927 static bool
7928 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7930 switch (REG_NOTE_KIND (link))
7932 case REG_DEP_ANTI:
7933 *cost = 0;
7934 return false;
7936 case REG_DEP_TRUE:
7937 case REG_DEP_OUTPUT:
7938 if (recog_memoized (insn) >= 0
7939 && recog_memoized (dep) >= 0)
7941 if (GET_CODE (PATTERN (insn)) == SET)
7943 if (GET_MODE_CLASS
7944 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7945 || GET_MODE_CLASS
7946 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7948 enum attr_type attr_type_insn = get_attr_type (insn);
7949 enum attr_type attr_type_dep = get_attr_type (dep);
7951 /* By default all dependencies of the form
7952 s0 = s0 <op> s1
7953 s0 = s0 <op> s2
7954 have an extra latency of 1 cycle because
7955 of the input and output dependency in this
7956 case. However this gets modeled as an true
7957 dependency and hence all these checks. */
7958 if (REG_P (SET_DEST (PATTERN (insn)))
7959 && REG_P (SET_DEST (PATTERN (dep)))
7960 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7961 SET_DEST (PATTERN (dep))))
7963 /* FMACS is a special case where the dependant
7964 instruction can be issued 3 cycles before
7965 the normal latency in case of an output
7966 dependency. */
7967 if ((attr_type_insn == TYPE_FMACS
7968 || attr_type_insn == TYPE_FMACD)
7969 && (attr_type_dep == TYPE_FMACS
7970 || attr_type_dep == TYPE_FMACD))
7972 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7973 *cost = insn_default_latency (dep) - 3;
7974 else
7975 *cost = insn_default_latency (dep);
7976 return false;
7978 else
7980 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7981 *cost = insn_default_latency (dep) + 1;
7982 else
7983 *cost = insn_default_latency (dep);
7985 return false;
7990 break;
7992 default:
7993 gcc_unreachable ();
7996 return true;
7999 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8000 It corrects the value of COST based on the relationship between
8001 INSN and DEP through the dependence LINK. It returns the new
8002 value. There is a per-core adjust_cost hook to adjust scheduler costs
8003 and the per-core hook can choose to completely override the generic
8004 adjust_cost function. Only put bits of code into arm_adjust_cost that
8005 are common across all cores. */
8006 static int
8007 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8009 rtx i_pat, d_pat;
8011 /* When generating Thumb-1 code, we want to place flag-setting operations
8012 close to a conditional branch which depends on them, so that we can
8013 omit the comparison. */
8014 if (TARGET_THUMB1
8015 && REG_NOTE_KIND (link) == 0
8016 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8017 && recog_memoized (dep) >= 0
8018 && get_attr_conds (dep) == CONDS_SET)
8019 return 0;
8021 if (current_tune->sched_adjust_cost != NULL)
8023 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8024 return cost;
8027 /* XXX This is not strictly true for the FPA. */
8028 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8029 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8030 return 0;
8032 /* Call insns don't incur a stall, even if they follow a load. */
8033 if (REG_NOTE_KIND (link) == 0
8034 && GET_CODE (insn) == CALL_INSN)
8035 return 1;
8037 if ((i_pat = single_set (insn)) != NULL
8038 && GET_CODE (SET_SRC (i_pat)) == MEM
8039 && (d_pat = single_set (dep)) != NULL
8040 && GET_CODE (SET_DEST (d_pat)) == MEM)
8042 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8043 /* This is a load after a store, there is no conflict if the load reads
8044 from a cached area. Assume that loads from the stack, and from the
8045 constant pool are cached, and that others will miss. This is a
8046 hack. */
8048 if ((GET_CODE (src_mem) == SYMBOL_REF
8049 && CONSTANT_POOL_ADDRESS_P (src_mem))
8050 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8051 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8052 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8053 return 1;
8056 return cost;
8059 static int fp_consts_inited = 0;
8061 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8062 static const char * const strings_fp[8] =
8064 "0", "1", "2", "3",
8065 "4", "5", "0.5", "10"
8068 static REAL_VALUE_TYPE values_fp[8];
8070 static void
8071 init_fp_table (void)
8073 int i;
8074 REAL_VALUE_TYPE r;
8076 if (TARGET_VFP)
8077 fp_consts_inited = 1;
8078 else
8079 fp_consts_inited = 8;
8081 for (i = 0; i < fp_consts_inited; i++)
8083 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8084 values_fp[i] = r;
8088 /* Return TRUE if rtx X is a valid immediate FP constant. */
8090 arm_const_double_rtx (rtx x)
8092 REAL_VALUE_TYPE r;
8093 int i;
8095 if (!fp_consts_inited)
8096 init_fp_table ();
8098 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8099 if (REAL_VALUE_MINUS_ZERO (r))
8100 return 0;
8102 for (i = 0; i < fp_consts_inited; i++)
8103 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8104 return 1;
8106 return 0;
8109 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8111 neg_const_double_rtx_ok_for_fpa (rtx x)
8113 REAL_VALUE_TYPE r;
8114 int i;
8116 if (!fp_consts_inited)
8117 init_fp_table ();
8119 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8120 r = real_value_negate (&r);
8121 if (REAL_VALUE_MINUS_ZERO (r))
8122 return 0;
8124 for (i = 0; i < 8; i++)
8125 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8126 return 1;
8128 return 0;
8132 /* VFPv3 has a fairly wide range of representable immediates, formed from
8133 "quarter-precision" floating-point values. These can be evaluated using this
8134 formula (with ^ for exponentiation):
8136 -1^s * n * 2^-r
8138 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8139 16 <= n <= 31 and 0 <= r <= 7.
8141 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8143 - A (most-significant) is the sign bit.
8144 - BCD are the exponent (encoded as r XOR 3).
8145 - EFGH are the mantissa (encoded as n - 16).
8148 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8149 fconst[sd] instruction, or -1 if X isn't suitable. */
8150 static int
8151 vfp3_const_double_index (rtx x)
8153 REAL_VALUE_TYPE r, m;
8154 int sign, exponent;
8155 unsigned HOST_WIDE_INT mantissa, mant_hi;
8156 unsigned HOST_WIDE_INT mask;
8157 HOST_WIDE_INT m1, m2;
8158 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8160 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8161 return -1;
8163 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8165 /* We can't represent these things, so detect them first. */
8166 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8167 return -1;
8169 /* Extract sign, exponent and mantissa. */
8170 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8171 r = real_value_abs (&r);
8172 exponent = REAL_EXP (&r);
8173 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8174 highest (sign) bit, with a fixed binary point at bit point_pos.
8175 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8176 bits for the mantissa, this may fail (low bits would be lost). */
8177 real_ldexp (&m, &r, point_pos - exponent);
8178 REAL_VALUE_TO_INT (&m1, &m2, m);
8179 mantissa = m1;
8180 mant_hi = m2;
8182 /* If there are bits set in the low part of the mantissa, we can't
8183 represent this value. */
8184 if (mantissa != 0)
8185 return -1;
8187 /* Now make it so that mantissa contains the most-significant bits, and move
8188 the point_pos to indicate that the least-significant bits have been
8189 discarded. */
8190 point_pos -= HOST_BITS_PER_WIDE_INT;
8191 mantissa = mant_hi;
8193 /* We can permit four significant bits of mantissa only, plus a high bit
8194 which is always 1. */
8195 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8196 if ((mantissa & mask) != 0)
8197 return -1;
8199 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8200 mantissa >>= point_pos - 5;
8202 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8203 floating-point immediate zero with Neon using an integer-zero load, but
8204 that case is handled elsewhere.) */
8205 if (mantissa == 0)
8206 return -1;
8208 gcc_assert (mantissa >= 16 && mantissa <= 31);
8210 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8211 normalized significands are in the range [1, 2). (Our mantissa is shifted
8212 left 4 places at this point relative to normalized IEEE754 values). GCC
8213 internally uses [0.5, 1) (see real.c), so the exponent returned from
8214 REAL_EXP must be altered. */
8215 exponent = 5 - exponent;
8217 if (exponent < 0 || exponent > 7)
8218 return -1;
8220 /* Sign, mantissa and exponent are now in the correct form to plug into the
8221 formula described in the comment above. */
8222 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8225 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8227 vfp3_const_double_rtx (rtx x)
8229 if (!TARGET_VFP3)
8230 return 0;
8232 return vfp3_const_double_index (x) != -1;
8235 /* Recognize immediates which can be used in various Neon instructions. Legal
8236 immediates are described by the following table (for VMVN variants, the
8237 bitwise inverse of the constant shown is recognized. In either case, VMOV
8238 is output and the correct instruction to use for a given constant is chosen
8239 by the assembler). The constant shown is replicated across all elements of
8240 the destination vector.
8242 insn elems variant constant (binary)
8243 ---- ----- ------- -----------------
8244 vmov i32 0 00000000 00000000 00000000 abcdefgh
8245 vmov i32 1 00000000 00000000 abcdefgh 00000000
8246 vmov i32 2 00000000 abcdefgh 00000000 00000000
8247 vmov i32 3 abcdefgh 00000000 00000000 00000000
8248 vmov i16 4 00000000 abcdefgh
8249 vmov i16 5 abcdefgh 00000000
8250 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8251 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8252 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8253 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8254 vmvn i16 10 00000000 abcdefgh
8255 vmvn i16 11 abcdefgh 00000000
8256 vmov i32 12 00000000 00000000 abcdefgh 11111111
8257 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8258 vmov i32 14 00000000 abcdefgh 11111111 11111111
8259 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8260 vmov i8 16 abcdefgh
8261 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8262 eeeeeeee ffffffff gggggggg hhhhhhhh
8263 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8265 For case 18, B = !b. Representable values are exactly those accepted by
8266 vfp3_const_double_index, but are output as floating-point numbers rather
8267 than indices.
8269 Variants 0-5 (inclusive) may also be used as immediates for the second
8270 operand of VORR/VBIC instructions.
8272 The INVERSE argument causes the bitwise inverse of the given operand to be
8273 recognized instead (used for recognizing legal immediates for the VAND/VORN
8274 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8275 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8276 output, rather than the real insns vbic/vorr).
8278 INVERSE makes no difference to the recognition of float vectors.
8280 The return value is the variant of immediate as shown in the above table, or
8281 -1 if the given value doesn't match any of the listed patterns.
8283 static int
8284 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8285 rtx *modconst, int *elementwidth)
8287 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8288 matches = 1; \
8289 for (i = 0; i < idx; i += (STRIDE)) \
8290 if (!(TEST)) \
8291 matches = 0; \
8292 if (matches) \
8294 immtype = (CLASS); \
8295 elsize = (ELSIZE); \
8296 break; \
8299 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8300 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8301 unsigned char bytes[16];
8302 int immtype = -1, matches;
8303 unsigned int invmask = inverse ? 0xff : 0;
8305 /* Vectors of float constants. */
8306 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8308 rtx el0 = CONST_VECTOR_ELT (op, 0);
8309 REAL_VALUE_TYPE r0;
8311 if (!vfp3_const_double_rtx (el0))
8312 return -1;
8314 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8316 for (i = 1; i < n_elts; i++)
8318 rtx elt = CONST_VECTOR_ELT (op, i);
8319 REAL_VALUE_TYPE re;
8321 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8323 if (!REAL_VALUES_EQUAL (r0, re))
8324 return -1;
8327 if (modconst)
8328 *modconst = CONST_VECTOR_ELT (op, 0);
8330 if (elementwidth)
8331 *elementwidth = 0;
8333 return 18;
8336 /* Splat vector constant out into a byte vector. */
8337 for (i = 0; i < n_elts; i++)
8339 rtx el = CONST_VECTOR_ELT (op, i);
8340 unsigned HOST_WIDE_INT elpart;
8341 unsigned int part, parts;
8343 if (GET_CODE (el) == CONST_INT)
8345 elpart = INTVAL (el);
8346 parts = 1;
8348 else if (GET_CODE (el) == CONST_DOUBLE)
8350 elpart = CONST_DOUBLE_LOW (el);
8351 parts = 2;
8353 else
8354 gcc_unreachable ();
8356 for (part = 0; part < parts; part++)
8358 unsigned int byte;
8359 for (byte = 0; byte < innersize; byte++)
8361 bytes[idx++] = (elpart & 0xff) ^ invmask;
8362 elpart >>= BITS_PER_UNIT;
8364 if (GET_CODE (el) == CONST_DOUBLE)
8365 elpart = CONST_DOUBLE_HIGH (el);
8369 /* Sanity check. */
8370 gcc_assert (idx == GET_MODE_SIZE (mode));
8374 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8375 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8377 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8378 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8380 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8381 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8383 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8384 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8386 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8388 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8390 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8391 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8393 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8394 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8396 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8397 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8399 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8400 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8402 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8404 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8406 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8407 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8409 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8410 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8412 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8413 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8415 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8416 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8418 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8420 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8421 && bytes[i] == bytes[(i + 8) % idx]);
8423 while (0);
8425 if (immtype == -1)
8426 return -1;
8428 if (elementwidth)
8429 *elementwidth = elsize;
8431 if (modconst)
8433 unsigned HOST_WIDE_INT imm = 0;
8435 /* Un-invert bytes of recognized vector, if necessary. */
8436 if (invmask != 0)
8437 for (i = 0; i < idx; i++)
8438 bytes[i] ^= invmask;
8440 if (immtype == 17)
8442 /* FIXME: Broken on 32-bit H_W_I hosts. */
8443 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8445 for (i = 0; i < 8; i++)
8446 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8447 << (i * BITS_PER_UNIT);
8449 *modconst = GEN_INT (imm);
8451 else
8453 unsigned HOST_WIDE_INT imm = 0;
8455 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8456 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8458 *modconst = GEN_INT (imm);
8462 return immtype;
8463 #undef CHECK
8466 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8467 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8468 float elements), and a modified constant (whatever should be output for a
8469 VMOV) in *MODCONST. */
8472 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8473 rtx *modconst, int *elementwidth)
8475 rtx tmpconst;
8476 int tmpwidth;
8477 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8479 if (retval == -1)
8480 return 0;
8482 if (modconst)
8483 *modconst = tmpconst;
8485 if (elementwidth)
8486 *elementwidth = tmpwidth;
8488 return 1;
8491 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8492 the immediate is valid, write a constant suitable for using as an operand
8493 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8494 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8497 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8498 rtx *modconst, int *elementwidth)
8500 rtx tmpconst;
8501 int tmpwidth;
8502 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8504 if (retval < 0 || retval > 5)
8505 return 0;
8507 if (modconst)
8508 *modconst = tmpconst;
8510 if (elementwidth)
8511 *elementwidth = tmpwidth;
8513 return 1;
8516 /* Return a string suitable for output of Neon immediate logic operation
8517 MNEM. */
8519 char *
8520 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8521 int inverse, int quad)
8523 int width, is_valid;
8524 static char templ[40];
8526 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8528 gcc_assert (is_valid != 0);
8530 if (quad)
8531 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8532 else
8533 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8535 return templ;
8538 /* Output a sequence of pairwise operations to implement a reduction.
8539 NOTE: We do "too much work" here, because pairwise operations work on two
8540 registers-worth of operands in one go. Unfortunately we can't exploit those
8541 extra calculations to do the full operation in fewer steps, I don't think.
8542 Although all vector elements of the result but the first are ignored, we
8543 actually calculate the same result in each of the elements. An alternative
8544 such as initially loading a vector with zero to use as each of the second
8545 operands would use up an additional register and take an extra instruction,
8546 for no particular gain. */
8548 void
8549 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8550 rtx (*reduc) (rtx, rtx, rtx))
8552 enum machine_mode inner = GET_MODE_INNER (mode);
8553 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8554 rtx tmpsum = op1;
8556 for (i = parts / 2; i >= 1; i /= 2)
8558 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8559 emit_insn (reduc (dest, tmpsum, tmpsum));
8560 tmpsum = dest;
8564 /* If VALS is a vector constant that can be loaded into a register
8565 using VDUP, generate instructions to do so and return an RTX to
8566 assign to the register. Otherwise return NULL_RTX. */
8568 static rtx
8569 neon_vdup_constant (rtx vals)
8571 enum machine_mode mode = GET_MODE (vals);
8572 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8573 int n_elts = GET_MODE_NUNITS (mode);
8574 bool all_same = true;
8575 rtx x;
8576 int i;
8578 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8579 return NULL_RTX;
8581 for (i = 0; i < n_elts; ++i)
8583 x = XVECEXP (vals, 0, i);
8584 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8585 all_same = false;
8588 if (!all_same)
8589 /* The elements are not all the same. We could handle repeating
8590 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8591 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8592 vdup.i16). */
8593 return NULL_RTX;
8595 /* We can load this constant by using VDUP and a constant in a
8596 single ARM register. This will be cheaper than a vector
8597 load. */
8599 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8600 return gen_rtx_VEC_DUPLICATE (mode, x);
8603 /* Generate code to load VALS, which is a PARALLEL containing only
8604 constants (for vec_init) or CONST_VECTOR, efficiently into a
8605 register. Returns an RTX to copy into the register, or NULL_RTX
8606 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8609 neon_make_constant (rtx vals)
8611 enum machine_mode mode = GET_MODE (vals);
8612 rtx target;
8613 rtx const_vec = NULL_RTX;
8614 int n_elts = GET_MODE_NUNITS (mode);
8615 int n_const = 0;
8616 int i;
8618 if (GET_CODE (vals) == CONST_VECTOR)
8619 const_vec = vals;
8620 else if (GET_CODE (vals) == PARALLEL)
8622 /* A CONST_VECTOR must contain only CONST_INTs and
8623 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8624 Only store valid constants in a CONST_VECTOR. */
8625 for (i = 0; i < n_elts; ++i)
8627 rtx x = XVECEXP (vals, 0, i);
8628 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8629 n_const++;
8631 if (n_const == n_elts)
8632 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8634 else
8635 gcc_unreachable ();
8637 if (const_vec != NULL
8638 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8639 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8640 return const_vec;
8641 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8642 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8643 pipeline cycle; creating the constant takes one or two ARM
8644 pipeline cycles. */
8645 return target;
8646 else if (const_vec != NULL_RTX)
8647 /* Load from constant pool. On Cortex-A8 this takes two cycles
8648 (for either double or quad vectors). We can not take advantage
8649 of single-cycle VLD1 because we need a PC-relative addressing
8650 mode. */
8651 return const_vec;
8652 else
8653 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8654 We can not construct an initializer. */
8655 return NULL_RTX;
8658 /* Initialize vector TARGET to VALS. */
8660 void
8661 neon_expand_vector_init (rtx target, rtx vals)
8663 enum machine_mode mode = GET_MODE (target);
8664 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8665 int n_elts = GET_MODE_NUNITS (mode);
8666 int n_var = 0, one_var = -1;
8667 bool all_same = true;
8668 rtx x, mem;
8669 int i;
8671 for (i = 0; i < n_elts; ++i)
8673 x = XVECEXP (vals, 0, i);
8674 if (!CONSTANT_P (x))
8675 ++n_var, one_var = i;
8677 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8678 all_same = false;
8681 if (n_var == 0)
8683 rtx constant = neon_make_constant (vals);
8684 if (constant != NULL_RTX)
8686 emit_move_insn (target, constant);
8687 return;
8691 /* Splat a single non-constant element if we can. */
8692 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8694 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8695 emit_insn (gen_rtx_SET (VOIDmode, target,
8696 gen_rtx_VEC_DUPLICATE (mode, x)));
8697 return;
8700 /* One field is non-constant. Load constant then overwrite varying
8701 field. This is more efficient than using the stack. */
8702 if (n_var == 1)
8704 rtx copy = copy_rtx (vals);
8705 rtx index = GEN_INT (one_var);
8707 /* Load constant part of vector, substitute neighboring value for
8708 varying element. */
8709 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8710 neon_expand_vector_init (target, copy);
8712 /* Insert variable. */
8713 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8714 switch (mode)
8716 case V8QImode:
8717 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8718 break;
8719 case V16QImode:
8720 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8721 break;
8722 case V4HImode:
8723 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8724 break;
8725 case V8HImode:
8726 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8727 break;
8728 case V2SImode:
8729 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8730 break;
8731 case V4SImode:
8732 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8733 break;
8734 case V2SFmode:
8735 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8736 break;
8737 case V4SFmode:
8738 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8739 break;
8740 case V2DImode:
8741 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8742 break;
8743 default:
8744 gcc_unreachable ();
8746 return;
8749 /* Construct the vector in memory one field at a time
8750 and load the whole vector. */
8751 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8752 for (i = 0; i < n_elts; i++)
8753 emit_move_insn (adjust_address_nv (mem, inner_mode,
8754 i * GET_MODE_SIZE (inner_mode)),
8755 XVECEXP (vals, 0, i));
8756 emit_move_insn (target, mem);
8759 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8760 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8761 reported source locations are bogus. */
8763 static void
8764 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8765 const char *err)
8767 HOST_WIDE_INT lane;
8769 gcc_assert (GET_CODE (operand) == CONST_INT);
8771 lane = INTVAL (operand);
8773 if (lane < low || lane >= high)
8774 error (err);
8777 /* Bounds-check lanes. */
8779 void
8780 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8782 bounds_check (operand, low, high, "lane out of range");
8785 /* Bounds-check constants. */
8787 void
8788 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8790 bounds_check (operand, low, high, "constant out of range");
8793 HOST_WIDE_INT
8794 neon_element_bits (enum machine_mode mode)
8796 if (mode == DImode)
8797 return GET_MODE_BITSIZE (mode);
8798 else
8799 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8803 /* Predicates for `match_operand' and `match_operator'. */
8805 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8807 cirrus_memory_offset (rtx op)
8809 /* Reject eliminable registers. */
8810 if (! (reload_in_progress || reload_completed)
8811 && ( reg_mentioned_p (frame_pointer_rtx, op)
8812 || reg_mentioned_p (arg_pointer_rtx, op)
8813 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8814 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8815 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8816 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8817 return 0;
8819 if (GET_CODE (op) == MEM)
8821 rtx ind;
8823 ind = XEXP (op, 0);
8825 /* Match: (mem (reg)). */
8826 if (GET_CODE (ind) == REG)
8827 return 1;
8829 /* Match:
8830 (mem (plus (reg)
8831 (const))). */
8832 if (GET_CODE (ind) == PLUS
8833 && GET_CODE (XEXP (ind, 0)) == REG
8834 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8835 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8836 return 1;
8839 return 0;
8842 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8843 WB is true if full writeback address modes are allowed and is false
8844 if limited writeback address modes (POST_INC and PRE_DEC) are
8845 allowed. */
8848 arm_coproc_mem_operand (rtx op, bool wb)
8850 rtx ind;
8852 /* Reject eliminable registers. */
8853 if (! (reload_in_progress || reload_completed)
8854 && ( reg_mentioned_p (frame_pointer_rtx, op)
8855 || reg_mentioned_p (arg_pointer_rtx, op)
8856 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8857 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8858 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8859 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8860 return FALSE;
8862 /* Constants are converted into offsets from labels. */
8863 if (GET_CODE (op) != MEM)
8864 return FALSE;
8866 ind = XEXP (op, 0);
8868 if (reload_completed
8869 && (GET_CODE (ind) == LABEL_REF
8870 || (GET_CODE (ind) == CONST
8871 && GET_CODE (XEXP (ind, 0)) == PLUS
8872 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8873 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8874 return TRUE;
8876 /* Match: (mem (reg)). */
8877 if (GET_CODE (ind) == REG)
8878 return arm_address_register_rtx_p (ind, 0);
8880 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
8881 acceptable in any case (subject to verification by
8882 arm_address_register_rtx_p). We need WB to be true to accept
8883 PRE_INC and POST_DEC. */
8884 if (GET_CODE (ind) == POST_INC
8885 || GET_CODE (ind) == PRE_DEC
8886 || (wb
8887 && (GET_CODE (ind) == PRE_INC
8888 || GET_CODE (ind) == POST_DEC)))
8889 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8891 if (wb
8892 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8893 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8894 && GET_CODE (XEXP (ind, 1)) == PLUS
8895 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8896 ind = XEXP (ind, 1);
8898 /* Match:
8899 (plus (reg)
8900 (const)). */
8901 if (GET_CODE (ind) == PLUS
8902 && GET_CODE (XEXP (ind, 0)) == REG
8903 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8904 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8905 && INTVAL (XEXP (ind, 1)) > -1024
8906 && INTVAL (XEXP (ind, 1)) < 1024
8907 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8908 return TRUE;
8910 return FALSE;
8913 /* Return TRUE if OP is a memory operand which we can load or store a vector
8914 to/from. TYPE is one of the following values:
8915 0 - Vector load/stor (vldr)
8916 1 - Core registers (ldm)
8917 2 - Element/structure loads (vld1)
8920 neon_vector_mem_operand (rtx op, int type)
8922 rtx ind;
8924 /* Reject eliminable registers. */
8925 if (! (reload_in_progress || reload_completed)
8926 && ( reg_mentioned_p (frame_pointer_rtx, op)
8927 || reg_mentioned_p (arg_pointer_rtx, op)
8928 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8929 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8930 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8931 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8932 return FALSE;
8934 /* Constants are converted into offsets from labels. */
8935 if (GET_CODE (op) != MEM)
8936 return FALSE;
8938 ind = XEXP (op, 0);
8940 if (reload_completed
8941 && (GET_CODE (ind) == LABEL_REF
8942 || (GET_CODE (ind) == CONST
8943 && GET_CODE (XEXP (ind, 0)) == PLUS
8944 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8945 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8946 return TRUE;
8948 /* Match: (mem (reg)). */
8949 if (GET_CODE (ind) == REG)
8950 return arm_address_register_rtx_p (ind, 0);
8952 /* Allow post-increment with Neon registers. */
8953 if ((type != 1 && GET_CODE (ind) == POST_INC)
8954 || (type == 0 && GET_CODE (ind) == PRE_DEC))
8955 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8957 /* FIXME: vld1 allows register post-modify. */
8959 /* Match:
8960 (plus (reg)
8961 (const)). */
8962 if (type == 0
8963 && GET_CODE (ind) == PLUS
8964 && GET_CODE (XEXP (ind, 0)) == REG
8965 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8966 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8967 && INTVAL (XEXP (ind, 1)) > -1024
8968 && INTVAL (XEXP (ind, 1)) < 1016
8969 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8970 return TRUE;
8972 return FALSE;
8975 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8976 type. */
8978 neon_struct_mem_operand (rtx op)
8980 rtx ind;
8982 /* Reject eliminable registers. */
8983 if (! (reload_in_progress || reload_completed)
8984 && ( reg_mentioned_p (frame_pointer_rtx, op)
8985 || reg_mentioned_p (arg_pointer_rtx, op)
8986 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8987 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8988 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8989 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8990 return FALSE;
8992 /* Constants are converted into offsets from labels. */
8993 if (GET_CODE (op) != MEM)
8994 return FALSE;
8996 ind = XEXP (op, 0);
8998 if (reload_completed
8999 && (GET_CODE (ind) == LABEL_REF
9000 || (GET_CODE (ind) == CONST
9001 && GET_CODE (XEXP (ind, 0)) == PLUS
9002 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9003 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9004 return TRUE;
9006 /* Match: (mem (reg)). */
9007 if (GET_CODE (ind) == REG)
9008 return arm_address_register_rtx_p (ind, 0);
9010 return FALSE;
9013 /* Return true if X is a register that will be eliminated later on. */
9015 arm_eliminable_register (rtx x)
9017 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9018 || REGNO (x) == ARG_POINTER_REGNUM
9019 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9020 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9023 /* Return GENERAL_REGS if a scratch register required to reload x to/from
9024 coprocessor registers. Otherwise return NO_REGS. */
9026 enum reg_class
9027 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9029 if (mode == HFmode)
9031 if (!TARGET_NEON_FP16)
9032 return GENERAL_REGS;
9033 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9034 return NO_REGS;
9035 return GENERAL_REGS;
9038 if (TARGET_NEON
9039 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9040 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9041 && neon_vector_mem_operand (x, 0))
9042 return NO_REGS;
9044 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9045 return NO_REGS;
9047 return GENERAL_REGS;
9050 /* Values which must be returned in the most-significant end of the return
9051 register. */
9053 static bool
9054 arm_return_in_msb (const_tree valtype)
9056 return (TARGET_AAPCS_BASED
9057 && BYTES_BIG_ENDIAN
9058 && (AGGREGATE_TYPE_P (valtype)
9059 || TREE_CODE (valtype) == COMPLEX_TYPE));
9062 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9063 Use by the Cirrus Maverick code which has to workaround
9064 a hardware bug triggered by such instructions. */
9065 static bool
9066 arm_memory_load_p (rtx insn)
9068 rtx body, lhs, rhs;;
9070 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9071 return false;
9073 body = PATTERN (insn);
9075 if (GET_CODE (body) != SET)
9076 return false;
9078 lhs = XEXP (body, 0);
9079 rhs = XEXP (body, 1);
9081 lhs = REG_OR_SUBREG_RTX (lhs);
9083 /* If the destination is not a general purpose
9084 register we do not have to worry. */
9085 if (GET_CODE (lhs) != REG
9086 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9087 return false;
9089 /* As well as loads from memory we also have to react
9090 to loads of invalid constants which will be turned
9091 into loads from the minipool. */
9092 return (GET_CODE (rhs) == MEM
9093 || GET_CODE (rhs) == SYMBOL_REF
9094 || note_invalid_constants (insn, -1, false));
9097 /* Return TRUE if INSN is a Cirrus instruction. */
9098 static bool
9099 arm_cirrus_insn_p (rtx insn)
9101 enum attr_cirrus attr;
9103 /* get_attr cannot accept USE or CLOBBER. */
9104 if (!insn
9105 || GET_CODE (insn) != INSN
9106 || GET_CODE (PATTERN (insn)) == USE
9107 || GET_CODE (PATTERN (insn)) == CLOBBER)
9108 return 0;
9110 attr = get_attr_cirrus (insn);
9112 return attr != CIRRUS_NOT;
9115 /* Cirrus reorg for invalid instruction combinations. */
9116 static void
9117 cirrus_reorg (rtx first)
9119 enum attr_cirrus attr;
9120 rtx body = PATTERN (first);
9121 rtx t;
9122 int nops;
9124 /* Any branch must be followed by 2 non Cirrus instructions. */
9125 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9127 nops = 0;
9128 t = next_nonnote_insn (first);
9130 if (arm_cirrus_insn_p (t))
9131 ++ nops;
9133 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9134 ++ nops;
9136 while (nops --)
9137 emit_insn_after (gen_nop (), first);
9139 return;
9142 /* (float (blah)) is in parallel with a clobber. */
9143 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9144 body = XVECEXP (body, 0, 0);
9146 if (GET_CODE (body) == SET)
9148 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9150 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9151 be followed by a non Cirrus insn. */
9152 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9154 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9155 emit_insn_after (gen_nop (), first);
9157 return;
9159 else if (arm_memory_load_p (first))
9161 unsigned int arm_regno;
9163 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9164 ldr/cfmv64hr combination where the Rd field is the same
9165 in both instructions must be split with a non Cirrus
9166 insn. Example:
9168 ldr r0, blah
9170 cfmvsr mvf0, r0. */
9172 /* Get Arm register number for ldr insn. */
9173 if (GET_CODE (lhs) == REG)
9174 arm_regno = REGNO (lhs);
9175 else
9177 gcc_assert (GET_CODE (rhs) == REG);
9178 arm_regno = REGNO (rhs);
9181 /* Next insn. */
9182 first = next_nonnote_insn (first);
9184 if (! arm_cirrus_insn_p (first))
9185 return;
9187 body = PATTERN (first);
9189 /* (float (blah)) is in parallel with a clobber. */
9190 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9191 body = XVECEXP (body, 0, 0);
9193 if (GET_CODE (body) == FLOAT)
9194 body = XEXP (body, 0);
9196 if (get_attr_cirrus (first) == CIRRUS_MOVE
9197 && GET_CODE (XEXP (body, 1)) == REG
9198 && arm_regno == REGNO (XEXP (body, 1)))
9199 emit_insn_after (gen_nop (), first);
9201 return;
9205 /* get_attr cannot accept USE or CLOBBER. */
9206 if (!first
9207 || GET_CODE (first) != INSN
9208 || GET_CODE (PATTERN (first)) == USE
9209 || GET_CODE (PATTERN (first)) == CLOBBER)
9210 return;
9212 attr = get_attr_cirrus (first);
9214 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9215 must be followed by a non-coprocessor instruction. */
9216 if (attr == CIRRUS_COMPARE)
9218 nops = 0;
9220 t = next_nonnote_insn (first);
9222 if (arm_cirrus_insn_p (t))
9223 ++ nops;
9225 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9226 ++ nops;
9228 while (nops --)
9229 emit_insn_after (gen_nop (), first);
9231 return;
9235 /* Return TRUE if X references a SYMBOL_REF. */
9237 symbol_mentioned_p (rtx x)
9239 const char * fmt;
9240 int i;
9242 if (GET_CODE (x) == SYMBOL_REF)
9243 return 1;
9245 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9246 are constant offsets, not symbols. */
9247 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9248 return 0;
9250 fmt = GET_RTX_FORMAT (GET_CODE (x));
9252 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9254 if (fmt[i] == 'E')
9256 int j;
9258 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9259 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9260 return 1;
9262 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9263 return 1;
9266 return 0;
9269 /* Return TRUE if X references a LABEL_REF. */
9271 label_mentioned_p (rtx x)
9273 const char * fmt;
9274 int i;
9276 if (GET_CODE (x) == LABEL_REF)
9277 return 1;
9279 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9280 instruction, but they are constant offsets, not symbols. */
9281 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9282 return 0;
9284 fmt = GET_RTX_FORMAT (GET_CODE (x));
9285 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9287 if (fmt[i] == 'E')
9289 int j;
9291 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9292 if (label_mentioned_p (XVECEXP (x, i, j)))
9293 return 1;
9295 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9296 return 1;
9299 return 0;
9303 tls_mentioned_p (rtx x)
9305 switch (GET_CODE (x))
9307 case CONST:
9308 return tls_mentioned_p (XEXP (x, 0));
9310 case UNSPEC:
9311 if (XINT (x, 1) == UNSPEC_TLS)
9312 return 1;
9314 default:
9315 return 0;
9319 /* Must not copy any rtx that uses a pc-relative address. */
9321 static int
9322 arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED)
9324 if (GET_CODE (*x) == UNSPEC
9325 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9326 return 1;
9327 return 0;
9330 static bool
9331 arm_cannot_copy_insn_p (rtx insn)
9333 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9336 enum rtx_code
9337 minmax_code (rtx x)
9339 enum rtx_code code = GET_CODE (x);
9341 switch (code)
9343 case SMAX:
9344 return GE;
9345 case SMIN:
9346 return LE;
9347 case UMIN:
9348 return LEU;
9349 case UMAX:
9350 return GEU;
9351 default:
9352 gcc_unreachable ();
9356 /* Return 1 if memory locations are adjacent. */
9358 adjacent_mem_locations (rtx a, rtx b)
9360 /* We don't guarantee to preserve the order of these memory refs. */
9361 if (volatile_refs_p (a) || volatile_refs_p (b))
9362 return 0;
9364 if ((GET_CODE (XEXP (a, 0)) == REG
9365 || (GET_CODE (XEXP (a, 0)) == PLUS
9366 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9367 && (GET_CODE (XEXP (b, 0)) == REG
9368 || (GET_CODE (XEXP (b, 0)) == PLUS
9369 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9371 HOST_WIDE_INT val0 = 0, val1 = 0;
9372 rtx reg0, reg1;
9373 int val_diff;
9375 if (GET_CODE (XEXP (a, 0)) == PLUS)
9377 reg0 = XEXP (XEXP (a, 0), 0);
9378 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9380 else
9381 reg0 = XEXP (a, 0);
9383 if (GET_CODE (XEXP (b, 0)) == PLUS)
9385 reg1 = XEXP (XEXP (b, 0), 0);
9386 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9388 else
9389 reg1 = XEXP (b, 0);
9391 /* Don't accept any offset that will require multiple
9392 instructions to handle, since this would cause the
9393 arith_adjacentmem pattern to output an overlong sequence. */
9394 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9395 return 0;
9397 /* Don't allow an eliminable register: register elimination can make
9398 the offset too large. */
9399 if (arm_eliminable_register (reg0))
9400 return 0;
9402 val_diff = val1 - val0;
9404 if (arm_ld_sched)
9406 /* If the target has load delay slots, then there's no benefit
9407 to using an ldm instruction unless the offset is zero and
9408 we are optimizing for size. */
9409 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9410 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9411 && (val_diff == 4 || val_diff == -4));
9414 return ((REGNO (reg0) == REGNO (reg1))
9415 && (val_diff == 4 || val_diff == -4));
9418 return 0;
9421 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9422 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9423 instruction. ADD_OFFSET is nonzero if the base address register needs
9424 to be modified with an add instruction before we can use it. */
9426 static bool
9427 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9428 int nops, HOST_WIDE_INT add_offset)
9430 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9431 if the offset isn't small enough. The reason 2 ldrs are faster
9432 is because these ARMs are able to do more than one cache access
9433 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9434 whilst the ARM8 has a double bandwidth cache. This means that
9435 these cores can do both an instruction fetch and a data fetch in
9436 a single cycle, so the trick of calculating the address into a
9437 scratch register (one of the result regs) and then doing a load
9438 multiple actually becomes slower (and no smaller in code size).
9439 That is the transformation
9441 ldr rd1, [rbase + offset]
9442 ldr rd2, [rbase + offset + 4]
9446 add rd1, rbase, offset
9447 ldmia rd1, {rd1, rd2}
9449 produces worse code -- '3 cycles + any stalls on rd2' instead of
9450 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9451 access per cycle, the first sequence could never complete in less
9452 than 6 cycles, whereas the ldm sequence would only take 5 and
9453 would make better use of sequential accesses if not hitting the
9454 cache.
9456 We cheat here and test 'arm_ld_sched' which we currently know to
9457 only be true for the ARM8, ARM9 and StrongARM. If this ever
9458 changes, then the test below needs to be reworked. */
9459 if (nops == 2 && arm_ld_sched && add_offset != 0)
9460 return false;
9462 /* XScale has load-store double instructions, but they have stricter
9463 alignment requirements than load-store multiple, so we cannot
9464 use them.
9466 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9467 the pipeline until completion.
9469 NREGS CYCLES
9475 An ldr instruction takes 1-3 cycles, but does not block the
9476 pipeline.
9478 NREGS CYCLES
9479 1 1-3
9480 2 2-6
9481 3 3-9
9482 4 4-12
9484 Best case ldr will always win. However, the more ldr instructions
9485 we issue, the less likely we are to be able to schedule them well.
9486 Using ldr instructions also increases code size.
9488 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9489 for counts of 3 or 4 regs. */
9490 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9491 return false;
9492 return true;
9495 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9496 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9497 an array ORDER which describes the sequence to use when accessing the
9498 offsets that produces an ascending order. In this sequence, each
9499 offset must be larger by exactly 4 than the previous one. ORDER[0]
9500 must have been filled in with the lowest offset by the caller.
9501 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9502 we use to verify that ORDER produces an ascending order of registers.
9503 Return true if it was possible to construct such an order, false if
9504 not. */
9506 static bool
9507 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9508 int *unsorted_regs)
9510 int i;
9511 for (i = 1; i < nops; i++)
9513 int j;
9515 order[i] = order[i - 1];
9516 for (j = 0; j < nops; j++)
9517 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9519 /* We must find exactly one offset that is higher than the
9520 previous one by 4. */
9521 if (order[i] != order[i - 1])
9522 return false;
9523 order[i] = j;
9525 if (order[i] == order[i - 1])
9526 return false;
9527 /* The register numbers must be ascending. */
9528 if (unsorted_regs != NULL
9529 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9530 return false;
9532 return true;
9535 /* Used to determine in a peephole whether a sequence of load
9536 instructions can be changed into a load-multiple instruction.
9537 NOPS is the number of separate load instructions we are examining. The
9538 first NOPS entries in OPERANDS are the destination registers, the
9539 next NOPS entries are memory operands. If this function is
9540 successful, *BASE is set to the common base register of the memory
9541 accesses; *LOAD_OFFSET is set to the first memory location's offset
9542 from that base register.
9543 REGS is an array filled in with the destination register numbers.
9544 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
9545 insn numbers to to an ascending order of stores. If CHECK_REGS is true,
9546 the sequence of registers in REGS matches the loads from ascending memory
9547 locations, and the function verifies that the register numbers are
9548 themselves ascending. If CHECK_REGS is false, the register numbers
9549 are stored in the order they are found in the operands. */
9550 static int
9551 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9552 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9554 int unsorted_regs[MAX_LDM_STM_OPS];
9555 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9556 int order[MAX_LDM_STM_OPS];
9557 rtx base_reg_rtx = NULL;
9558 int base_reg = -1;
9559 int i, ldm_case;
9561 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9562 easily extended if required. */
9563 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9565 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9567 /* Loop over the operands and check that the memory references are
9568 suitable (i.e. immediate offsets from the same base register). At
9569 the same time, extract the target register, and the memory
9570 offsets. */
9571 for (i = 0; i < nops; i++)
9573 rtx reg;
9574 rtx offset;
9576 /* Convert a subreg of a mem into the mem itself. */
9577 if (GET_CODE (operands[nops + i]) == SUBREG)
9578 operands[nops + i] = alter_subreg (operands + (nops + i));
9580 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9582 /* Don't reorder volatile memory references; it doesn't seem worth
9583 looking for the case where the order is ok anyway. */
9584 if (MEM_VOLATILE_P (operands[nops + i]))
9585 return 0;
9587 offset = const0_rtx;
9589 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9590 || (GET_CODE (reg) == SUBREG
9591 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9592 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9593 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9594 == REG)
9595 || (GET_CODE (reg) == SUBREG
9596 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9597 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9598 == CONST_INT)))
9600 if (i == 0)
9602 base_reg = REGNO (reg);
9603 base_reg_rtx = reg;
9604 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9605 return 0;
9607 else if (base_reg != (int) REGNO (reg))
9608 /* Not addressed from the same base register. */
9609 return 0;
9611 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9612 ? REGNO (operands[i])
9613 : REGNO (SUBREG_REG (operands[i])));
9615 /* If it isn't an integer register, or if it overwrites the
9616 base register but isn't the last insn in the list, then
9617 we can't do this. */
9618 if (unsorted_regs[i] < 0
9619 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9620 || unsorted_regs[i] > 14
9621 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9622 return 0;
9624 unsorted_offsets[i] = INTVAL (offset);
9625 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9626 order[0] = i;
9628 else
9629 /* Not a suitable memory address. */
9630 return 0;
9633 /* All the useful information has now been extracted from the
9634 operands into unsorted_regs and unsorted_offsets; additionally,
9635 order[0] has been set to the lowest offset in the list. Sort
9636 the offsets into order, verifying that they are adjacent, and
9637 check that the register numbers are ascending. */
9638 if (!compute_offset_order (nops, unsorted_offsets, order,
9639 check_regs ? unsorted_regs : NULL))
9640 return 0;
9642 if (saved_order)
9643 memcpy (saved_order, order, sizeof order);
9645 if (base)
9647 *base = base_reg;
9649 for (i = 0; i < nops; i++)
9650 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9652 *load_offset = unsorted_offsets[order[0]];
9655 if (TARGET_THUMB1
9656 && !peep2_reg_dead_p (nops, base_reg_rtx))
9657 return 0;
9659 if (unsorted_offsets[order[0]] == 0)
9660 ldm_case = 1; /* ldmia */
9661 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9662 ldm_case = 2; /* ldmib */
9663 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9664 ldm_case = 3; /* ldmda */
9665 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9666 ldm_case = 4; /* ldmdb */
9667 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9668 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9669 ldm_case = 5;
9670 else
9671 return 0;
9673 if (!multiple_operation_profitable_p (false, nops,
9674 ldm_case == 5
9675 ? unsorted_offsets[order[0]] : 0))
9676 return 0;
9678 return ldm_case;
9681 /* Used to determine in a peephole whether a sequence of store instructions can
9682 be changed into a store-multiple instruction.
9683 NOPS is the number of separate store instructions we are examining.
9684 NOPS_TOTAL is the total number of instructions recognized by the peephole
9685 pattern.
9686 The first NOPS entries in OPERANDS are the source registers, the next
9687 NOPS entries are memory operands. If this function is successful, *BASE is
9688 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9689 to the first memory location's offset from that base register. REGS is an
9690 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9691 likewise filled with the corresponding rtx's.
9692 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
9693 numbers to to an ascending order of stores.
9694 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9695 from ascending memory locations, and the function verifies that the register
9696 numbers are themselves ascending. If CHECK_REGS is false, the register
9697 numbers are stored in the order they are found in the operands. */
9698 static int
9699 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9700 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9701 HOST_WIDE_INT *load_offset, bool check_regs)
9703 int unsorted_regs[MAX_LDM_STM_OPS];
9704 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9705 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9706 int order[MAX_LDM_STM_OPS];
9707 int base_reg = -1;
9708 rtx base_reg_rtx = NULL;
9709 int i, stm_case;
9711 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9712 easily extended if required. */
9713 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9715 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9717 /* Loop over the operands and check that the memory references are
9718 suitable (i.e. immediate offsets from the same base register). At
9719 the same time, extract the target register, and the memory
9720 offsets. */
9721 for (i = 0; i < nops; i++)
9723 rtx reg;
9724 rtx offset;
9726 /* Convert a subreg of a mem into the mem itself. */
9727 if (GET_CODE (operands[nops + i]) == SUBREG)
9728 operands[nops + i] = alter_subreg (operands + (nops + i));
9730 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9732 /* Don't reorder volatile memory references; it doesn't seem worth
9733 looking for the case where the order is ok anyway. */
9734 if (MEM_VOLATILE_P (operands[nops + i]))
9735 return 0;
9737 offset = const0_rtx;
9739 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9740 || (GET_CODE (reg) == SUBREG
9741 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9742 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9743 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9744 == REG)
9745 || (GET_CODE (reg) == SUBREG
9746 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9747 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9748 == CONST_INT)))
9750 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9751 ? operands[i] : SUBREG_REG (operands[i]));
9752 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9754 if (i == 0)
9756 base_reg = REGNO (reg);
9757 base_reg_rtx = reg;
9758 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9759 return 0;
9761 else if (base_reg != (int) REGNO (reg))
9762 /* Not addressed from the same base register. */
9763 return 0;
9765 /* If it isn't an integer register, then we can't do this. */
9766 if (unsorted_regs[i] < 0
9767 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9768 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9769 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9770 || unsorted_regs[i] > 14)
9771 return 0;
9773 unsorted_offsets[i] = INTVAL (offset);
9774 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9775 order[0] = i;
9777 else
9778 /* Not a suitable memory address. */
9779 return 0;
9782 /* All the useful information has now been extracted from the
9783 operands into unsorted_regs and unsorted_offsets; additionally,
9784 order[0] has been set to the lowest offset in the list. Sort
9785 the offsets into order, verifying that they are adjacent, and
9786 check that the register numbers are ascending. */
9787 if (!compute_offset_order (nops, unsorted_offsets, order,
9788 check_regs ? unsorted_regs : NULL))
9789 return 0;
9791 if (saved_order)
9792 memcpy (saved_order, order, sizeof order);
9794 if (base)
9796 *base = base_reg;
9798 for (i = 0; i < nops; i++)
9800 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9801 if (reg_rtxs)
9802 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9805 *load_offset = unsorted_offsets[order[0]];
9808 if (TARGET_THUMB1
9809 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9810 return 0;
9812 if (unsorted_offsets[order[0]] == 0)
9813 stm_case = 1; /* stmia */
9814 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9815 stm_case = 2; /* stmib */
9816 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9817 stm_case = 3; /* stmda */
9818 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9819 stm_case = 4; /* stmdb */
9820 else
9821 return 0;
9823 if (!multiple_operation_profitable_p (false, nops, 0))
9824 return 0;
9826 return stm_case;
9829 /* Routines for use in generating RTL. */
9831 /* Generate a load-multiple instruction. COUNT is the number of loads in
9832 the instruction; REGS and MEMS are arrays containing the operands.
9833 BASEREG is the base register to be used in addressing the memory operands.
9834 WBACK_OFFSET is nonzero if the instruction should update the base
9835 register. */
9837 static rtx
9838 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9839 HOST_WIDE_INT wback_offset)
9841 int i = 0, j;
9842 rtx result;
9844 if (!multiple_operation_profitable_p (false, count, 0))
9846 rtx seq;
9848 start_sequence ();
9850 for (i = 0; i < count; i++)
9851 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9853 if (wback_offset != 0)
9854 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9856 seq = get_insns ();
9857 end_sequence ();
9859 return seq;
9862 result = gen_rtx_PARALLEL (VOIDmode,
9863 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9864 if (wback_offset != 0)
9866 XVECEXP (result, 0, 0)
9867 = gen_rtx_SET (VOIDmode, basereg,
9868 plus_constant (basereg, wback_offset));
9869 i = 1;
9870 count++;
9873 for (j = 0; i < count; i++, j++)
9874 XVECEXP (result, 0, i)
9875 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9877 return result;
9880 /* Generate a store-multiple instruction. COUNT is the number of stores in
9881 the instruction; REGS and MEMS are arrays containing the operands.
9882 BASEREG is the base register to be used in addressing the memory operands.
9883 WBACK_OFFSET is nonzero if the instruction should update the base
9884 register. */
9886 static rtx
9887 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9888 HOST_WIDE_INT wback_offset)
9890 int i = 0, j;
9891 rtx result;
9893 if (GET_CODE (basereg) == PLUS)
9894 basereg = XEXP (basereg, 0);
9896 if (!multiple_operation_profitable_p (false, count, 0))
9898 rtx seq;
9900 start_sequence ();
9902 for (i = 0; i < count; i++)
9903 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9905 if (wback_offset != 0)
9906 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9908 seq = get_insns ();
9909 end_sequence ();
9911 return seq;
9914 result = gen_rtx_PARALLEL (VOIDmode,
9915 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9916 if (wback_offset != 0)
9918 XVECEXP (result, 0, 0)
9919 = gen_rtx_SET (VOIDmode, basereg,
9920 plus_constant (basereg, wback_offset));
9921 i = 1;
9922 count++;
9925 for (j = 0; i < count; i++, j++)
9926 XVECEXP (result, 0, i)
9927 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9929 return result;
9932 /* Generate either a load-multiple or a store-multiple instruction. This
9933 function can be used in situations where we can start with a single MEM
9934 rtx and adjust its address upwards.
9935 COUNT is the number of operations in the instruction, not counting a
9936 possible update of the base register. REGS is an array containing the
9937 register operands.
9938 BASEREG is the base register to be used in addressing the memory operands,
9939 which are constructed from BASEMEM.
9940 WRITE_BACK specifies whether the generated instruction should include an
9941 update of the base register.
9942 OFFSETP is used to pass an offset to and from this function; this offset
9943 is not used when constructing the address (instead BASEMEM should have an
9944 appropriate offset in its address), it is used only for setting
9945 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
9947 static rtx
9948 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9949 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9951 rtx mems[MAX_LDM_STM_OPS];
9952 HOST_WIDE_INT offset = *offsetp;
9953 int i;
9955 gcc_assert (count <= MAX_LDM_STM_OPS);
9957 if (GET_CODE (basereg) == PLUS)
9958 basereg = XEXP (basereg, 0);
9960 for (i = 0; i < count; i++)
9962 rtx addr = plus_constant (basereg, i * 4);
9963 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9964 offset += 4;
9967 if (write_back)
9968 *offsetp = offset;
9970 if (is_load)
9971 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
9972 write_back ? 4 * count : 0);
9973 else
9974 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
9975 write_back ? 4 * count : 0);
9979 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
9980 rtx basemem, HOST_WIDE_INT *offsetp)
9982 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
9983 offsetp);
9987 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
9988 rtx basemem, HOST_WIDE_INT *offsetp)
9990 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
9991 offsetp);
9994 /* Called from a peephole2 expander to turn a sequence of loads into an
9995 LDM instruction. OPERANDS are the operands found by the peephole matcher;
9996 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
9997 is true if we can reorder the registers because they are used commutatively
9998 subsequently.
9999 Returns true iff we could generate a new instruction. */
10001 bool
10002 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10004 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10005 rtx mems[MAX_LDM_STM_OPS];
10006 int i, j, base_reg;
10007 rtx base_reg_rtx;
10008 HOST_WIDE_INT offset;
10009 int write_back = FALSE;
10010 int ldm_case;
10011 rtx addr;
10013 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10014 &base_reg, &offset, !sort_regs);
10016 if (ldm_case == 0)
10017 return false;
10019 if (sort_regs)
10020 for (i = 0; i < nops - 1; i++)
10021 for (j = i + 1; j < nops; j++)
10022 if (regs[i] > regs[j])
10024 int t = regs[i];
10025 regs[i] = regs[j];
10026 regs[j] = t;
10028 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10030 if (TARGET_THUMB1)
10032 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10033 gcc_assert (ldm_case == 1 || ldm_case == 5);
10034 write_back = TRUE;
10037 if (ldm_case == 5)
10039 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10040 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10041 offset = 0;
10042 if (!TARGET_THUMB1)
10044 base_reg = regs[0];
10045 base_reg_rtx = newbase;
10049 for (i = 0; i < nops; i++)
10051 addr = plus_constant (base_reg_rtx, offset + i * 4);
10052 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10053 SImode, addr, 0);
10055 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10056 write_back ? offset + i * 4 : 0));
10057 return true;
10060 /* Called from a peephole2 expander to turn a sequence of stores into an
10061 STM instruction. OPERANDS are the operands found by the peephole matcher;
10062 NOPS indicates how many separate stores we are trying to combine.
10063 Returns true iff we could generate a new instruction. */
10065 bool
10066 gen_stm_seq (rtx *operands, int nops)
10068 int i;
10069 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10070 rtx mems[MAX_LDM_STM_OPS];
10071 int base_reg;
10072 rtx base_reg_rtx;
10073 HOST_WIDE_INT offset;
10074 int write_back = FALSE;
10075 int stm_case;
10076 rtx addr;
10077 bool base_reg_dies;
10079 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10080 mem_order, &base_reg, &offset, true);
10082 if (stm_case == 0)
10083 return false;
10085 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10087 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10088 if (TARGET_THUMB1)
10090 gcc_assert (base_reg_dies);
10091 write_back = TRUE;
10094 if (stm_case == 5)
10096 gcc_assert (base_reg_dies);
10097 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10098 offset = 0;
10101 addr = plus_constant (base_reg_rtx, offset);
10103 for (i = 0; i < nops; i++)
10105 addr = plus_constant (base_reg_rtx, offset + i * 4);
10106 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10107 SImode, addr, 0);
10109 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10110 write_back ? offset + i * 4 : 0));
10111 return true;
10114 /* Called from a peephole2 expander to turn a sequence of stores that are
10115 preceded by constant loads into an STM instruction. OPERANDS are the
10116 operands found by the peephole matcher; NOPS indicates how many
10117 separate stores we are trying to combine; there are 2 * NOPS
10118 instructions in the peephole.
10119 Returns true iff we could generate a new instruction. */
10121 bool
10122 gen_const_stm_seq (rtx *operands, int nops)
10124 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10125 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10126 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10127 rtx mems[MAX_LDM_STM_OPS];
10128 int base_reg;
10129 rtx base_reg_rtx;
10130 HOST_WIDE_INT offset;
10131 int write_back = FALSE;
10132 int stm_case;
10133 rtx addr;
10134 bool base_reg_dies;
10135 int i, j;
10136 HARD_REG_SET allocated;
10138 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10139 mem_order, &base_reg, &offset, false);
10141 if (stm_case == 0)
10142 return false;
10144 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10146 /* If the same register is used more than once, try to find a free
10147 register. */
10148 CLEAR_HARD_REG_SET (allocated);
10149 for (i = 0; i < nops; i++)
10151 for (j = i + 1; j < nops; j++)
10152 if (regs[i] == regs[j])
10154 rtx t = peep2_find_free_register (0, nops * 2,
10155 TARGET_THUMB1 ? "l" : "r",
10156 SImode, &allocated);
10157 if (t == NULL_RTX)
10158 return false;
10159 reg_rtxs[i] = t;
10160 regs[i] = REGNO (t);
10164 /* Compute an ordering that maps the register numbers to an ascending
10165 sequence. */
10166 reg_order[0] = 0;
10167 for (i = 0; i < nops; i++)
10168 if (regs[i] < regs[reg_order[0]])
10169 reg_order[0] = i;
10171 for (i = 1; i < nops; i++)
10173 int this_order = reg_order[i - 1];
10174 for (j = 0; j < nops; j++)
10175 if (regs[j] > regs[reg_order[i - 1]]
10176 && (this_order == reg_order[i - 1]
10177 || regs[j] < regs[this_order]))
10178 this_order = j;
10179 reg_order[i] = this_order;
10182 /* Ensure that registers that must be live after the instruction end
10183 up with the correct value. */
10184 for (i = 0; i < nops; i++)
10186 int this_order = reg_order[i];
10187 if ((this_order != mem_order[i]
10188 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10189 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10190 return false;
10193 /* Load the constants. */
10194 for (i = 0; i < nops; i++)
10196 rtx op = operands[2 * nops + mem_order[i]];
10197 sorted_regs[i] = regs[reg_order[i]];
10198 emit_move_insn (reg_rtxs[reg_order[i]], op);
10201 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10203 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10204 if (TARGET_THUMB1)
10206 gcc_assert (base_reg_dies);
10207 write_back = TRUE;
10210 if (stm_case == 5)
10212 gcc_assert (base_reg_dies);
10213 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10214 offset = 0;
10217 addr = plus_constant (base_reg_rtx, offset);
10219 for (i = 0; i < nops; i++)
10221 addr = plus_constant (base_reg_rtx, offset + i * 4);
10222 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10223 SImode, addr, 0);
10225 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10226 write_back ? offset + i * 4 : 0));
10227 return true;
10231 arm_gen_movmemqi (rtx *operands)
10233 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10234 HOST_WIDE_INT srcoffset, dstoffset;
10235 int i;
10236 rtx src, dst, srcbase, dstbase;
10237 rtx part_bytes_reg = NULL;
10238 rtx mem;
10240 if (GET_CODE (operands[2]) != CONST_INT
10241 || GET_CODE (operands[3]) != CONST_INT
10242 || INTVAL (operands[2]) > 64
10243 || INTVAL (operands[3]) & 3)
10244 return 0;
10246 dstbase = operands[0];
10247 srcbase = operands[1];
10249 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10250 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10252 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10253 out_words_to_go = INTVAL (operands[2]) / 4;
10254 last_bytes = INTVAL (operands[2]) & 3;
10255 dstoffset = srcoffset = 0;
10257 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10258 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10260 for (i = 0; in_words_to_go >= 2; i+=4)
10262 if (in_words_to_go > 4)
10263 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10264 TRUE, srcbase, &srcoffset));
10265 else
10266 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10267 src, FALSE, srcbase,
10268 &srcoffset));
10270 if (out_words_to_go)
10272 if (out_words_to_go > 4)
10273 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10274 TRUE, dstbase, &dstoffset));
10275 else if (out_words_to_go != 1)
10276 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10277 out_words_to_go, dst,
10278 (last_bytes == 0
10279 ? FALSE : TRUE),
10280 dstbase, &dstoffset));
10281 else
10283 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10284 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10285 if (last_bytes != 0)
10287 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10288 dstoffset += 4;
10293 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10294 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10297 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10298 if (out_words_to_go)
10300 rtx sreg;
10302 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10303 sreg = copy_to_reg (mem);
10305 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10306 emit_move_insn (mem, sreg);
10307 in_words_to_go--;
10309 gcc_assert (!in_words_to_go); /* Sanity check */
10312 if (in_words_to_go)
10314 gcc_assert (in_words_to_go > 0);
10316 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10317 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10320 gcc_assert (!last_bytes || part_bytes_reg);
10322 if (BYTES_BIG_ENDIAN && last_bytes)
10324 rtx tmp = gen_reg_rtx (SImode);
10326 /* The bytes we want are in the top end of the word. */
10327 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10328 GEN_INT (8 * (4 - last_bytes))));
10329 part_bytes_reg = tmp;
10331 while (last_bytes)
10333 mem = adjust_automodify_address (dstbase, QImode,
10334 plus_constant (dst, last_bytes - 1),
10335 dstoffset + last_bytes - 1);
10336 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10338 if (--last_bytes)
10340 tmp = gen_reg_rtx (SImode);
10341 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10342 part_bytes_reg = tmp;
10347 else
10349 if (last_bytes > 1)
10351 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10352 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10353 last_bytes -= 2;
10354 if (last_bytes)
10356 rtx tmp = gen_reg_rtx (SImode);
10357 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10358 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10359 part_bytes_reg = tmp;
10360 dstoffset += 2;
10364 if (last_bytes)
10366 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10367 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10371 return 1;
10374 /* Select a dominance comparison mode if possible for a test of the general
10375 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10376 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10377 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10378 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10379 In all cases OP will be either EQ or NE, but we don't need to know which
10380 here. If we are unable to support a dominance comparison we return
10381 CC mode. This will then fail to match for the RTL expressions that
10382 generate this call. */
10383 enum machine_mode
10384 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10386 enum rtx_code cond1, cond2;
10387 int swapped = 0;
10389 /* Currently we will probably get the wrong result if the individual
10390 comparisons are not simple. This also ensures that it is safe to
10391 reverse a comparison if necessary. */
10392 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10393 != CCmode)
10394 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10395 != CCmode))
10396 return CCmode;
10398 /* The if_then_else variant of this tests the second condition if the
10399 first passes, but is true if the first fails. Reverse the first
10400 condition to get a true "inclusive-or" expression. */
10401 if (cond_or == DOM_CC_NX_OR_Y)
10402 cond1 = reverse_condition (cond1);
10404 /* If the comparisons are not equal, and one doesn't dominate the other,
10405 then we can't do this. */
10406 if (cond1 != cond2
10407 && !comparison_dominates_p (cond1, cond2)
10408 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10409 return CCmode;
10411 if (swapped)
10413 enum rtx_code temp = cond1;
10414 cond1 = cond2;
10415 cond2 = temp;
10418 switch (cond1)
10420 case EQ:
10421 if (cond_or == DOM_CC_X_AND_Y)
10422 return CC_DEQmode;
10424 switch (cond2)
10426 case EQ: return CC_DEQmode;
10427 case LE: return CC_DLEmode;
10428 case LEU: return CC_DLEUmode;
10429 case GE: return CC_DGEmode;
10430 case GEU: return CC_DGEUmode;
10431 default: gcc_unreachable ();
10434 case LT:
10435 if (cond_or == DOM_CC_X_AND_Y)
10436 return CC_DLTmode;
10438 switch (cond2)
10440 case LT:
10441 return CC_DLTmode;
10442 case LE:
10443 return CC_DLEmode;
10444 case NE:
10445 return CC_DNEmode;
10446 default:
10447 gcc_unreachable ();
10450 case GT:
10451 if (cond_or == DOM_CC_X_AND_Y)
10452 return CC_DGTmode;
10454 switch (cond2)
10456 case GT:
10457 return CC_DGTmode;
10458 case GE:
10459 return CC_DGEmode;
10460 case NE:
10461 return CC_DNEmode;
10462 default:
10463 gcc_unreachable ();
10466 case LTU:
10467 if (cond_or == DOM_CC_X_AND_Y)
10468 return CC_DLTUmode;
10470 switch (cond2)
10472 case LTU:
10473 return CC_DLTUmode;
10474 case LEU:
10475 return CC_DLEUmode;
10476 case NE:
10477 return CC_DNEmode;
10478 default:
10479 gcc_unreachable ();
10482 case GTU:
10483 if (cond_or == DOM_CC_X_AND_Y)
10484 return CC_DGTUmode;
10486 switch (cond2)
10488 case GTU:
10489 return CC_DGTUmode;
10490 case GEU:
10491 return CC_DGEUmode;
10492 case NE:
10493 return CC_DNEmode;
10494 default:
10495 gcc_unreachable ();
10498 /* The remaining cases only occur when both comparisons are the
10499 same. */
10500 case NE:
10501 gcc_assert (cond1 == cond2);
10502 return CC_DNEmode;
10504 case LE:
10505 gcc_assert (cond1 == cond2);
10506 return CC_DLEmode;
10508 case GE:
10509 gcc_assert (cond1 == cond2);
10510 return CC_DGEmode;
10512 case LEU:
10513 gcc_assert (cond1 == cond2);
10514 return CC_DLEUmode;
10516 case GEU:
10517 gcc_assert (cond1 == cond2);
10518 return CC_DGEUmode;
10520 default:
10521 gcc_unreachable ();
10525 enum machine_mode
10526 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10528 /* All floating point compares return CCFP if it is an equality
10529 comparison, and CCFPE otherwise. */
10530 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10532 switch (op)
10534 case EQ:
10535 case NE:
10536 case UNORDERED:
10537 case ORDERED:
10538 case UNLT:
10539 case UNLE:
10540 case UNGT:
10541 case UNGE:
10542 case UNEQ:
10543 case LTGT:
10544 return CCFPmode;
10546 case LT:
10547 case LE:
10548 case GT:
10549 case GE:
10550 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10551 return CCFPmode;
10552 return CCFPEmode;
10554 default:
10555 gcc_unreachable ();
10559 /* A compare with a shifted operand. Because of canonicalization, the
10560 comparison will have to be swapped when we emit the assembler. */
10561 if (GET_MODE (y) == SImode
10562 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10563 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10564 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10565 || GET_CODE (x) == ROTATERT))
10566 return CC_SWPmode;
10568 /* This operation is performed swapped, but since we only rely on the Z
10569 flag we don't need an additional mode. */
10570 if (GET_MODE (y) == SImode
10571 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10572 && GET_CODE (x) == NEG
10573 && (op == EQ || op == NE))
10574 return CC_Zmode;
10576 /* This is a special case that is used by combine to allow a
10577 comparison of a shifted byte load to be split into a zero-extend
10578 followed by a comparison of the shifted integer (only valid for
10579 equalities and unsigned inequalities). */
10580 if (GET_MODE (x) == SImode
10581 && GET_CODE (x) == ASHIFT
10582 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10583 && GET_CODE (XEXP (x, 0)) == SUBREG
10584 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10585 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10586 && (op == EQ || op == NE
10587 || op == GEU || op == GTU || op == LTU || op == LEU)
10588 && GET_CODE (y) == CONST_INT)
10589 return CC_Zmode;
10591 /* A construct for a conditional compare, if the false arm contains
10592 0, then both conditions must be true, otherwise either condition
10593 must be true. Not all conditions are possible, so CCmode is
10594 returned if it can't be done. */
10595 if (GET_CODE (x) == IF_THEN_ELSE
10596 && (XEXP (x, 2) == const0_rtx
10597 || XEXP (x, 2) == const1_rtx)
10598 && COMPARISON_P (XEXP (x, 0))
10599 && COMPARISON_P (XEXP (x, 1)))
10600 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10601 INTVAL (XEXP (x, 2)));
10603 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10604 if (GET_CODE (x) == AND
10605 && (op == EQ || op == NE)
10606 && COMPARISON_P (XEXP (x, 0))
10607 && COMPARISON_P (XEXP (x, 1)))
10608 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10609 DOM_CC_X_AND_Y);
10611 if (GET_CODE (x) == IOR
10612 && (op == EQ || op == NE)
10613 && COMPARISON_P (XEXP (x, 0))
10614 && COMPARISON_P (XEXP (x, 1)))
10615 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10616 DOM_CC_X_OR_Y);
10618 /* An operation (on Thumb) where we want to test for a single bit.
10619 This is done by shifting that bit up into the top bit of a
10620 scratch register; we can then branch on the sign bit. */
10621 if (TARGET_THUMB1
10622 && GET_MODE (x) == SImode
10623 && (op == EQ || op == NE)
10624 && GET_CODE (x) == ZERO_EXTRACT
10625 && XEXP (x, 1) == const1_rtx)
10626 return CC_Nmode;
10628 /* An operation that sets the condition codes as a side-effect, the
10629 V flag is not set correctly, so we can only use comparisons where
10630 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10631 instead.) */
10632 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10633 if (GET_MODE (x) == SImode
10634 && y == const0_rtx
10635 && (op == EQ || op == NE || op == LT || op == GE)
10636 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10637 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10638 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10639 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10640 || GET_CODE (x) == LSHIFTRT
10641 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10642 || GET_CODE (x) == ROTATERT
10643 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10644 return CC_NOOVmode;
10646 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10647 return CC_Zmode;
10649 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10650 && GET_CODE (x) == PLUS
10651 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10652 return CC_Cmode;
10654 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10656 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10657 available. */
10658 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10659 return CCmode;
10661 switch (op)
10663 case EQ:
10664 case NE:
10665 /* A DImode comparison against zero can be implemented by
10666 or'ing the two halves together. */
10667 if (y == const0_rtx)
10668 return CC_Zmode;
10670 /* We can do an equality test in three Thumb instructions. */
10671 if (!TARGET_ARM)
10672 return CC_Zmode;
10674 /* FALLTHROUGH */
10676 case LTU:
10677 case LEU:
10678 case GTU:
10679 case GEU:
10680 /* DImode unsigned comparisons can be implemented by cmp +
10681 cmpeq without a scratch register. Not worth doing in
10682 Thumb-2. */
10683 if (TARGET_ARM)
10684 return CC_CZmode;
10686 /* FALLTHROUGH */
10688 case LT:
10689 case LE:
10690 case GT:
10691 case GE:
10692 /* DImode signed and unsigned comparisons can be implemented
10693 by cmp + sbcs with a scratch register, but that does not
10694 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10695 gcc_assert (op != EQ && op != NE);
10696 return CC_NCVmode;
10698 default:
10699 gcc_unreachable ();
10703 return CCmode;
10706 /* X and Y are two things to compare using CODE. Emit the compare insn and
10707 return the rtx for register 0 in the proper mode. FP means this is a
10708 floating point compare: I don't think that it is needed on the arm. */
10710 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10712 enum machine_mode mode;
10713 rtx cc_reg;
10714 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10716 /* We might have X as a constant, Y as a register because of the predicates
10717 used for cmpdi. If so, force X to a register here. */
10718 if (dimode_comparison && !REG_P (x))
10719 x = force_reg (DImode, x);
10721 mode = SELECT_CC_MODE (code, x, y);
10722 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10724 if (dimode_comparison
10725 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10726 && mode != CC_CZmode)
10728 rtx clobber, set;
10730 /* To compare two non-zero values for equality, XOR them and
10731 then compare against zero. Not used for ARM mode; there
10732 CC_CZmode is cheaper. */
10733 if (mode == CC_Zmode && y != const0_rtx)
10735 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10736 y = const0_rtx;
10738 /* A scratch register is required. */
10739 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10740 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10741 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10743 else
10744 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10746 return cc_reg;
10749 /* Generate a sequence of insns that will generate the correct return
10750 address mask depending on the physical architecture that the program
10751 is running on. */
10753 arm_gen_return_addr_mask (void)
10755 rtx reg = gen_reg_rtx (Pmode);
10757 emit_insn (gen_return_addr_mask (reg));
10758 return reg;
10761 void
10762 arm_reload_in_hi (rtx *operands)
10764 rtx ref = operands[1];
10765 rtx base, scratch;
10766 HOST_WIDE_INT offset = 0;
10768 if (GET_CODE (ref) == SUBREG)
10770 offset = SUBREG_BYTE (ref);
10771 ref = SUBREG_REG (ref);
10774 if (GET_CODE (ref) == REG)
10776 /* We have a pseudo which has been spilt onto the stack; there
10777 are two cases here: the first where there is a simple
10778 stack-slot replacement and a second where the stack-slot is
10779 out of range, or is used as a subreg. */
10780 if (reg_equiv_mem[REGNO (ref)])
10782 ref = reg_equiv_mem[REGNO (ref)];
10783 base = find_replacement (&XEXP (ref, 0));
10785 else
10786 /* The slot is out of range, or was dressed up in a SUBREG. */
10787 base = reg_equiv_address[REGNO (ref)];
10789 else
10790 base = find_replacement (&XEXP (ref, 0));
10792 /* Handle the case where the address is too complex to be offset by 1. */
10793 if (GET_CODE (base) == MINUS
10794 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10796 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10798 emit_set_insn (base_plus, base);
10799 base = base_plus;
10801 else if (GET_CODE (base) == PLUS)
10803 /* The addend must be CONST_INT, or we would have dealt with it above. */
10804 HOST_WIDE_INT hi, lo;
10806 offset += INTVAL (XEXP (base, 1));
10807 base = XEXP (base, 0);
10809 /* Rework the address into a legal sequence of insns. */
10810 /* Valid range for lo is -4095 -> 4095 */
10811 lo = (offset >= 0
10812 ? (offset & 0xfff)
10813 : -((-offset) & 0xfff));
10815 /* Corner case, if lo is the max offset then we would be out of range
10816 once we have added the additional 1 below, so bump the msb into the
10817 pre-loading insn(s). */
10818 if (lo == 4095)
10819 lo &= 0x7ff;
10821 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10822 ^ (HOST_WIDE_INT) 0x80000000)
10823 - (HOST_WIDE_INT) 0x80000000);
10825 gcc_assert (hi + lo == offset);
10827 if (hi != 0)
10829 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10831 /* Get the base address; addsi3 knows how to handle constants
10832 that require more than one insn. */
10833 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10834 base = base_plus;
10835 offset = lo;
10839 /* Operands[2] may overlap operands[0] (though it won't overlap
10840 operands[1]), that's why we asked for a DImode reg -- so we can
10841 use the bit that does not overlap. */
10842 if (REGNO (operands[2]) == REGNO (operands[0]))
10843 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10844 else
10845 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10847 emit_insn (gen_zero_extendqisi2 (scratch,
10848 gen_rtx_MEM (QImode,
10849 plus_constant (base,
10850 offset))));
10851 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10852 gen_rtx_MEM (QImode,
10853 plus_constant (base,
10854 offset + 1))));
10855 if (!BYTES_BIG_ENDIAN)
10856 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10857 gen_rtx_IOR (SImode,
10858 gen_rtx_ASHIFT
10859 (SImode,
10860 gen_rtx_SUBREG (SImode, operands[0], 0),
10861 GEN_INT (8)),
10862 scratch));
10863 else
10864 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10865 gen_rtx_IOR (SImode,
10866 gen_rtx_ASHIFT (SImode, scratch,
10867 GEN_INT (8)),
10868 gen_rtx_SUBREG (SImode, operands[0], 0)));
10871 /* Handle storing a half-word to memory during reload by synthesizing as two
10872 byte stores. Take care not to clobber the input values until after we
10873 have moved them somewhere safe. This code assumes that if the DImode
10874 scratch in operands[2] overlaps either the input value or output address
10875 in some way, then that value must die in this insn (we absolutely need
10876 two scratch registers for some corner cases). */
10877 void
10878 arm_reload_out_hi (rtx *operands)
10880 rtx ref = operands[0];
10881 rtx outval = operands[1];
10882 rtx base, scratch;
10883 HOST_WIDE_INT offset = 0;
10885 if (GET_CODE (ref) == SUBREG)
10887 offset = SUBREG_BYTE (ref);
10888 ref = SUBREG_REG (ref);
10891 if (GET_CODE (ref) == REG)
10893 /* We have a pseudo which has been spilt onto the stack; there
10894 are two cases here: the first where there is a simple
10895 stack-slot replacement and a second where the stack-slot is
10896 out of range, or is used as a subreg. */
10897 if (reg_equiv_mem[REGNO (ref)])
10899 ref = reg_equiv_mem[REGNO (ref)];
10900 base = find_replacement (&XEXP (ref, 0));
10902 else
10903 /* The slot is out of range, or was dressed up in a SUBREG. */
10904 base = reg_equiv_address[REGNO (ref)];
10906 else
10907 base = find_replacement (&XEXP (ref, 0));
10909 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10911 /* Handle the case where the address is too complex to be offset by 1. */
10912 if (GET_CODE (base) == MINUS
10913 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10915 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10917 /* Be careful not to destroy OUTVAL. */
10918 if (reg_overlap_mentioned_p (base_plus, outval))
10920 /* Updating base_plus might destroy outval, see if we can
10921 swap the scratch and base_plus. */
10922 if (!reg_overlap_mentioned_p (scratch, outval))
10924 rtx tmp = scratch;
10925 scratch = base_plus;
10926 base_plus = tmp;
10928 else
10930 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10932 /* Be conservative and copy OUTVAL into the scratch now,
10933 this should only be necessary if outval is a subreg
10934 of something larger than a word. */
10935 /* XXX Might this clobber base? I can't see how it can,
10936 since scratch is known to overlap with OUTVAL, and
10937 must be wider than a word. */
10938 emit_insn (gen_movhi (scratch_hi, outval));
10939 outval = scratch_hi;
10943 emit_set_insn (base_plus, base);
10944 base = base_plus;
10946 else if (GET_CODE (base) == PLUS)
10948 /* The addend must be CONST_INT, or we would have dealt with it above. */
10949 HOST_WIDE_INT hi, lo;
10951 offset += INTVAL (XEXP (base, 1));
10952 base = XEXP (base, 0);
10954 /* Rework the address into a legal sequence of insns. */
10955 /* Valid range for lo is -4095 -> 4095 */
10956 lo = (offset >= 0
10957 ? (offset & 0xfff)
10958 : -((-offset) & 0xfff));
10960 /* Corner case, if lo is the max offset then we would be out of range
10961 once we have added the additional 1 below, so bump the msb into the
10962 pre-loading insn(s). */
10963 if (lo == 4095)
10964 lo &= 0x7ff;
10966 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10967 ^ (HOST_WIDE_INT) 0x80000000)
10968 - (HOST_WIDE_INT) 0x80000000);
10970 gcc_assert (hi + lo == offset);
10972 if (hi != 0)
10974 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10976 /* Be careful not to destroy OUTVAL. */
10977 if (reg_overlap_mentioned_p (base_plus, outval))
10979 /* Updating base_plus might destroy outval, see if we
10980 can swap the scratch and base_plus. */
10981 if (!reg_overlap_mentioned_p (scratch, outval))
10983 rtx tmp = scratch;
10984 scratch = base_plus;
10985 base_plus = tmp;
10987 else
10989 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10991 /* Be conservative and copy outval into scratch now,
10992 this should only be necessary if outval is a
10993 subreg of something larger than a word. */
10994 /* XXX Might this clobber base? I can't see how it
10995 can, since scratch is known to overlap with
10996 outval. */
10997 emit_insn (gen_movhi (scratch_hi, outval));
10998 outval = scratch_hi;
11002 /* Get the base address; addsi3 knows how to handle constants
11003 that require more than one insn. */
11004 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11005 base = base_plus;
11006 offset = lo;
11010 if (BYTES_BIG_ENDIAN)
11012 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11013 plus_constant (base, offset + 1)),
11014 gen_lowpart (QImode, outval)));
11015 emit_insn (gen_lshrsi3 (scratch,
11016 gen_rtx_SUBREG (SImode, outval, 0),
11017 GEN_INT (8)));
11018 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11019 gen_lowpart (QImode, scratch)));
11021 else
11023 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11024 gen_lowpart (QImode, outval)));
11025 emit_insn (gen_lshrsi3 (scratch,
11026 gen_rtx_SUBREG (SImode, outval, 0),
11027 GEN_INT (8)));
11028 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11029 plus_constant (base, offset + 1)),
11030 gen_lowpart (QImode, scratch)));
11034 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11035 (padded to the size of a word) should be passed in a register. */
11037 static bool
11038 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11040 if (TARGET_AAPCS_BASED)
11041 return must_pass_in_stack_var_size (mode, type);
11042 else
11043 return must_pass_in_stack_var_size_or_pad (mode, type);
11047 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11048 Return true if an argument passed on the stack should be padded upwards,
11049 i.e. if the least-significant byte has useful data.
11050 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11051 aggregate types are placed in the lowest memory address. */
11053 bool
11054 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11056 if (!TARGET_AAPCS_BASED)
11057 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11059 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11060 return false;
11062 return true;
11066 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11067 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11068 byte of the register has useful data, and return the opposite if the
11069 most significant byte does.
11070 For AAPCS, small aggregates and small complex types are always padded
11071 upwards. */
11073 bool
11074 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11075 tree type, int first ATTRIBUTE_UNUSED)
11077 if (TARGET_AAPCS_BASED
11078 && BYTES_BIG_ENDIAN
11079 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11080 && int_size_in_bytes (type) <= 4)
11081 return true;
11083 /* Otherwise, use default padding. */
11084 return !BYTES_BIG_ENDIAN;
11088 /* Print a symbolic form of X to the debug file, F. */
11089 static void
11090 arm_print_value (FILE *f, rtx x)
11092 switch (GET_CODE (x))
11094 case CONST_INT:
11095 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11096 return;
11098 case CONST_DOUBLE:
11099 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11100 return;
11102 case CONST_VECTOR:
11104 int i;
11106 fprintf (f, "<");
11107 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11109 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11110 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11111 fputc (',', f);
11113 fprintf (f, ">");
11115 return;
11117 case CONST_STRING:
11118 fprintf (f, "\"%s\"", XSTR (x, 0));
11119 return;
11121 case SYMBOL_REF:
11122 fprintf (f, "`%s'", XSTR (x, 0));
11123 return;
11125 case LABEL_REF:
11126 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11127 return;
11129 case CONST:
11130 arm_print_value (f, XEXP (x, 0));
11131 return;
11133 case PLUS:
11134 arm_print_value (f, XEXP (x, 0));
11135 fprintf (f, "+");
11136 arm_print_value (f, XEXP (x, 1));
11137 return;
11139 case PC:
11140 fprintf (f, "pc");
11141 return;
11143 default:
11144 fprintf (f, "????");
11145 return;
11149 /* Routines for manipulation of the constant pool. */
11151 /* Arm instructions cannot load a large constant directly into a
11152 register; they have to come from a pc relative load. The constant
11153 must therefore be placed in the addressable range of the pc
11154 relative load. Depending on the precise pc relative load
11155 instruction the range is somewhere between 256 bytes and 4k. This
11156 means that we often have to dump a constant inside a function, and
11157 generate code to branch around it.
11159 It is important to minimize this, since the branches will slow
11160 things down and make the code larger.
11162 Normally we can hide the table after an existing unconditional
11163 branch so that there is no interruption of the flow, but in the
11164 worst case the code looks like this:
11166 ldr rn, L1
11168 b L2
11169 align
11170 L1: .long value
11174 ldr rn, L3
11176 b L4
11177 align
11178 L3: .long value
11182 We fix this by performing a scan after scheduling, which notices
11183 which instructions need to have their operands fetched from the
11184 constant table and builds the table.
11186 The algorithm starts by building a table of all the constants that
11187 need fixing up and all the natural barriers in the function (places
11188 where a constant table can be dropped without breaking the flow).
11189 For each fixup we note how far the pc-relative replacement will be
11190 able to reach and the offset of the instruction into the function.
11192 Having built the table we then group the fixes together to form
11193 tables that are as large as possible (subject to addressing
11194 constraints) and emit each table of constants after the last
11195 barrier that is within range of all the instructions in the group.
11196 If a group does not contain a barrier, then we forcibly create one
11197 by inserting a jump instruction into the flow. Once the table has
11198 been inserted, the insns are then modified to reference the
11199 relevant entry in the pool.
11201 Possible enhancements to the algorithm (not implemented) are:
11203 1) For some processors and object formats, there may be benefit in
11204 aligning the pools to the start of cache lines; this alignment
11205 would need to be taken into account when calculating addressability
11206 of a pool. */
11208 /* These typedefs are located at the start of this file, so that
11209 they can be used in the prototypes there. This comment is to
11210 remind readers of that fact so that the following structures
11211 can be understood more easily.
11213 typedef struct minipool_node Mnode;
11214 typedef struct minipool_fixup Mfix; */
11216 struct minipool_node
11218 /* Doubly linked chain of entries. */
11219 Mnode * next;
11220 Mnode * prev;
11221 /* The maximum offset into the code that this entry can be placed. While
11222 pushing fixes for forward references, all entries are sorted in order
11223 of increasing max_address. */
11224 HOST_WIDE_INT max_address;
11225 /* Similarly for an entry inserted for a backwards ref. */
11226 HOST_WIDE_INT min_address;
11227 /* The number of fixes referencing this entry. This can become zero
11228 if we "unpush" an entry. In this case we ignore the entry when we
11229 come to emit the code. */
11230 int refcount;
11231 /* The offset from the start of the minipool. */
11232 HOST_WIDE_INT offset;
11233 /* The value in table. */
11234 rtx value;
11235 /* The mode of value. */
11236 enum machine_mode mode;
11237 /* The size of the value. With iWMMXt enabled
11238 sizes > 4 also imply an alignment of 8-bytes. */
11239 int fix_size;
11242 struct minipool_fixup
11244 Mfix * next;
11245 rtx insn;
11246 HOST_WIDE_INT address;
11247 rtx * loc;
11248 enum machine_mode mode;
11249 int fix_size;
11250 rtx value;
11251 Mnode * minipool;
11252 HOST_WIDE_INT forwards;
11253 HOST_WIDE_INT backwards;
11256 /* Fixes less than a word need padding out to a word boundary. */
11257 #define MINIPOOL_FIX_SIZE(mode) \
11258 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
11260 static Mnode * minipool_vector_head;
11261 static Mnode * minipool_vector_tail;
11262 static rtx minipool_vector_label;
11263 static int minipool_pad;
11265 /* The linked list of all minipool fixes required for this function. */
11266 Mfix * minipool_fix_head;
11267 Mfix * minipool_fix_tail;
11268 /* The fix entry for the current minipool, once it has been placed. */
11269 Mfix * minipool_barrier;
11271 /* Determines if INSN is the start of a jump table. Returns the end
11272 of the TABLE or NULL_RTX. */
11273 static rtx
11274 is_jump_table (rtx insn)
11276 rtx table;
11278 if (GET_CODE (insn) == JUMP_INSN
11279 && JUMP_LABEL (insn) != NULL
11280 && ((table = next_real_insn (JUMP_LABEL (insn)))
11281 == next_real_insn (insn))
11282 && table != NULL
11283 && GET_CODE (table) == JUMP_INSN
11284 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11285 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11286 return table;
11288 return NULL_RTX;
11291 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11292 #define JUMP_TABLES_IN_TEXT_SECTION 0
11293 #endif
11295 static HOST_WIDE_INT
11296 get_jump_table_size (rtx insn)
11298 /* ADDR_VECs only take room if read-only data does into the text
11299 section. */
11300 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11302 rtx body = PATTERN (insn);
11303 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11304 HOST_WIDE_INT size;
11305 HOST_WIDE_INT modesize;
11307 modesize = GET_MODE_SIZE (GET_MODE (body));
11308 size = modesize * XVECLEN (body, elt);
11309 switch (modesize)
11311 case 1:
11312 /* Round up size of TBB table to a halfword boundary. */
11313 size = (size + 1) & ~(HOST_WIDE_INT)1;
11314 break;
11315 case 2:
11316 /* No padding necessary for TBH. */
11317 break;
11318 case 4:
11319 /* Add two bytes for alignment on Thumb. */
11320 if (TARGET_THUMB)
11321 size += 2;
11322 break;
11323 default:
11324 gcc_unreachable ();
11326 return size;
11329 return 0;
11332 /* Move a minipool fix MP from its current location to before MAX_MP.
11333 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11334 constraints may need updating. */
11335 static Mnode *
11336 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11337 HOST_WIDE_INT max_address)
11339 /* The code below assumes these are different. */
11340 gcc_assert (mp != max_mp);
11342 if (max_mp == NULL)
11344 if (max_address < mp->max_address)
11345 mp->max_address = max_address;
11347 else
11349 if (max_address > max_mp->max_address - mp->fix_size)
11350 mp->max_address = max_mp->max_address - mp->fix_size;
11351 else
11352 mp->max_address = max_address;
11354 /* Unlink MP from its current position. Since max_mp is non-null,
11355 mp->prev must be non-null. */
11356 mp->prev->next = mp->next;
11357 if (mp->next != NULL)
11358 mp->next->prev = mp->prev;
11359 else
11360 minipool_vector_tail = mp->prev;
11362 /* Re-insert it before MAX_MP. */
11363 mp->next = max_mp;
11364 mp->prev = max_mp->prev;
11365 max_mp->prev = mp;
11367 if (mp->prev != NULL)
11368 mp->prev->next = mp;
11369 else
11370 minipool_vector_head = mp;
11373 /* Save the new entry. */
11374 max_mp = mp;
11376 /* Scan over the preceding entries and adjust their addresses as
11377 required. */
11378 while (mp->prev != NULL
11379 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11381 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11382 mp = mp->prev;
11385 return max_mp;
11388 /* Add a constant to the minipool for a forward reference. Returns the
11389 node added or NULL if the constant will not fit in this pool. */
11390 static Mnode *
11391 add_minipool_forward_ref (Mfix *fix)
11393 /* If set, max_mp is the first pool_entry that has a lower
11394 constraint than the one we are trying to add. */
11395 Mnode * max_mp = NULL;
11396 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11397 Mnode * mp;
11399 /* If the minipool starts before the end of FIX->INSN then this FIX
11400 can not be placed into the current pool. Furthermore, adding the
11401 new constant pool entry may cause the pool to start FIX_SIZE bytes
11402 earlier. */
11403 if (minipool_vector_head &&
11404 (fix->address + get_attr_length (fix->insn)
11405 >= minipool_vector_head->max_address - fix->fix_size))
11406 return NULL;
11408 /* Scan the pool to see if a constant with the same value has
11409 already been added. While we are doing this, also note the
11410 location where we must insert the constant if it doesn't already
11411 exist. */
11412 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11414 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11415 && fix->mode == mp->mode
11416 && (GET_CODE (fix->value) != CODE_LABEL
11417 || (CODE_LABEL_NUMBER (fix->value)
11418 == CODE_LABEL_NUMBER (mp->value)))
11419 && rtx_equal_p (fix->value, mp->value))
11421 /* More than one fix references this entry. */
11422 mp->refcount++;
11423 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11426 /* Note the insertion point if necessary. */
11427 if (max_mp == NULL
11428 && mp->max_address > max_address)
11429 max_mp = mp;
11431 /* If we are inserting an 8-bytes aligned quantity and
11432 we have not already found an insertion point, then
11433 make sure that all such 8-byte aligned quantities are
11434 placed at the start of the pool. */
11435 if (ARM_DOUBLEWORD_ALIGN
11436 && max_mp == NULL
11437 && fix->fix_size >= 8
11438 && mp->fix_size < 8)
11440 max_mp = mp;
11441 max_address = mp->max_address;
11445 /* The value is not currently in the minipool, so we need to create
11446 a new entry for it. If MAX_MP is NULL, the entry will be put on
11447 the end of the list since the placement is less constrained than
11448 any existing entry. Otherwise, we insert the new fix before
11449 MAX_MP and, if necessary, adjust the constraints on the other
11450 entries. */
11451 mp = XNEW (Mnode);
11452 mp->fix_size = fix->fix_size;
11453 mp->mode = fix->mode;
11454 mp->value = fix->value;
11455 mp->refcount = 1;
11456 /* Not yet required for a backwards ref. */
11457 mp->min_address = -65536;
11459 if (max_mp == NULL)
11461 mp->max_address = max_address;
11462 mp->next = NULL;
11463 mp->prev = minipool_vector_tail;
11465 if (mp->prev == NULL)
11467 minipool_vector_head = mp;
11468 minipool_vector_label = gen_label_rtx ();
11470 else
11471 mp->prev->next = mp;
11473 minipool_vector_tail = mp;
11475 else
11477 if (max_address > max_mp->max_address - mp->fix_size)
11478 mp->max_address = max_mp->max_address - mp->fix_size;
11479 else
11480 mp->max_address = max_address;
11482 mp->next = max_mp;
11483 mp->prev = max_mp->prev;
11484 max_mp->prev = mp;
11485 if (mp->prev != NULL)
11486 mp->prev->next = mp;
11487 else
11488 minipool_vector_head = mp;
11491 /* Save the new entry. */
11492 max_mp = mp;
11494 /* Scan over the preceding entries and adjust their addresses as
11495 required. */
11496 while (mp->prev != NULL
11497 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11499 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11500 mp = mp->prev;
11503 return max_mp;
11506 static Mnode *
11507 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11508 HOST_WIDE_INT min_address)
11510 HOST_WIDE_INT offset;
11512 /* The code below assumes these are different. */
11513 gcc_assert (mp != min_mp);
11515 if (min_mp == NULL)
11517 if (min_address > mp->min_address)
11518 mp->min_address = min_address;
11520 else
11522 /* We will adjust this below if it is too loose. */
11523 mp->min_address = min_address;
11525 /* Unlink MP from its current position. Since min_mp is non-null,
11526 mp->next must be non-null. */
11527 mp->next->prev = mp->prev;
11528 if (mp->prev != NULL)
11529 mp->prev->next = mp->next;
11530 else
11531 minipool_vector_head = mp->next;
11533 /* Reinsert it after MIN_MP. */
11534 mp->prev = min_mp;
11535 mp->next = min_mp->next;
11536 min_mp->next = mp;
11537 if (mp->next != NULL)
11538 mp->next->prev = mp;
11539 else
11540 minipool_vector_tail = mp;
11543 min_mp = mp;
11545 offset = 0;
11546 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11548 mp->offset = offset;
11549 if (mp->refcount > 0)
11550 offset += mp->fix_size;
11552 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11553 mp->next->min_address = mp->min_address + mp->fix_size;
11556 return min_mp;
11559 /* Add a constant to the minipool for a backward reference. Returns the
11560 node added or NULL if the constant will not fit in this pool.
11562 Note that the code for insertion for a backwards reference can be
11563 somewhat confusing because the calculated offsets for each fix do
11564 not take into account the size of the pool (which is still under
11565 construction. */
11566 static Mnode *
11567 add_minipool_backward_ref (Mfix *fix)
11569 /* If set, min_mp is the last pool_entry that has a lower constraint
11570 than the one we are trying to add. */
11571 Mnode *min_mp = NULL;
11572 /* This can be negative, since it is only a constraint. */
11573 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11574 Mnode *mp;
11576 /* If we can't reach the current pool from this insn, or if we can't
11577 insert this entry at the end of the pool without pushing other
11578 fixes out of range, then we don't try. This ensures that we
11579 can't fail later on. */
11580 if (min_address >= minipool_barrier->address
11581 || (minipool_vector_tail->min_address + fix->fix_size
11582 >= minipool_barrier->address))
11583 return NULL;
11585 /* Scan the pool to see if a constant with the same value has
11586 already been added. While we are doing this, also note the
11587 location where we must insert the constant if it doesn't already
11588 exist. */
11589 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11591 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11592 && fix->mode == mp->mode
11593 && (GET_CODE (fix->value) != CODE_LABEL
11594 || (CODE_LABEL_NUMBER (fix->value)
11595 == CODE_LABEL_NUMBER (mp->value)))
11596 && rtx_equal_p (fix->value, mp->value)
11597 /* Check that there is enough slack to move this entry to the
11598 end of the table (this is conservative). */
11599 && (mp->max_address
11600 > (minipool_barrier->address
11601 + minipool_vector_tail->offset
11602 + minipool_vector_tail->fix_size)))
11604 mp->refcount++;
11605 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11608 if (min_mp != NULL)
11609 mp->min_address += fix->fix_size;
11610 else
11612 /* Note the insertion point if necessary. */
11613 if (mp->min_address < min_address)
11615 /* For now, we do not allow the insertion of 8-byte alignment
11616 requiring nodes anywhere but at the start of the pool. */
11617 if (ARM_DOUBLEWORD_ALIGN
11618 && fix->fix_size >= 8 && mp->fix_size < 8)
11619 return NULL;
11620 else
11621 min_mp = mp;
11623 else if (mp->max_address
11624 < minipool_barrier->address + mp->offset + fix->fix_size)
11626 /* Inserting before this entry would push the fix beyond
11627 its maximum address (which can happen if we have
11628 re-located a forwards fix); force the new fix to come
11629 after it. */
11630 if (ARM_DOUBLEWORD_ALIGN
11631 && fix->fix_size >= 8 && mp->fix_size < 8)
11632 return NULL;
11633 else
11635 min_mp = mp;
11636 min_address = mp->min_address + fix->fix_size;
11639 /* Do not insert a non-8-byte aligned quantity before 8-byte
11640 aligned quantities. */
11641 else if (ARM_DOUBLEWORD_ALIGN
11642 && fix->fix_size < 8
11643 && mp->fix_size >= 8)
11645 min_mp = mp;
11646 min_address = mp->min_address + fix->fix_size;
11651 /* We need to create a new entry. */
11652 mp = XNEW (Mnode);
11653 mp->fix_size = fix->fix_size;
11654 mp->mode = fix->mode;
11655 mp->value = fix->value;
11656 mp->refcount = 1;
11657 mp->max_address = minipool_barrier->address + 65536;
11659 mp->min_address = min_address;
11661 if (min_mp == NULL)
11663 mp->prev = NULL;
11664 mp->next = minipool_vector_head;
11666 if (mp->next == NULL)
11668 minipool_vector_tail = mp;
11669 minipool_vector_label = gen_label_rtx ();
11671 else
11672 mp->next->prev = mp;
11674 minipool_vector_head = mp;
11676 else
11678 mp->next = min_mp->next;
11679 mp->prev = min_mp;
11680 min_mp->next = mp;
11682 if (mp->next != NULL)
11683 mp->next->prev = mp;
11684 else
11685 minipool_vector_tail = mp;
11688 /* Save the new entry. */
11689 min_mp = mp;
11691 if (mp->prev)
11692 mp = mp->prev;
11693 else
11694 mp->offset = 0;
11696 /* Scan over the following entries and adjust their offsets. */
11697 while (mp->next != NULL)
11699 if (mp->next->min_address < mp->min_address + mp->fix_size)
11700 mp->next->min_address = mp->min_address + mp->fix_size;
11702 if (mp->refcount)
11703 mp->next->offset = mp->offset + mp->fix_size;
11704 else
11705 mp->next->offset = mp->offset;
11707 mp = mp->next;
11710 return min_mp;
11713 static void
11714 assign_minipool_offsets (Mfix *barrier)
11716 HOST_WIDE_INT offset = 0;
11717 Mnode *mp;
11719 minipool_barrier = barrier;
11721 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11723 mp->offset = offset;
11725 if (mp->refcount > 0)
11726 offset += mp->fix_size;
11730 /* Output the literal table */
11731 static void
11732 dump_minipool (rtx scan)
11734 Mnode * mp;
11735 Mnode * nmp;
11736 int align64 = 0;
11738 if (ARM_DOUBLEWORD_ALIGN)
11739 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11740 if (mp->refcount > 0 && mp->fix_size >= 8)
11742 align64 = 1;
11743 break;
11746 if (dump_file)
11747 fprintf (dump_file,
11748 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11749 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11751 scan = emit_label_after (gen_label_rtx (), scan);
11752 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11753 scan = emit_label_after (minipool_vector_label, scan);
11755 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11757 if (mp->refcount > 0)
11759 if (dump_file)
11761 fprintf (dump_file,
11762 ";; Offset %u, min %ld, max %ld ",
11763 (unsigned) mp->offset, (unsigned long) mp->min_address,
11764 (unsigned long) mp->max_address);
11765 arm_print_value (dump_file, mp->value);
11766 fputc ('\n', dump_file);
11769 switch (mp->fix_size)
11771 #ifdef HAVE_consttable_1
11772 case 1:
11773 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11774 break;
11776 #endif
11777 #ifdef HAVE_consttable_2
11778 case 2:
11779 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11780 break;
11782 #endif
11783 #ifdef HAVE_consttable_4
11784 case 4:
11785 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11786 break;
11788 #endif
11789 #ifdef HAVE_consttable_8
11790 case 8:
11791 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11792 break;
11794 #endif
11795 #ifdef HAVE_consttable_16
11796 case 16:
11797 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11798 break;
11800 #endif
11801 default:
11802 gcc_unreachable ();
11806 nmp = mp->next;
11807 free (mp);
11810 minipool_vector_head = minipool_vector_tail = NULL;
11811 scan = emit_insn_after (gen_consttable_end (), scan);
11812 scan = emit_barrier_after (scan);
11815 /* Return the cost of forcibly inserting a barrier after INSN. */
11816 static int
11817 arm_barrier_cost (rtx insn)
11819 /* Basing the location of the pool on the loop depth is preferable,
11820 but at the moment, the basic block information seems to be
11821 corrupt by this stage of the compilation. */
11822 int base_cost = 50;
11823 rtx next = next_nonnote_insn (insn);
11825 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11826 base_cost -= 20;
11828 switch (GET_CODE (insn))
11830 case CODE_LABEL:
11831 /* It will always be better to place the table before the label, rather
11832 than after it. */
11833 return 50;
11835 case INSN:
11836 case CALL_INSN:
11837 return base_cost;
11839 case JUMP_INSN:
11840 return base_cost - 10;
11842 default:
11843 return base_cost + 10;
11847 /* Find the best place in the insn stream in the range
11848 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11849 Create the barrier by inserting a jump and add a new fix entry for
11850 it. */
11851 static Mfix *
11852 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11854 HOST_WIDE_INT count = 0;
11855 rtx barrier;
11856 rtx from = fix->insn;
11857 /* The instruction after which we will insert the jump. */
11858 rtx selected = NULL;
11859 int selected_cost;
11860 /* The address at which the jump instruction will be placed. */
11861 HOST_WIDE_INT selected_address;
11862 Mfix * new_fix;
11863 HOST_WIDE_INT max_count = max_address - fix->address;
11864 rtx label = gen_label_rtx ();
11866 selected_cost = arm_barrier_cost (from);
11867 selected_address = fix->address;
11869 while (from && count < max_count)
11871 rtx tmp;
11872 int new_cost;
11874 /* This code shouldn't have been called if there was a natural barrier
11875 within range. */
11876 gcc_assert (GET_CODE (from) != BARRIER);
11878 /* Count the length of this insn. */
11879 count += get_attr_length (from);
11881 /* If there is a jump table, add its length. */
11882 tmp = is_jump_table (from);
11883 if (tmp != NULL)
11885 count += get_jump_table_size (tmp);
11887 /* Jump tables aren't in a basic block, so base the cost on
11888 the dispatch insn. If we select this location, we will
11889 still put the pool after the table. */
11890 new_cost = arm_barrier_cost (from);
11892 if (count < max_count
11893 && (!selected || new_cost <= selected_cost))
11895 selected = tmp;
11896 selected_cost = new_cost;
11897 selected_address = fix->address + count;
11900 /* Continue after the dispatch table. */
11901 from = NEXT_INSN (tmp);
11902 continue;
11905 new_cost = arm_barrier_cost (from);
11907 if (count < max_count
11908 && (!selected || new_cost <= selected_cost))
11910 selected = from;
11911 selected_cost = new_cost;
11912 selected_address = fix->address + count;
11915 from = NEXT_INSN (from);
11918 /* Make sure that we found a place to insert the jump. */
11919 gcc_assert (selected);
11921 /* Create a new JUMP_INSN that branches around a barrier. */
11922 from = emit_jump_insn_after (gen_jump (label), selected);
11923 JUMP_LABEL (from) = label;
11924 barrier = emit_barrier_after (from);
11925 emit_label_after (label, barrier);
11927 /* Create a minipool barrier entry for the new barrier. */
11928 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11929 new_fix->insn = barrier;
11930 new_fix->address = selected_address;
11931 new_fix->next = fix->next;
11932 fix->next = new_fix;
11934 return new_fix;
11937 /* Record that there is a natural barrier in the insn stream at
11938 ADDRESS. */
11939 static void
11940 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11942 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11944 fix->insn = insn;
11945 fix->address = address;
11947 fix->next = NULL;
11948 if (minipool_fix_head != NULL)
11949 minipool_fix_tail->next = fix;
11950 else
11951 minipool_fix_head = fix;
11953 minipool_fix_tail = fix;
11956 /* Record INSN, which will need fixing up to load a value from the
11957 minipool. ADDRESS is the offset of the insn since the start of the
11958 function; LOC is a pointer to the part of the insn which requires
11959 fixing; VALUE is the constant that must be loaded, which is of type
11960 MODE. */
11961 static void
11962 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11963 enum machine_mode mode, rtx value)
11965 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11967 fix->insn = insn;
11968 fix->address = address;
11969 fix->loc = loc;
11970 fix->mode = mode;
11971 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11972 fix->value = value;
11973 fix->forwards = get_attr_pool_range (insn);
11974 fix->backwards = get_attr_neg_pool_range (insn);
11975 fix->minipool = NULL;
11977 /* If an insn doesn't have a range defined for it, then it isn't
11978 expecting to be reworked by this code. Better to stop now than
11979 to generate duff assembly code. */
11980 gcc_assert (fix->forwards || fix->backwards);
11982 /* If an entry requires 8-byte alignment then assume all constant pools
11983 require 4 bytes of padding. Trying to do this later on a per-pool
11984 basis is awkward because existing pool entries have to be modified. */
11985 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11986 minipool_pad = 4;
11988 if (dump_file)
11990 fprintf (dump_file,
11991 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11992 GET_MODE_NAME (mode),
11993 INSN_UID (insn), (unsigned long) address,
11994 -1 * (long)fix->backwards, (long)fix->forwards);
11995 arm_print_value (dump_file, fix->value);
11996 fprintf (dump_file, "\n");
11999 /* Add it to the chain of fixes. */
12000 fix->next = NULL;
12002 if (minipool_fix_head != NULL)
12003 minipool_fix_tail->next = fix;
12004 else
12005 minipool_fix_head = fix;
12007 minipool_fix_tail = fix;
12010 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12011 Returns the number of insns needed, or 99 if we don't know how to
12012 do it. */
12014 arm_const_double_inline_cost (rtx val)
12016 rtx lowpart, highpart;
12017 enum machine_mode mode;
12019 mode = GET_MODE (val);
12021 if (mode == VOIDmode)
12022 mode = DImode;
12024 gcc_assert (GET_MODE_SIZE (mode) == 8);
12026 lowpart = gen_lowpart (SImode, val);
12027 highpart = gen_highpart_mode (SImode, mode, val);
12029 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12030 gcc_assert (GET_CODE (highpart) == CONST_INT);
12032 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12033 NULL_RTX, NULL_RTX, 0, 0)
12034 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12035 NULL_RTX, NULL_RTX, 0, 0));
12038 /* Return true if it is worthwhile to split a 64-bit constant into two
12039 32-bit operations. This is the case if optimizing for size, or
12040 if we have load delay slots, or if one 32-bit part can be done with
12041 a single data operation. */
12042 bool
12043 arm_const_double_by_parts (rtx val)
12045 enum machine_mode mode = GET_MODE (val);
12046 rtx part;
12048 if (optimize_size || arm_ld_sched)
12049 return true;
12051 if (mode == VOIDmode)
12052 mode = DImode;
12054 part = gen_highpart_mode (SImode, mode, val);
12056 gcc_assert (GET_CODE (part) == CONST_INT);
12058 if (const_ok_for_arm (INTVAL (part))
12059 || const_ok_for_arm (~INTVAL (part)))
12060 return true;
12062 part = gen_lowpart (SImode, val);
12064 gcc_assert (GET_CODE (part) == CONST_INT);
12066 if (const_ok_for_arm (INTVAL (part))
12067 || const_ok_for_arm (~INTVAL (part)))
12068 return true;
12070 return false;
12073 /* Return true if it is possible to inline both the high and low parts
12074 of a 64-bit constant into 32-bit data processing instructions. */
12075 bool
12076 arm_const_double_by_immediates (rtx val)
12078 enum machine_mode mode = GET_MODE (val);
12079 rtx part;
12081 if (mode == VOIDmode)
12082 mode = DImode;
12084 part = gen_highpart_mode (SImode, mode, val);
12086 gcc_assert (GET_CODE (part) == CONST_INT);
12088 if (!const_ok_for_arm (INTVAL (part)))
12089 return false;
12091 part = gen_lowpart (SImode, val);
12093 gcc_assert (GET_CODE (part) == CONST_INT);
12095 if (!const_ok_for_arm (INTVAL (part)))
12096 return false;
12098 return true;
12101 /* Scan INSN and note any of its operands that need fixing.
12102 If DO_PUSHES is false we do not actually push any of the fixups
12103 needed. The function returns TRUE if any fixups were needed/pushed.
12104 This is used by arm_memory_load_p() which needs to know about loads
12105 of constants that will be converted into minipool loads. */
12106 static bool
12107 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12109 bool result = false;
12110 int opno;
12112 extract_insn (insn);
12114 if (!constrain_operands (1))
12115 fatal_insn_not_found (insn);
12117 if (recog_data.n_alternatives == 0)
12118 return false;
12120 /* Fill in recog_op_alt with information about the constraints of
12121 this insn. */
12122 preprocess_constraints ();
12124 for (opno = 0; opno < recog_data.n_operands; opno++)
12126 /* Things we need to fix can only occur in inputs. */
12127 if (recog_data.operand_type[opno] != OP_IN)
12128 continue;
12130 /* If this alternative is a memory reference, then any mention
12131 of constants in this alternative is really to fool reload
12132 into allowing us to accept one there. We need to fix them up
12133 now so that we output the right code. */
12134 if (recog_op_alt[opno][which_alternative].memory_ok)
12136 rtx op = recog_data.operand[opno];
12138 if (CONSTANT_P (op))
12140 if (do_pushes)
12141 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12142 recog_data.operand_mode[opno], op);
12143 result = true;
12145 else if (GET_CODE (op) == MEM
12146 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12147 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12149 if (do_pushes)
12151 rtx cop = avoid_constant_pool_reference (op);
12153 /* Casting the address of something to a mode narrower
12154 than a word can cause avoid_constant_pool_reference()
12155 to return the pool reference itself. That's no good to
12156 us here. Lets just hope that we can use the
12157 constant pool value directly. */
12158 if (op == cop)
12159 cop = get_pool_constant (XEXP (op, 0));
12161 push_minipool_fix (insn, address,
12162 recog_data.operand_loc[opno],
12163 recog_data.operand_mode[opno], cop);
12166 result = true;
12171 return result;
12174 /* Convert instructions to their cc-clobbering variant if possible, since
12175 that allows us to use smaller encodings. */
12177 static void
12178 thumb2_reorg (void)
12180 basic_block bb;
12181 regset_head live;
12183 INIT_REG_SET (&live);
12185 /* We are freeing block_for_insn in the toplev to keep compatibility
12186 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12187 compute_bb_for_insn ();
12188 df_analyze ();
12190 FOR_EACH_BB (bb)
12192 rtx insn;
12194 COPY_REG_SET (&live, DF_LR_OUT (bb));
12195 df_simulate_initialize_backwards (bb, &live);
12196 FOR_BB_INSNS_REVERSE (bb, insn)
12198 if (NONJUMP_INSN_P (insn)
12199 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12201 rtx pat = PATTERN (insn);
12202 if (GET_CODE (pat) == SET
12203 && low_register_operand (XEXP (pat, 0), SImode)
12204 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12205 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12206 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12208 rtx dst = XEXP (pat, 0);
12209 rtx src = XEXP (pat, 1);
12210 rtx op0 = XEXP (src, 0);
12211 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12212 ? XEXP (src, 1) : NULL);
12214 if (rtx_equal_p (dst, op0)
12215 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12217 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12218 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12219 rtvec vec = gen_rtvec (2, pat, clobber);
12221 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12222 INSN_CODE (insn) = -1;
12224 /* We can also handle a commutative operation where the
12225 second operand matches the destination. */
12226 else if (op1 && rtx_equal_p (dst, op1))
12228 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12229 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12230 rtvec vec;
12232 src = copy_rtx (src);
12233 XEXP (src, 0) = op1;
12234 XEXP (src, 1) = op0;
12235 pat = gen_rtx_SET (VOIDmode, dst, src);
12236 vec = gen_rtvec (2, pat, clobber);
12237 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12238 INSN_CODE (insn) = -1;
12243 if (NONDEBUG_INSN_P (insn))
12244 df_simulate_one_insn_backwards (bb, insn, &live);
12248 CLEAR_REG_SET (&live);
12251 /* Gcc puts the pool in the wrong place for ARM, since we can only
12252 load addresses a limited distance around the pc. We do some
12253 special munging to move the constant pool values to the correct
12254 point in the code. */
12255 static void
12256 arm_reorg (void)
12258 rtx insn;
12259 HOST_WIDE_INT address = 0;
12260 Mfix * fix;
12262 if (TARGET_THUMB2)
12263 thumb2_reorg ();
12265 minipool_fix_head = minipool_fix_tail = NULL;
12267 /* The first insn must always be a note, or the code below won't
12268 scan it properly. */
12269 insn = get_insns ();
12270 gcc_assert (GET_CODE (insn) == NOTE);
12271 minipool_pad = 0;
12273 /* Scan all the insns and record the operands that will need fixing. */
12274 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12276 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12277 && (arm_cirrus_insn_p (insn)
12278 || GET_CODE (insn) == JUMP_INSN
12279 || arm_memory_load_p (insn)))
12280 cirrus_reorg (insn);
12282 if (GET_CODE (insn) == BARRIER)
12283 push_minipool_barrier (insn, address);
12284 else if (INSN_P (insn))
12286 rtx table;
12288 note_invalid_constants (insn, address, true);
12289 address += get_attr_length (insn);
12291 /* If the insn is a vector jump, add the size of the table
12292 and skip the table. */
12293 if ((table = is_jump_table (insn)) != NULL)
12295 address += get_jump_table_size (table);
12296 insn = table;
12301 fix = minipool_fix_head;
12303 /* Now scan the fixups and perform the required changes. */
12304 while (fix)
12306 Mfix * ftmp;
12307 Mfix * fdel;
12308 Mfix * last_added_fix;
12309 Mfix * last_barrier = NULL;
12310 Mfix * this_fix;
12312 /* Skip any further barriers before the next fix. */
12313 while (fix && GET_CODE (fix->insn) == BARRIER)
12314 fix = fix->next;
12316 /* No more fixes. */
12317 if (fix == NULL)
12318 break;
12320 last_added_fix = NULL;
12322 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12324 if (GET_CODE (ftmp->insn) == BARRIER)
12326 if (ftmp->address >= minipool_vector_head->max_address)
12327 break;
12329 last_barrier = ftmp;
12331 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12332 break;
12334 last_added_fix = ftmp; /* Keep track of the last fix added. */
12337 /* If we found a barrier, drop back to that; any fixes that we
12338 could have reached but come after the barrier will now go in
12339 the next mini-pool. */
12340 if (last_barrier != NULL)
12342 /* Reduce the refcount for those fixes that won't go into this
12343 pool after all. */
12344 for (fdel = last_barrier->next;
12345 fdel && fdel != ftmp;
12346 fdel = fdel->next)
12348 fdel->minipool->refcount--;
12349 fdel->minipool = NULL;
12352 ftmp = last_barrier;
12354 else
12356 /* ftmp is first fix that we can't fit into this pool and
12357 there no natural barriers that we could use. Insert a
12358 new barrier in the code somewhere between the previous
12359 fix and this one, and arrange to jump around it. */
12360 HOST_WIDE_INT max_address;
12362 /* The last item on the list of fixes must be a barrier, so
12363 we can never run off the end of the list of fixes without
12364 last_barrier being set. */
12365 gcc_assert (ftmp);
12367 max_address = minipool_vector_head->max_address;
12368 /* Check that there isn't another fix that is in range that
12369 we couldn't fit into this pool because the pool was
12370 already too large: we need to put the pool before such an
12371 instruction. The pool itself may come just after the
12372 fix because create_fix_barrier also allows space for a
12373 jump instruction. */
12374 if (ftmp->address < max_address)
12375 max_address = ftmp->address + 1;
12377 last_barrier = create_fix_barrier (last_added_fix, max_address);
12380 assign_minipool_offsets (last_barrier);
12382 while (ftmp)
12384 if (GET_CODE (ftmp->insn) != BARRIER
12385 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12386 == NULL))
12387 break;
12389 ftmp = ftmp->next;
12392 /* Scan over the fixes we have identified for this pool, fixing them
12393 up and adding the constants to the pool itself. */
12394 for (this_fix = fix; this_fix && ftmp != this_fix;
12395 this_fix = this_fix->next)
12396 if (GET_CODE (this_fix->insn) != BARRIER)
12398 rtx addr
12399 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12400 minipool_vector_label),
12401 this_fix->minipool->offset);
12402 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12405 dump_minipool (last_barrier->insn);
12406 fix = ftmp;
12409 /* From now on we must synthesize any constants that we can't handle
12410 directly. This can happen if the RTL gets split during final
12411 instruction generation. */
12412 after_arm_reorg = 1;
12414 /* Free the minipool memory. */
12415 obstack_free (&minipool_obstack, minipool_startobj);
12418 /* Routines to output assembly language. */
12420 /* If the rtx is the correct value then return the string of the number.
12421 In this way we can ensure that valid double constants are generated even
12422 when cross compiling. */
12423 const char *
12424 fp_immediate_constant (rtx x)
12426 REAL_VALUE_TYPE r;
12427 int i;
12429 if (!fp_consts_inited)
12430 init_fp_table ();
12432 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12433 for (i = 0; i < 8; i++)
12434 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12435 return strings_fp[i];
12437 gcc_unreachable ();
12440 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12441 static const char *
12442 fp_const_from_val (REAL_VALUE_TYPE *r)
12444 int i;
12446 if (!fp_consts_inited)
12447 init_fp_table ();
12449 for (i = 0; i < 8; i++)
12450 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12451 return strings_fp[i];
12453 gcc_unreachable ();
12456 /* Output the operands of a LDM/STM instruction to STREAM.
12457 MASK is the ARM register set mask of which only bits 0-15 are important.
12458 REG is the base register, either the frame pointer or the stack pointer,
12459 INSTR is the possibly suffixed load or store instruction.
12460 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12462 static void
12463 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12464 unsigned long mask, int rfe)
12466 unsigned i;
12467 bool not_first = FALSE;
12469 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12470 fputc ('\t', stream);
12471 asm_fprintf (stream, instr, reg);
12472 fputc ('{', stream);
12474 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12475 if (mask & (1 << i))
12477 if (not_first)
12478 fprintf (stream, ", ");
12480 asm_fprintf (stream, "%r", i);
12481 not_first = TRUE;
12484 if (rfe)
12485 fprintf (stream, "}^\n");
12486 else
12487 fprintf (stream, "}\n");
12491 /* Output a FLDMD instruction to STREAM.
12492 BASE if the register containing the address.
12493 REG and COUNT specify the register range.
12494 Extra registers may be added to avoid hardware bugs.
12496 We output FLDMD even for ARMv5 VFP implementations. Although
12497 FLDMD is technically not supported until ARMv6, it is believed
12498 that all VFP implementations support its use in this context. */
12500 static void
12501 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12503 int i;
12505 /* Workaround ARM10 VFPr1 bug. */
12506 if (count == 2 && !arm_arch6)
12508 if (reg == 15)
12509 reg--;
12510 count++;
12513 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12514 load into multiple parts if we have to handle more than 16 registers. */
12515 if (count > 16)
12517 vfp_output_fldmd (stream, base, reg, 16);
12518 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12519 return;
12522 fputc ('\t', stream);
12523 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12525 for (i = reg; i < reg + count; i++)
12527 if (i > reg)
12528 fputs (", ", stream);
12529 asm_fprintf (stream, "d%d", i);
12531 fputs ("}\n", stream);
12536 /* Output the assembly for a store multiple. */
12538 const char *
12539 vfp_output_fstmd (rtx * operands)
12541 char pattern[100];
12542 int p;
12543 int base;
12544 int i;
12546 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12547 p = strlen (pattern);
12549 gcc_assert (GET_CODE (operands[1]) == REG);
12551 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12552 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12554 p += sprintf (&pattern[p], ", d%d", base + i);
12556 strcpy (&pattern[p], "}");
12558 output_asm_insn (pattern, operands);
12559 return "";
12563 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12564 number of bytes pushed. */
12566 static int
12567 vfp_emit_fstmd (int base_reg, int count)
12569 rtx par;
12570 rtx dwarf;
12571 rtx tmp, reg;
12572 int i;
12574 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12575 register pairs are stored by a store multiple insn. We avoid this
12576 by pushing an extra pair. */
12577 if (count == 2 && !arm_arch6)
12579 if (base_reg == LAST_VFP_REGNUM - 3)
12580 base_reg -= 2;
12581 count++;
12584 /* FSTMD may not store more than 16 doubleword registers at once. Split
12585 larger stores into multiple parts (up to a maximum of two, in
12586 practice). */
12587 if (count > 16)
12589 int saved;
12590 /* NOTE: base_reg is an internal register number, so each D register
12591 counts as 2. */
12592 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12593 saved += vfp_emit_fstmd (base_reg, 16);
12594 return saved;
12597 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12598 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12600 reg = gen_rtx_REG (DFmode, base_reg);
12601 base_reg += 2;
12603 XVECEXP (par, 0, 0)
12604 = gen_rtx_SET (VOIDmode,
12605 gen_frame_mem
12606 (BLKmode,
12607 gen_rtx_PRE_MODIFY (Pmode,
12608 stack_pointer_rtx,
12609 plus_constant
12610 (stack_pointer_rtx,
12611 - (count * 8)))
12613 gen_rtx_UNSPEC (BLKmode,
12614 gen_rtvec (1, reg),
12615 UNSPEC_PUSH_MULT));
12617 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12618 plus_constant (stack_pointer_rtx, -(count * 8)));
12619 RTX_FRAME_RELATED_P (tmp) = 1;
12620 XVECEXP (dwarf, 0, 0) = tmp;
12622 tmp = gen_rtx_SET (VOIDmode,
12623 gen_frame_mem (DFmode, stack_pointer_rtx),
12624 reg);
12625 RTX_FRAME_RELATED_P (tmp) = 1;
12626 XVECEXP (dwarf, 0, 1) = tmp;
12628 for (i = 1; i < count; i++)
12630 reg = gen_rtx_REG (DFmode, base_reg);
12631 base_reg += 2;
12632 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12634 tmp = gen_rtx_SET (VOIDmode,
12635 gen_frame_mem (DFmode,
12636 plus_constant (stack_pointer_rtx,
12637 i * 8)),
12638 reg);
12639 RTX_FRAME_RELATED_P (tmp) = 1;
12640 XVECEXP (dwarf, 0, i + 1) = tmp;
12643 par = emit_insn (par);
12644 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12645 RTX_FRAME_RELATED_P (par) = 1;
12647 return count * 8;
12650 /* Emit a call instruction with pattern PAT. ADDR is the address of
12651 the call target. */
12653 void
12654 arm_emit_call_insn (rtx pat, rtx addr)
12656 rtx insn;
12658 insn = emit_call_insn (pat);
12660 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12661 If the call might use such an entry, add a use of the PIC register
12662 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12663 if (TARGET_VXWORKS_RTP
12664 && flag_pic
12665 && GET_CODE (addr) == SYMBOL_REF
12666 && (SYMBOL_REF_DECL (addr)
12667 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12668 : !SYMBOL_REF_LOCAL_P (addr)))
12670 require_pic_register ();
12671 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12675 /* Output a 'call' insn. */
12676 const char *
12677 output_call (rtx *operands)
12679 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12681 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12682 if (REGNO (operands[0]) == LR_REGNUM)
12684 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12685 output_asm_insn ("mov%?\t%0, %|lr", operands);
12688 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12690 if (TARGET_INTERWORK || arm_arch4t)
12691 output_asm_insn ("bx%?\t%0", operands);
12692 else
12693 output_asm_insn ("mov%?\t%|pc, %0", operands);
12695 return "";
12698 /* Output a 'call' insn that is a reference in memory. This is
12699 disabled for ARMv5 and we prefer a blx instead because otherwise
12700 there's a significant performance overhead. */
12701 const char *
12702 output_call_mem (rtx *operands)
12704 gcc_assert (!arm_arch5);
12705 if (TARGET_INTERWORK)
12707 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12708 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12709 output_asm_insn ("bx%?\t%|ip", operands);
12711 else if (regno_use_in (LR_REGNUM, operands[0]))
12713 /* LR is used in the memory address. We load the address in the
12714 first instruction. It's safe to use IP as the target of the
12715 load since the call will kill it anyway. */
12716 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12717 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12718 if (arm_arch4t)
12719 output_asm_insn ("bx%?\t%|ip", operands);
12720 else
12721 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12723 else
12725 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12726 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12729 return "";
12733 /* Output a move from arm registers to an fpa registers.
12734 OPERANDS[0] is an fpa register.
12735 OPERANDS[1] is the first registers of an arm register pair. */
12736 const char *
12737 output_mov_long_double_fpa_from_arm (rtx *operands)
12739 int arm_reg0 = REGNO (operands[1]);
12740 rtx ops[3];
12742 gcc_assert (arm_reg0 != IP_REGNUM);
12744 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12745 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12746 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12748 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12749 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12751 return "";
12754 /* Output a move from an fpa register to arm registers.
12755 OPERANDS[0] is the first registers of an arm register pair.
12756 OPERANDS[1] is an fpa register. */
12757 const char *
12758 output_mov_long_double_arm_from_fpa (rtx *operands)
12760 int arm_reg0 = REGNO (operands[0]);
12761 rtx ops[3];
12763 gcc_assert (arm_reg0 != IP_REGNUM);
12765 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12766 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12767 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12769 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12770 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12771 return "";
12774 /* Output a move from arm registers to arm registers of a long double
12775 OPERANDS[0] is the destination.
12776 OPERANDS[1] is the source. */
12777 const char *
12778 output_mov_long_double_arm_from_arm (rtx *operands)
12780 /* We have to be careful here because the two might overlap. */
12781 int dest_start = REGNO (operands[0]);
12782 int src_start = REGNO (operands[1]);
12783 rtx ops[2];
12784 int i;
12786 if (dest_start < src_start)
12788 for (i = 0; i < 3; i++)
12790 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12791 ops[1] = gen_rtx_REG (SImode, src_start + i);
12792 output_asm_insn ("mov%?\t%0, %1", ops);
12795 else
12797 for (i = 2; i >= 0; i--)
12799 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12800 ops[1] = gen_rtx_REG (SImode, src_start + i);
12801 output_asm_insn ("mov%?\t%0, %1", ops);
12805 return "";
12808 void
12809 arm_emit_movpair (rtx dest, rtx src)
12811 /* If the src is an immediate, simplify it. */
12812 if (CONST_INT_P (src))
12814 HOST_WIDE_INT val = INTVAL (src);
12815 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12816 if ((val >> 16) & 0x0000ffff)
12817 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12818 GEN_INT (16)),
12819 GEN_INT ((val >> 16) & 0x0000ffff));
12820 return;
12822 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12823 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
12826 /* Output a move from arm registers to an fpa registers.
12827 OPERANDS[0] is an fpa register.
12828 OPERANDS[1] is the first registers of an arm register pair. */
12829 const char *
12830 output_mov_double_fpa_from_arm (rtx *operands)
12832 int arm_reg0 = REGNO (operands[1]);
12833 rtx ops[2];
12835 gcc_assert (arm_reg0 != IP_REGNUM);
12837 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12838 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12839 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12840 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12841 return "";
12844 /* Output a move from an fpa register to arm registers.
12845 OPERANDS[0] is the first registers of an arm register pair.
12846 OPERANDS[1] is an fpa register. */
12847 const char *
12848 output_mov_double_arm_from_fpa (rtx *operands)
12850 int arm_reg0 = REGNO (operands[0]);
12851 rtx ops[2];
12853 gcc_assert (arm_reg0 != IP_REGNUM);
12855 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12856 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12857 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12858 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12859 return "";
12862 /* Output a move between double words.
12863 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12864 or MEM<-REG and all MEMs must be offsettable addresses. */
12865 const char *
12866 output_move_double (rtx *operands)
12868 enum rtx_code code0 = GET_CODE (operands[0]);
12869 enum rtx_code code1 = GET_CODE (operands[1]);
12870 rtx otherops[3];
12872 if (code0 == REG)
12874 unsigned int reg0 = REGNO (operands[0]);
12876 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12878 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12880 switch (GET_CODE (XEXP (operands[1], 0)))
12882 case REG:
12883 if (TARGET_LDRD
12884 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12885 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12886 else
12887 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12888 break;
12890 case PRE_INC:
12891 gcc_assert (TARGET_LDRD);
12892 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12893 break;
12895 case PRE_DEC:
12896 if (TARGET_LDRD)
12897 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12898 else
12899 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12900 break;
12902 case POST_INC:
12903 if (TARGET_LDRD)
12904 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12905 else
12906 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12907 break;
12909 case POST_DEC:
12910 gcc_assert (TARGET_LDRD);
12911 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12912 break;
12914 case PRE_MODIFY:
12915 case POST_MODIFY:
12916 /* Autoicrement addressing modes should never have overlapping
12917 base and destination registers, and overlapping index registers
12918 are already prohibited, so this doesn't need to worry about
12919 fix_cm3_ldrd. */
12920 otherops[0] = operands[0];
12921 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12922 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12924 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12926 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12928 /* Registers overlap so split out the increment. */
12929 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12930 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12932 else
12934 /* Use a single insn if we can.
12935 FIXME: IWMMXT allows offsets larger than ldrd can
12936 handle, fix these up with a pair of ldr. */
12937 if (TARGET_THUMB2
12938 || GET_CODE (otherops[2]) != CONST_INT
12939 || (INTVAL (otherops[2]) > -256
12940 && INTVAL (otherops[2]) < 256))
12941 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12942 else
12944 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12945 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12949 else
12951 /* Use a single insn if we can.
12952 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12953 fix these up with a pair of ldr. */
12954 if (TARGET_THUMB2
12955 || GET_CODE (otherops[2]) != CONST_INT
12956 || (INTVAL (otherops[2]) > -256
12957 && INTVAL (otherops[2]) < 256))
12958 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12959 else
12961 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12962 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12965 break;
12967 case LABEL_REF:
12968 case CONST:
12969 /* We might be able to use ldrd %0, %1 here. However the range is
12970 different to ldr/adr, and it is broken on some ARMv7-M
12971 implementations. */
12972 /* Use the second register of the pair to avoid problematic
12973 overlap. */
12974 otherops[1] = operands[1];
12975 output_asm_insn ("adr%?\t%0, %1", otherops);
12976 operands[1] = otherops[0];
12977 if (TARGET_LDRD)
12978 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12979 else
12980 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12981 break;
12983 /* ??? This needs checking for thumb2. */
12984 default:
12985 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12986 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12988 otherops[0] = operands[0];
12989 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12990 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12992 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12994 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12996 switch ((int) INTVAL (otherops[2]))
12998 case -8:
12999 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13000 return "";
13001 case -4:
13002 if (TARGET_THUMB2)
13003 break;
13004 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13005 return "";
13006 case 4:
13007 if (TARGET_THUMB2)
13008 break;
13009 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13010 return "";
13013 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
13014 operands[1] = otherops[0];
13015 if (TARGET_LDRD
13016 && (GET_CODE (otherops[2]) == REG
13017 || TARGET_THUMB2
13018 || (GET_CODE (otherops[2]) == CONST_INT
13019 && INTVAL (otherops[2]) > -256
13020 && INTVAL (otherops[2]) < 256)))
13022 if (reg_overlap_mentioned_p (operands[0],
13023 otherops[2]))
13025 rtx tmp;
13026 /* Swap base and index registers over to
13027 avoid a conflict. */
13028 tmp = otherops[1];
13029 otherops[1] = otherops[2];
13030 otherops[2] = tmp;
13032 /* If both registers conflict, it will usually
13033 have been fixed by a splitter. */
13034 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13035 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13037 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13038 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13040 else
13042 otherops[0] = operands[0];
13043 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13045 return "";
13048 if (GET_CODE (otherops[2]) == CONST_INT)
13050 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13051 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13052 else
13053 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13055 else
13056 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13058 else
13059 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13061 if (TARGET_LDRD)
13062 return "ldr%(d%)\t%0, [%1]";
13064 return "ldm%(ia%)\t%1, %M0";
13066 else
13068 otherops[1] = adjust_address (operands[1], SImode, 4);
13069 /* Take care of overlapping base/data reg. */
13070 if (reg_mentioned_p (operands[0], operands[1]))
13072 output_asm_insn ("ldr%?\t%0, %1", otherops);
13073 output_asm_insn ("ldr%?\t%0, %1", operands);
13075 else
13077 output_asm_insn ("ldr%?\t%0, %1", operands);
13078 output_asm_insn ("ldr%?\t%0, %1", otherops);
13083 else
13085 /* Constraints should ensure this. */
13086 gcc_assert (code0 == MEM && code1 == REG);
13087 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13089 switch (GET_CODE (XEXP (operands[0], 0)))
13091 case REG:
13092 if (TARGET_LDRD)
13093 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13094 else
13095 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13096 break;
13098 case PRE_INC:
13099 gcc_assert (TARGET_LDRD);
13100 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13101 break;
13103 case PRE_DEC:
13104 if (TARGET_LDRD)
13105 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13106 else
13107 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13108 break;
13110 case POST_INC:
13111 if (TARGET_LDRD)
13112 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13113 else
13114 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13115 break;
13117 case POST_DEC:
13118 gcc_assert (TARGET_LDRD);
13119 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13120 break;
13122 case PRE_MODIFY:
13123 case POST_MODIFY:
13124 otherops[0] = operands[1];
13125 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13126 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13128 /* IWMMXT allows offsets larger than ldrd can handle,
13129 fix these up with a pair of ldr. */
13130 if (!TARGET_THUMB2
13131 && GET_CODE (otherops[2]) == CONST_INT
13132 && (INTVAL(otherops[2]) <= -256
13133 || INTVAL(otherops[2]) >= 256))
13135 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13137 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13138 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13140 else
13142 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13143 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13146 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13147 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13148 else
13149 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13150 break;
13152 case PLUS:
13153 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13154 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13156 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13158 case -8:
13159 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13160 return "";
13162 case -4:
13163 if (TARGET_THUMB2)
13164 break;
13165 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13166 return "";
13168 case 4:
13169 if (TARGET_THUMB2)
13170 break;
13171 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13172 return "";
13175 if (TARGET_LDRD
13176 && (GET_CODE (otherops[2]) == REG
13177 || TARGET_THUMB2
13178 || (GET_CODE (otherops[2]) == CONST_INT
13179 && INTVAL (otherops[2]) > -256
13180 && INTVAL (otherops[2]) < 256)))
13182 otherops[0] = operands[1];
13183 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13184 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13185 return "";
13187 /* Fall through */
13189 default:
13190 otherops[0] = adjust_address (operands[0], SImode, 4);
13191 otherops[1] = operands[1];
13192 output_asm_insn ("str%?\t%1, %0", operands);
13193 output_asm_insn ("str%?\t%H1, %0", otherops);
13197 return "";
13200 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13201 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13203 const char *
13204 output_move_quad (rtx *operands)
13206 if (REG_P (operands[0]))
13208 /* Load, or reg->reg move. */
13210 if (MEM_P (operands[1]))
13212 switch (GET_CODE (XEXP (operands[1], 0)))
13214 case REG:
13215 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13216 break;
13218 case LABEL_REF:
13219 case CONST:
13220 output_asm_insn ("adr%?\t%0, %1", operands);
13221 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13222 break;
13224 default:
13225 gcc_unreachable ();
13228 else
13230 rtx ops[2];
13231 int dest, src, i;
13233 gcc_assert (REG_P (operands[1]));
13235 dest = REGNO (operands[0]);
13236 src = REGNO (operands[1]);
13238 /* This seems pretty dumb, but hopefully GCC won't try to do it
13239 very often. */
13240 if (dest < src)
13241 for (i = 0; i < 4; i++)
13243 ops[0] = gen_rtx_REG (SImode, dest + i);
13244 ops[1] = gen_rtx_REG (SImode, src + i);
13245 output_asm_insn ("mov%?\t%0, %1", ops);
13247 else
13248 for (i = 3; i >= 0; i--)
13250 ops[0] = gen_rtx_REG (SImode, dest + i);
13251 ops[1] = gen_rtx_REG (SImode, src + i);
13252 output_asm_insn ("mov%?\t%0, %1", ops);
13256 else
13258 gcc_assert (MEM_P (operands[0]));
13259 gcc_assert (REG_P (operands[1]));
13260 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13262 switch (GET_CODE (XEXP (operands[0], 0)))
13264 case REG:
13265 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13266 break;
13268 default:
13269 gcc_unreachable ();
13273 return "";
13276 /* Output a VFP load or store instruction. */
13278 const char *
13279 output_move_vfp (rtx *operands)
13281 rtx reg, mem, addr, ops[2];
13282 int load = REG_P (operands[0]);
13283 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13284 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13285 const char *templ;
13286 char buff[50];
13287 enum machine_mode mode;
13289 reg = operands[!load];
13290 mem = operands[load];
13292 mode = GET_MODE (reg);
13294 gcc_assert (REG_P (reg));
13295 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13296 gcc_assert (mode == SFmode
13297 || mode == DFmode
13298 || mode == SImode
13299 || mode == DImode
13300 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13301 gcc_assert (MEM_P (mem));
13303 addr = XEXP (mem, 0);
13305 switch (GET_CODE (addr))
13307 case PRE_DEC:
13308 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13309 ops[0] = XEXP (addr, 0);
13310 ops[1] = reg;
13311 break;
13313 case POST_INC:
13314 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13315 ops[0] = XEXP (addr, 0);
13316 ops[1] = reg;
13317 break;
13319 default:
13320 templ = "f%s%c%%?\t%%%s0, %%1%s";
13321 ops[0] = reg;
13322 ops[1] = mem;
13323 break;
13326 sprintf (buff, templ,
13327 load ? "ld" : "st",
13328 dp ? 'd' : 's',
13329 dp ? "P" : "",
13330 integer_p ? "\t%@ int" : "");
13331 output_asm_insn (buff, ops);
13333 return "";
13336 /* Output a Neon quad-word load or store, or a load or store for
13337 larger structure modes.
13339 WARNING: The ordering of elements is weird in big-endian mode,
13340 because we use VSTM, as required by the EABI. GCC RTL defines
13341 element ordering based on in-memory order. This can be differ
13342 from the architectural ordering of elements within a NEON register.
13343 The intrinsics defined in arm_neon.h use the NEON register element
13344 ordering, not the GCC RTL element ordering.
13346 For example, the in-memory ordering of a big-endian a quadword
13347 vector with 16-bit elements when stored from register pair {d0,d1}
13348 will be (lowest address first, d0[N] is NEON register element N):
13350 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13352 When necessary, quadword registers (dN, dN+1) are moved to ARM
13353 registers from rN in the order:
13355 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13357 So that STM/LDM can be used on vectors in ARM registers, and the
13358 same memory layout will result as if VSTM/VLDM were used. */
13360 const char *
13361 output_move_neon (rtx *operands)
13363 rtx reg, mem, addr, ops[2];
13364 int regno, load = REG_P (operands[0]);
13365 const char *templ;
13366 char buff[50];
13367 enum machine_mode mode;
13369 reg = operands[!load];
13370 mem = operands[load];
13372 mode = GET_MODE (reg);
13374 gcc_assert (REG_P (reg));
13375 regno = REGNO (reg);
13376 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13377 || NEON_REGNO_OK_FOR_QUAD (regno));
13378 gcc_assert (VALID_NEON_DREG_MODE (mode)
13379 || VALID_NEON_QREG_MODE (mode)
13380 || VALID_NEON_STRUCT_MODE (mode));
13381 gcc_assert (MEM_P (mem));
13383 addr = XEXP (mem, 0);
13385 /* Strip off const from addresses like (const (plus (...))). */
13386 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13387 addr = XEXP (addr, 0);
13389 switch (GET_CODE (addr))
13391 case POST_INC:
13392 templ = "v%smia%%?\t%%0!, %%h1";
13393 ops[0] = XEXP (addr, 0);
13394 ops[1] = reg;
13395 break;
13397 case PRE_DEC:
13398 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13399 templ = "v%smdb%%?\t%%0!, %%h1";
13400 ops[0] = XEXP (addr, 0);
13401 ops[1] = reg;
13402 break;
13404 case POST_MODIFY:
13405 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13406 gcc_unreachable ();
13408 case LABEL_REF:
13409 case PLUS:
13411 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13412 int i;
13413 int overlap = -1;
13414 for (i = 0; i < nregs; i++)
13416 /* We're only using DImode here because it's a convenient size. */
13417 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13418 ops[1] = adjust_address (mem, DImode, 8 * i);
13419 if (reg_overlap_mentioned_p (ops[0], mem))
13421 gcc_assert (overlap == -1);
13422 overlap = i;
13424 else
13426 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13427 output_asm_insn (buff, ops);
13430 if (overlap != -1)
13432 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13433 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13434 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13435 output_asm_insn (buff, ops);
13438 return "";
13441 default:
13442 templ = "v%smia%%?\t%%m0, %%h1";
13443 ops[0] = mem;
13444 ops[1] = reg;
13447 sprintf (buff, templ, load ? "ld" : "st");
13448 output_asm_insn (buff, ops);
13450 return "";
13453 /* Compute and return the length of neon_mov<mode>, where <mode> is
13454 one of VSTRUCT modes: EI, OI, CI or XI. */
13456 arm_attr_length_move_neon (rtx insn)
13458 rtx reg, mem, addr;
13459 int load;
13460 enum machine_mode mode;
13462 extract_insn_cached (insn);
13464 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13466 mode = GET_MODE (recog_data.operand[0]);
13467 switch (mode)
13469 case EImode:
13470 case OImode:
13471 return 8;
13472 case CImode:
13473 return 12;
13474 case XImode:
13475 return 16;
13476 default:
13477 gcc_unreachable ();
13481 load = REG_P (recog_data.operand[0]);
13482 reg = recog_data.operand[!load];
13483 mem = recog_data.operand[load];
13485 gcc_assert (MEM_P (mem));
13487 mode = GET_MODE (reg);
13488 addr = XEXP (mem, 0);
13490 /* Strip off const from addresses like (const (plus (...))). */
13491 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13492 addr = XEXP (addr, 0);
13494 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13496 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13497 return insns * 4;
13499 else
13500 return 4;
13503 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13504 return zero. */
13507 arm_address_offset_is_imm (rtx insn)
13509 rtx mem, addr;
13511 extract_insn_cached (insn);
13513 if (REG_P (recog_data.operand[0]))
13514 return 0;
13516 mem = recog_data.operand[0];
13518 gcc_assert (MEM_P (mem));
13520 addr = XEXP (mem, 0);
13522 if (GET_CODE (addr) == REG
13523 || (GET_CODE (addr) == PLUS
13524 && GET_CODE (XEXP (addr, 0)) == REG
13525 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13526 return 1;
13527 else
13528 return 0;
13531 /* Output an ADD r, s, #n where n may be too big for one instruction.
13532 If adding zero to one register, output nothing. */
13533 const char *
13534 output_add_immediate (rtx *operands)
13536 HOST_WIDE_INT n = INTVAL (operands[2]);
13538 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13540 if (n < 0)
13541 output_multi_immediate (operands,
13542 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13543 -n);
13544 else
13545 output_multi_immediate (operands,
13546 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13550 return "";
13553 /* Output a multiple immediate operation.
13554 OPERANDS is the vector of operands referred to in the output patterns.
13555 INSTR1 is the output pattern to use for the first constant.
13556 INSTR2 is the output pattern to use for subsequent constants.
13557 IMMED_OP is the index of the constant slot in OPERANDS.
13558 N is the constant value. */
13559 static const char *
13560 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13561 int immed_op, HOST_WIDE_INT n)
13563 #if HOST_BITS_PER_WIDE_INT > 32
13564 n &= 0xffffffff;
13565 #endif
13567 if (n == 0)
13569 /* Quick and easy output. */
13570 operands[immed_op] = const0_rtx;
13571 output_asm_insn (instr1, operands);
13573 else
13575 int i;
13576 const char * instr = instr1;
13578 /* Note that n is never zero here (which would give no output). */
13579 for (i = 0; i < 32; i += 2)
13581 if (n & (3 << i))
13583 operands[immed_op] = GEN_INT (n & (255 << i));
13584 output_asm_insn (instr, operands);
13585 instr = instr2;
13586 i += 6;
13591 return "";
13594 /* Return the name of a shifter operation. */
13595 static const char *
13596 arm_shift_nmem(enum rtx_code code)
13598 switch (code)
13600 case ASHIFT:
13601 return ARM_LSL_NAME;
13603 case ASHIFTRT:
13604 return "asr";
13606 case LSHIFTRT:
13607 return "lsr";
13609 case ROTATERT:
13610 return "ror";
13612 default:
13613 abort();
13617 /* Return the appropriate ARM instruction for the operation code.
13618 The returned result should not be overwritten. OP is the rtx of the
13619 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13620 was shifted. */
13621 const char *
13622 arithmetic_instr (rtx op, int shift_first_arg)
13624 switch (GET_CODE (op))
13626 case PLUS:
13627 return "add";
13629 case MINUS:
13630 return shift_first_arg ? "rsb" : "sub";
13632 case IOR:
13633 return "orr";
13635 case XOR:
13636 return "eor";
13638 case AND:
13639 return "and";
13641 case ASHIFT:
13642 case ASHIFTRT:
13643 case LSHIFTRT:
13644 case ROTATERT:
13645 return arm_shift_nmem(GET_CODE(op));
13647 default:
13648 gcc_unreachable ();
13652 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13653 for the operation code. The returned result should not be overwritten.
13654 OP is the rtx code of the shift.
13655 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
13656 shift. */
13657 static const char *
13658 shift_op (rtx op, HOST_WIDE_INT *amountp)
13660 const char * mnem;
13661 enum rtx_code code = GET_CODE (op);
13663 switch (GET_CODE (XEXP (op, 1)))
13665 case REG:
13666 case SUBREG:
13667 *amountp = -1;
13668 break;
13670 case CONST_INT:
13671 *amountp = INTVAL (XEXP (op, 1));
13672 break;
13674 default:
13675 gcc_unreachable ();
13678 switch (code)
13680 case ROTATE:
13681 gcc_assert (*amountp != -1);
13682 *amountp = 32 - *amountp;
13683 code = ROTATERT;
13685 /* Fall through. */
13687 case ASHIFT:
13688 case ASHIFTRT:
13689 case LSHIFTRT:
13690 case ROTATERT:
13691 mnem = arm_shift_nmem(code);
13692 break;
13694 case MULT:
13695 /* We never have to worry about the amount being other than a
13696 power of 2, since this case can never be reloaded from a reg. */
13697 gcc_assert (*amountp != -1);
13698 *amountp = int_log2 (*amountp);
13699 return ARM_LSL_NAME;
13701 default:
13702 gcc_unreachable ();
13705 if (*amountp != -1)
13707 /* This is not 100% correct, but follows from the desire to merge
13708 multiplication by a power of 2 with the recognizer for a
13709 shift. >=32 is not a valid shift for "lsl", so we must try and
13710 output a shift that produces the correct arithmetical result.
13711 Using lsr #32 is identical except for the fact that the carry bit
13712 is not set correctly if we set the flags; but we never use the
13713 carry bit from such an operation, so we can ignore that. */
13714 if (code == ROTATERT)
13715 /* Rotate is just modulo 32. */
13716 *amountp &= 31;
13717 else if (*amountp != (*amountp & 31))
13719 if (code == ASHIFT)
13720 mnem = "lsr";
13721 *amountp = 32;
13724 /* Shifts of 0 are no-ops. */
13725 if (*amountp == 0)
13726 return NULL;
13729 return mnem;
13732 /* Obtain the shift from the POWER of two. */
13734 static HOST_WIDE_INT
13735 int_log2 (HOST_WIDE_INT power)
13737 HOST_WIDE_INT shift = 0;
13739 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13741 gcc_assert (shift <= 31);
13742 shift++;
13745 return shift;
13748 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13749 because /bin/as is horribly restrictive. The judgement about
13750 whether or not each character is 'printable' (and can be output as
13751 is) or not (and must be printed with an octal escape) must be made
13752 with reference to the *host* character set -- the situation is
13753 similar to that discussed in the comments above pp_c_char in
13754 c-pretty-print.c. */
13756 #define MAX_ASCII_LEN 51
13758 void
13759 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13761 int i;
13762 int len_so_far = 0;
13764 fputs ("\t.ascii\t\"", stream);
13766 for (i = 0; i < len; i++)
13768 int c = p[i];
13770 if (len_so_far >= MAX_ASCII_LEN)
13772 fputs ("\"\n\t.ascii\t\"", stream);
13773 len_so_far = 0;
13776 if (ISPRINT (c))
13778 if (c == '\\' || c == '\"')
13780 putc ('\\', stream);
13781 len_so_far++;
13783 putc (c, stream);
13784 len_so_far++;
13786 else
13788 fprintf (stream, "\\%03o", c);
13789 len_so_far += 4;
13793 fputs ("\"\n", stream);
13796 /* Compute the register save mask for registers 0 through 12
13797 inclusive. This code is used by arm_compute_save_reg_mask. */
13799 static unsigned long
13800 arm_compute_save_reg0_reg12_mask (void)
13802 unsigned long func_type = arm_current_func_type ();
13803 unsigned long save_reg_mask = 0;
13804 unsigned int reg;
13806 if (IS_INTERRUPT (func_type))
13808 unsigned int max_reg;
13809 /* Interrupt functions must not corrupt any registers,
13810 even call clobbered ones. If this is a leaf function
13811 we can just examine the registers used by the RTL, but
13812 otherwise we have to assume that whatever function is
13813 called might clobber anything, and so we have to save
13814 all the call-clobbered registers as well. */
13815 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13816 /* FIQ handlers have registers r8 - r12 banked, so
13817 we only need to check r0 - r7, Normal ISRs only
13818 bank r14 and r15, so we must check up to r12.
13819 r13 is the stack pointer which is always preserved,
13820 so we do not need to consider it here. */
13821 max_reg = 7;
13822 else
13823 max_reg = 12;
13825 for (reg = 0; reg <= max_reg; reg++)
13826 if (df_regs_ever_live_p (reg)
13827 || (! current_function_is_leaf && call_used_regs[reg]))
13828 save_reg_mask |= (1 << reg);
13830 /* Also save the pic base register if necessary. */
13831 if (flag_pic
13832 && !TARGET_SINGLE_PIC_BASE
13833 && arm_pic_register != INVALID_REGNUM
13834 && crtl->uses_pic_offset_table)
13835 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13837 else if (IS_VOLATILE(func_type))
13839 /* For noreturn functions we historically omitted register saves
13840 altogether. However this really messes up debugging. As a
13841 compromise save just the frame pointers. Combined with the link
13842 register saved elsewhere this should be sufficient to get
13843 a backtrace. */
13844 if (frame_pointer_needed)
13845 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13846 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13847 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13848 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13849 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13851 else
13853 /* In the normal case we only need to save those registers
13854 which are call saved and which are used by this function. */
13855 for (reg = 0; reg <= 11; reg++)
13856 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13857 save_reg_mask |= (1 << reg);
13859 /* Handle the frame pointer as a special case. */
13860 if (frame_pointer_needed)
13861 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13863 /* If we aren't loading the PIC register,
13864 don't stack it even though it may be live. */
13865 if (flag_pic
13866 && !TARGET_SINGLE_PIC_BASE
13867 && arm_pic_register != INVALID_REGNUM
13868 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13869 || crtl->uses_pic_offset_table))
13870 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13872 /* The prologue will copy SP into R0, so save it. */
13873 if (IS_STACKALIGN (func_type))
13874 save_reg_mask |= 1;
13877 /* Save registers so the exception handler can modify them. */
13878 if (crtl->calls_eh_return)
13880 unsigned int i;
13882 for (i = 0; ; i++)
13884 reg = EH_RETURN_DATA_REGNO (i);
13885 if (reg == INVALID_REGNUM)
13886 break;
13887 save_reg_mask |= 1 << reg;
13891 return save_reg_mask;
13895 /* Compute the number of bytes used to store the static chain register on the
13896 stack, above the stack frame. We need to know this accurately to get the
13897 alignment of the rest of the stack frame correct. */
13899 static int arm_compute_static_chain_stack_bytes (void)
13901 unsigned long func_type = arm_current_func_type ();
13902 int static_chain_stack_bytes = 0;
13904 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
13905 IS_NESTED (func_type) &&
13906 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13907 static_chain_stack_bytes = 4;
13909 return static_chain_stack_bytes;
13913 /* Compute a bit mask of which registers need to be
13914 saved on the stack for the current function.
13915 This is used by arm_get_frame_offsets, which may add extra registers. */
13917 static unsigned long
13918 arm_compute_save_reg_mask (void)
13920 unsigned int save_reg_mask = 0;
13921 unsigned long func_type = arm_current_func_type ();
13922 unsigned int reg;
13924 if (IS_NAKED (func_type))
13925 /* This should never really happen. */
13926 return 0;
13928 /* If we are creating a stack frame, then we must save the frame pointer,
13929 IP (which will hold the old stack pointer), LR and the PC. */
13930 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13931 save_reg_mask |=
13932 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13933 | (1 << IP_REGNUM)
13934 | (1 << LR_REGNUM)
13935 | (1 << PC_REGNUM);
13937 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13939 /* Decide if we need to save the link register.
13940 Interrupt routines have their own banked link register,
13941 so they never need to save it.
13942 Otherwise if we do not use the link register we do not need to save
13943 it. If we are pushing other registers onto the stack however, we
13944 can save an instruction in the epilogue by pushing the link register
13945 now and then popping it back into the PC. This incurs extra memory
13946 accesses though, so we only do it when optimizing for size, and only
13947 if we know that we will not need a fancy return sequence. */
13948 if (df_regs_ever_live_p (LR_REGNUM)
13949 || (save_reg_mask
13950 && optimize_size
13951 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13952 && !crtl->calls_eh_return))
13953 save_reg_mask |= 1 << LR_REGNUM;
13955 if (cfun->machine->lr_save_eliminated)
13956 save_reg_mask &= ~ (1 << LR_REGNUM);
13958 if (TARGET_REALLY_IWMMXT
13959 && ((bit_count (save_reg_mask)
13960 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13961 arm_compute_static_chain_stack_bytes())
13962 ) % 2) != 0)
13964 /* The total number of registers that are going to be pushed
13965 onto the stack is odd. We need to ensure that the stack
13966 is 64-bit aligned before we start to save iWMMXt registers,
13967 and also before we start to create locals. (A local variable
13968 might be a double or long long which we will load/store using
13969 an iWMMXt instruction). Therefore we need to push another
13970 ARM register, so that the stack will be 64-bit aligned. We
13971 try to avoid using the arg registers (r0 -r3) as they might be
13972 used to pass values in a tail call. */
13973 for (reg = 4; reg <= 12; reg++)
13974 if ((save_reg_mask & (1 << reg)) == 0)
13975 break;
13977 if (reg <= 12)
13978 save_reg_mask |= (1 << reg);
13979 else
13981 cfun->machine->sibcall_blocked = 1;
13982 save_reg_mask |= (1 << 3);
13986 /* We may need to push an additional register for use initializing the
13987 PIC base register. */
13988 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13989 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13991 reg = thumb_find_work_register (1 << 4);
13992 if (!call_used_regs[reg])
13993 save_reg_mask |= (1 << reg);
13996 return save_reg_mask;
14000 /* Compute a bit mask of which registers need to be
14001 saved on the stack for the current function. */
14002 static unsigned long
14003 thumb1_compute_save_reg_mask (void)
14005 unsigned long mask;
14006 unsigned reg;
14008 mask = 0;
14009 for (reg = 0; reg < 12; reg ++)
14010 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14011 mask |= 1 << reg;
14013 if (flag_pic
14014 && !TARGET_SINGLE_PIC_BASE
14015 && arm_pic_register != INVALID_REGNUM
14016 && crtl->uses_pic_offset_table)
14017 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14019 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14020 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14021 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14023 /* LR will also be pushed if any lo regs are pushed. */
14024 if (mask & 0xff || thumb_force_lr_save ())
14025 mask |= (1 << LR_REGNUM);
14027 /* Make sure we have a low work register if we need one.
14028 We will need one if we are going to push a high register,
14029 but we are not currently intending to push a low register. */
14030 if ((mask & 0xff) == 0
14031 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14033 /* Use thumb_find_work_register to choose which register
14034 we will use. If the register is live then we will
14035 have to push it. Use LAST_LO_REGNUM as our fallback
14036 choice for the register to select. */
14037 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14038 /* Make sure the register returned by thumb_find_work_register is
14039 not part of the return value. */
14040 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14041 reg = LAST_LO_REGNUM;
14043 if (! call_used_regs[reg])
14044 mask |= 1 << reg;
14047 /* The 504 below is 8 bytes less than 512 because there are two possible
14048 alignment words. We can't tell here if they will be present or not so we
14049 have to play it safe and assume that they are. */
14050 if ((CALLER_INTERWORKING_SLOT_SIZE +
14051 ROUND_UP_WORD (get_frame_size ()) +
14052 crtl->outgoing_args_size) >= 504)
14054 /* This is the same as the code in thumb1_expand_prologue() which
14055 determines which register to use for stack decrement. */
14056 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14057 if (mask & (1 << reg))
14058 break;
14060 if (reg > LAST_LO_REGNUM)
14062 /* Make sure we have a register available for stack decrement. */
14063 mask |= 1 << LAST_LO_REGNUM;
14067 return mask;
14071 /* Return the number of bytes required to save VFP registers. */
14072 static int
14073 arm_get_vfp_saved_size (void)
14075 unsigned int regno;
14076 int count;
14077 int saved;
14079 saved = 0;
14080 /* Space for saved VFP registers. */
14081 if (TARGET_HARD_FLOAT && TARGET_VFP)
14083 count = 0;
14084 for (regno = FIRST_VFP_REGNUM;
14085 regno < LAST_VFP_REGNUM;
14086 regno += 2)
14088 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14089 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14091 if (count > 0)
14093 /* Workaround ARM10 VFPr1 bug. */
14094 if (count == 2 && !arm_arch6)
14095 count++;
14096 saved += count * 8;
14098 count = 0;
14100 else
14101 count++;
14103 if (count > 0)
14105 if (count == 2 && !arm_arch6)
14106 count++;
14107 saved += count * 8;
14110 return saved;
14114 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14115 everything bar the final return instruction. */
14116 const char *
14117 output_return_instruction (rtx operand, int really_return, int reverse)
14119 char conditional[10];
14120 char instr[100];
14121 unsigned reg;
14122 unsigned long live_regs_mask;
14123 unsigned long func_type;
14124 arm_stack_offsets *offsets;
14126 func_type = arm_current_func_type ();
14128 if (IS_NAKED (func_type))
14129 return "";
14131 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14133 /* If this function was declared non-returning, and we have
14134 found a tail call, then we have to trust that the called
14135 function won't return. */
14136 if (really_return)
14138 rtx ops[2];
14140 /* Otherwise, trap an attempted return by aborting. */
14141 ops[0] = operand;
14142 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14143 : "abort");
14144 assemble_external_libcall (ops[1]);
14145 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14148 return "";
14151 gcc_assert (!cfun->calls_alloca || really_return);
14153 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14155 cfun->machine->return_used_this_function = 1;
14157 offsets = arm_get_frame_offsets ();
14158 live_regs_mask = offsets->saved_regs_mask;
14160 if (live_regs_mask)
14162 const char * return_reg;
14164 /* If we do not have any special requirements for function exit
14165 (e.g. interworking) then we can load the return address
14166 directly into the PC. Otherwise we must load it into LR. */
14167 if (really_return
14168 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14169 return_reg = reg_names[PC_REGNUM];
14170 else
14171 return_reg = reg_names[LR_REGNUM];
14173 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14175 /* There are three possible reasons for the IP register
14176 being saved. 1) a stack frame was created, in which case
14177 IP contains the old stack pointer, or 2) an ISR routine
14178 corrupted it, or 3) it was saved to align the stack on
14179 iWMMXt. In case 1, restore IP into SP, otherwise just
14180 restore IP. */
14181 if (frame_pointer_needed)
14183 live_regs_mask &= ~ (1 << IP_REGNUM);
14184 live_regs_mask |= (1 << SP_REGNUM);
14186 else
14187 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14190 /* On some ARM architectures it is faster to use LDR rather than
14191 LDM to load a single register. On other architectures, the
14192 cost is the same. In 26 bit mode, or for exception handlers,
14193 we have to use LDM to load the PC so that the CPSR is also
14194 restored. */
14195 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14196 if (live_regs_mask == (1U << reg))
14197 break;
14199 if (reg <= LAST_ARM_REGNUM
14200 && (reg != LR_REGNUM
14201 || ! really_return
14202 || ! IS_INTERRUPT (func_type)))
14204 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14205 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14207 else
14209 char *p;
14210 int first = 1;
14212 /* Generate the load multiple instruction to restore the
14213 registers. Note we can get here, even if
14214 frame_pointer_needed is true, but only if sp already
14215 points to the base of the saved core registers. */
14216 if (live_regs_mask & (1 << SP_REGNUM))
14218 unsigned HOST_WIDE_INT stack_adjust;
14220 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14221 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14223 if (stack_adjust && arm_arch5 && TARGET_ARM)
14224 if (TARGET_UNIFIED_ASM)
14225 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14226 else
14227 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14228 else
14230 /* If we can't use ldmib (SA110 bug),
14231 then try to pop r3 instead. */
14232 if (stack_adjust)
14233 live_regs_mask |= 1 << 3;
14235 if (TARGET_UNIFIED_ASM)
14236 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14237 else
14238 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14241 else
14242 if (TARGET_UNIFIED_ASM)
14243 sprintf (instr, "pop%s\t{", conditional);
14244 else
14245 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14247 p = instr + strlen (instr);
14249 for (reg = 0; reg <= SP_REGNUM; reg++)
14250 if (live_regs_mask & (1 << reg))
14252 int l = strlen (reg_names[reg]);
14254 if (first)
14255 first = 0;
14256 else
14258 memcpy (p, ", ", 2);
14259 p += 2;
14262 memcpy (p, "%|", 2);
14263 memcpy (p + 2, reg_names[reg], l);
14264 p += l + 2;
14267 if (live_regs_mask & (1 << LR_REGNUM))
14269 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14270 /* If returning from an interrupt, restore the CPSR. */
14271 if (IS_INTERRUPT (func_type))
14272 strcat (p, "^");
14274 else
14275 strcpy (p, "}");
14278 output_asm_insn (instr, & operand);
14280 /* See if we need to generate an extra instruction to
14281 perform the actual function return. */
14282 if (really_return
14283 && func_type != ARM_FT_INTERWORKED
14284 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14286 /* The return has already been handled
14287 by loading the LR into the PC. */
14288 really_return = 0;
14292 if (really_return)
14294 switch ((int) ARM_FUNC_TYPE (func_type))
14296 case ARM_FT_ISR:
14297 case ARM_FT_FIQ:
14298 /* ??? This is wrong for unified assembly syntax. */
14299 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14300 break;
14302 case ARM_FT_INTERWORKED:
14303 sprintf (instr, "bx%s\t%%|lr", conditional);
14304 break;
14306 case ARM_FT_EXCEPTION:
14307 /* ??? This is wrong for unified assembly syntax. */
14308 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14309 break;
14311 default:
14312 /* Use bx if it's available. */
14313 if (arm_arch5 || arm_arch4t)
14314 sprintf (instr, "bx%s\t%%|lr", conditional);
14315 else
14316 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14317 break;
14320 output_asm_insn (instr, & operand);
14323 return "";
14326 /* Write the function name into the code section, directly preceding
14327 the function prologue.
14329 Code will be output similar to this:
14331 .ascii "arm_poke_function_name", 0
14332 .align
14334 .word 0xff000000 + (t1 - t0)
14335 arm_poke_function_name
14336 mov ip, sp
14337 stmfd sp!, {fp, ip, lr, pc}
14338 sub fp, ip, #4
14340 When performing a stack backtrace, code can inspect the value
14341 of 'pc' stored at 'fp' + 0. If the trace function then looks
14342 at location pc - 12 and the top 8 bits are set, then we know
14343 that there is a function name embedded immediately preceding this
14344 location and has length ((pc[-3]) & 0xff000000).
14346 We assume that pc is declared as a pointer to an unsigned long.
14348 It is of no benefit to output the function name if we are assembling
14349 a leaf function. These function types will not contain a stack
14350 backtrace structure, therefore it is not possible to determine the
14351 function name. */
14352 void
14353 arm_poke_function_name (FILE *stream, const char *name)
14355 unsigned long alignlength;
14356 unsigned long length;
14357 rtx x;
14359 length = strlen (name) + 1;
14360 alignlength = ROUND_UP_WORD (length);
14362 ASM_OUTPUT_ASCII (stream, name, length);
14363 ASM_OUTPUT_ALIGN (stream, 2);
14364 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14365 assemble_aligned_integer (UNITS_PER_WORD, x);
14368 /* Place some comments into the assembler stream
14369 describing the current function. */
14370 static void
14371 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14373 unsigned long func_type;
14375 if (TARGET_THUMB1)
14377 thumb1_output_function_prologue (f, frame_size);
14378 return;
14381 /* Sanity check. */
14382 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14384 func_type = arm_current_func_type ();
14386 switch ((int) ARM_FUNC_TYPE (func_type))
14388 default:
14389 case ARM_FT_NORMAL:
14390 break;
14391 case ARM_FT_INTERWORKED:
14392 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14393 break;
14394 case ARM_FT_ISR:
14395 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14396 break;
14397 case ARM_FT_FIQ:
14398 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14399 break;
14400 case ARM_FT_EXCEPTION:
14401 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14402 break;
14405 if (IS_NAKED (func_type))
14406 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14408 if (IS_VOLATILE (func_type))
14409 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14411 if (IS_NESTED (func_type))
14412 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14413 if (IS_STACKALIGN (func_type))
14414 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14416 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14417 crtl->args.size,
14418 crtl->args.pretend_args_size, frame_size);
14420 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14421 frame_pointer_needed,
14422 cfun->machine->uses_anonymous_args);
14424 if (cfun->machine->lr_save_eliminated)
14425 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14427 if (crtl->calls_eh_return)
14428 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14432 const char *
14433 arm_output_epilogue (rtx sibling)
14435 int reg;
14436 unsigned long saved_regs_mask;
14437 unsigned long func_type;
14438 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14439 frame that is $fp + 4 for a non-variadic function. */
14440 int floats_offset = 0;
14441 rtx operands[3];
14442 FILE * f = asm_out_file;
14443 unsigned int lrm_count = 0;
14444 int really_return = (sibling == NULL);
14445 int start_reg;
14446 arm_stack_offsets *offsets;
14448 /* If we have already generated the return instruction
14449 then it is futile to generate anything else. */
14450 if (use_return_insn (FALSE, sibling) &&
14451 (cfun->machine->return_used_this_function != 0))
14452 return "";
14454 func_type = arm_current_func_type ();
14456 if (IS_NAKED (func_type))
14457 /* Naked functions don't have epilogues. */
14458 return "";
14460 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14462 rtx op;
14464 /* A volatile function should never return. Call abort. */
14465 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14466 assemble_external_libcall (op);
14467 output_asm_insn ("bl\t%a0", &op);
14469 return "";
14472 /* If we are throwing an exception, then we really must be doing a
14473 return, so we can't tail-call. */
14474 gcc_assert (!crtl->calls_eh_return || really_return);
14476 offsets = arm_get_frame_offsets ();
14477 saved_regs_mask = offsets->saved_regs_mask;
14479 if (TARGET_IWMMXT)
14480 lrm_count = bit_count (saved_regs_mask);
14482 floats_offset = offsets->saved_args;
14483 /* Compute how far away the floats will be. */
14484 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14485 if (saved_regs_mask & (1 << reg))
14486 floats_offset += 4;
14488 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14490 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14491 int vfp_offset = offsets->frame;
14493 if (TARGET_FPA_EMU2)
14495 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14496 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14498 floats_offset += 12;
14499 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14500 reg, FP_REGNUM, floats_offset - vfp_offset);
14503 else
14505 start_reg = LAST_FPA_REGNUM;
14507 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14509 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14511 floats_offset += 12;
14513 /* We can't unstack more than four registers at once. */
14514 if (start_reg - reg == 3)
14516 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14517 reg, FP_REGNUM, floats_offset - vfp_offset);
14518 start_reg = reg - 1;
14521 else
14523 if (reg != start_reg)
14524 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14525 reg + 1, start_reg - reg,
14526 FP_REGNUM, floats_offset - vfp_offset);
14527 start_reg = reg - 1;
14531 /* Just in case the last register checked also needs unstacking. */
14532 if (reg != start_reg)
14533 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14534 reg + 1, start_reg - reg,
14535 FP_REGNUM, floats_offset - vfp_offset);
14538 if (TARGET_HARD_FLOAT && TARGET_VFP)
14540 int saved_size;
14542 /* The fldmd insns do not have base+offset addressing
14543 modes, so we use IP to hold the address. */
14544 saved_size = arm_get_vfp_saved_size ();
14546 if (saved_size > 0)
14548 floats_offset += saved_size;
14549 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14550 FP_REGNUM, floats_offset - vfp_offset);
14552 start_reg = FIRST_VFP_REGNUM;
14553 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14555 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14556 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14558 if (start_reg != reg)
14559 vfp_output_fldmd (f, IP_REGNUM,
14560 (start_reg - FIRST_VFP_REGNUM) / 2,
14561 (reg - start_reg) / 2);
14562 start_reg = reg + 2;
14565 if (start_reg != reg)
14566 vfp_output_fldmd (f, IP_REGNUM,
14567 (start_reg - FIRST_VFP_REGNUM) / 2,
14568 (reg - start_reg) / 2);
14571 if (TARGET_IWMMXT)
14573 /* The frame pointer is guaranteed to be non-double-word aligned.
14574 This is because it is set to (old_stack_pointer - 4) and the
14575 old_stack_pointer was double word aligned. Thus the offset to
14576 the iWMMXt registers to be loaded must also be non-double-word
14577 sized, so that the resultant address *is* double-word aligned.
14578 We can ignore floats_offset since that was already included in
14579 the live_regs_mask. */
14580 lrm_count += (lrm_count % 2 ? 2 : 1);
14582 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14583 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14585 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14586 reg, FP_REGNUM, lrm_count * 4);
14587 lrm_count += 2;
14591 /* saved_regs_mask should contain the IP, which at the time of stack
14592 frame generation actually contains the old stack pointer. So a
14593 quick way to unwind the stack is just pop the IP register directly
14594 into the stack pointer. */
14595 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14596 saved_regs_mask &= ~ (1 << IP_REGNUM);
14597 saved_regs_mask |= (1 << SP_REGNUM);
14599 /* There are two registers left in saved_regs_mask - LR and PC. We
14600 only need to restore the LR register (the return address), but to
14601 save time we can load it directly into the PC, unless we need a
14602 special function exit sequence, or we are not really returning. */
14603 if (really_return
14604 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14605 && !crtl->calls_eh_return)
14606 /* Delete the LR from the register mask, so that the LR on
14607 the stack is loaded into the PC in the register mask. */
14608 saved_regs_mask &= ~ (1 << LR_REGNUM);
14609 else
14610 saved_regs_mask &= ~ (1 << PC_REGNUM);
14612 /* We must use SP as the base register, because SP is one of the
14613 registers being restored. If an interrupt or page fault
14614 happens in the ldm instruction, the SP might or might not
14615 have been restored. That would be bad, as then SP will no
14616 longer indicate the safe area of stack, and we can get stack
14617 corruption. Using SP as the base register means that it will
14618 be reset correctly to the original value, should an interrupt
14619 occur. If the stack pointer already points at the right
14620 place, then omit the subtraction. */
14621 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14622 || cfun->calls_alloca)
14623 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14624 4 * bit_count (saved_regs_mask));
14625 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14627 if (IS_INTERRUPT (func_type))
14628 /* Interrupt handlers will have pushed the
14629 IP onto the stack, so restore it now. */
14630 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14632 else
14634 /* This branch is executed for ARM mode (non-apcs frames) and
14635 Thumb-2 mode. Frame layout is essentially the same for those
14636 cases, except that in ARM mode frame pointer points to the
14637 first saved register, while in Thumb-2 mode the frame pointer points
14638 to the last saved register.
14640 It is possible to make frame pointer point to last saved
14641 register in both cases, and remove some conditionals below.
14642 That means that fp setup in prologue would be just "mov fp, sp"
14643 and sp restore in epilogue would be just "mov sp, fp", whereas
14644 now we have to use add/sub in those cases. However, the value
14645 of that would be marginal, as both mov and add/sub are 32-bit
14646 in ARM mode, and it would require extra conditionals
14647 in arm_expand_prologue to distingish ARM-apcs-frame case
14648 (where frame pointer is required to point at first register)
14649 and ARM-non-apcs-frame. Therefore, such change is postponed
14650 until real need arise. */
14651 unsigned HOST_WIDE_INT amount;
14652 int rfe;
14653 /* Restore stack pointer if necessary. */
14654 if (TARGET_ARM && frame_pointer_needed)
14656 operands[0] = stack_pointer_rtx;
14657 operands[1] = hard_frame_pointer_rtx;
14659 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14660 output_add_immediate (operands);
14662 else
14664 if (frame_pointer_needed)
14666 /* For Thumb-2 restore sp from the frame pointer.
14667 Operand restrictions mean we have to incrememnt FP, then copy
14668 to SP. */
14669 amount = offsets->locals_base - offsets->saved_regs;
14670 operands[0] = hard_frame_pointer_rtx;
14672 else
14674 unsigned long count;
14675 operands[0] = stack_pointer_rtx;
14676 amount = offsets->outgoing_args - offsets->saved_regs;
14677 /* pop call clobbered registers if it avoids a
14678 separate stack adjustment. */
14679 count = offsets->saved_regs - offsets->saved_args;
14680 if (optimize_size
14681 && count != 0
14682 && !crtl->calls_eh_return
14683 && bit_count(saved_regs_mask) * 4 == count
14684 && !IS_INTERRUPT (func_type)
14685 && !crtl->tail_call_emit)
14687 unsigned long mask;
14688 /* Preserve return values, of any size. */
14689 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
14690 mask ^= 0xf;
14691 mask &= ~saved_regs_mask;
14692 reg = 0;
14693 while (bit_count (mask) * 4 > amount)
14695 while ((mask & (1 << reg)) == 0)
14696 reg++;
14697 mask &= ~(1 << reg);
14699 if (bit_count (mask) * 4 == amount) {
14700 amount = 0;
14701 saved_regs_mask |= mask;
14706 if (amount)
14708 operands[1] = operands[0];
14709 operands[2] = GEN_INT (amount);
14710 output_add_immediate (operands);
14712 if (frame_pointer_needed)
14713 asm_fprintf (f, "\tmov\t%r, %r\n",
14714 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14717 if (TARGET_FPA_EMU2)
14719 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14720 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14721 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14722 reg, SP_REGNUM);
14724 else
14726 start_reg = FIRST_FPA_REGNUM;
14728 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14730 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14732 if (reg - start_reg == 3)
14734 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14735 start_reg, SP_REGNUM);
14736 start_reg = reg + 1;
14739 else
14741 if (reg != start_reg)
14742 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14743 start_reg, reg - start_reg,
14744 SP_REGNUM);
14746 start_reg = reg + 1;
14750 /* Just in case the last register checked also needs unstacking. */
14751 if (reg != start_reg)
14752 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14753 start_reg, reg - start_reg, SP_REGNUM);
14756 if (TARGET_HARD_FLOAT && TARGET_VFP)
14758 int end_reg = LAST_VFP_REGNUM + 1;
14760 /* Scan the registers in reverse order. We need to match
14761 any groupings made in the prologue and generate matching
14762 pop operations. */
14763 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14765 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14766 && (!df_regs_ever_live_p (reg + 1)
14767 || call_used_regs[reg + 1]))
14769 if (end_reg > reg + 2)
14770 vfp_output_fldmd (f, SP_REGNUM,
14771 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14772 (end_reg - (reg + 2)) / 2);
14773 end_reg = reg;
14776 if (end_reg > reg + 2)
14777 vfp_output_fldmd (f, SP_REGNUM, 0,
14778 (end_reg - (reg + 2)) / 2);
14781 if (TARGET_IWMMXT)
14782 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14783 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14784 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14786 /* If we can, restore the LR into the PC. */
14787 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14788 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14789 && !IS_STACKALIGN (func_type)
14790 && really_return
14791 && crtl->args.pretend_args_size == 0
14792 && saved_regs_mask & (1 << LR_REGNUM)
14793 && !crtl->calls_eh_return)
14795 saved_regs_mask &= ~ (1 << LR_REGNUM);
14796 saved_regs_mask |= (1 << PC_REGNUM);
14797 rfe = IS_INTERRUPT (func_type);
14799 else
14800 rfe = 0;
14802 /* Load the registers off the stack. If we only have one register
14803 to load use the LDR instruction - it is faster. For Thumb-2
14804 always use pop and the assembler will pick the best instruction.*/
14805 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14806 && !IS_INTERRUPT(func_type))
14808 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14810 else if (saved_regs_mask)
14812 if (saved_regs_mask & (1 << SP_REGNUM))
14813 /* Note - write back to the stack register is not enabled
14814 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14815 in the list of registers and if we add writeback the
14816 instruction becomes UNPREDICTABLE. */
14817 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14818 rfe);
14819 else if (TARGET_ARM)
14820 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14821 rfe);
14822 else
14823 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14826 if (crtl->args.pretend_args_size)
14828 /* Unwind the pre-pushed regs. */
14829 operands[0] = operands[1] = stack_pointer_rtx;
14830 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14831 output_add_immediate (operands);
14835 /* We may have already restored PC directly from the stack. */
14836 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14837 return "";
14839 /* Stack adjustment for exception handler. */
14840 if (crtl->calls_eh_return)
14841 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14842 ARM_EH_STACKADJ_REGNUM);
14844 /* Generate the return instruction. */
14845 switch ((int) ARM_FUNC_TYPE (func_type))
14847 case ARM_FT_ISR:
14848 case ARM_FT_FIQ:
14849 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14850 break;
14852 case ARM_FT_EXCEPTION:
14853 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14854 break;
14856 case ARM_FT_INTERWORKED:
14857 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14858 break;
14860 default:
14861 if (IS_STACKALIGN (func_type))
14863 /* See comment in arm_expand_prologue. */
14864 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14866 if (arm_arch5 || arm_arch4t)
14867 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14868 else
14869 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14870 break;
14873 return "";
14876 static void
14877 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14878 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14880 arm_stack_offsets *offsets;
14882 if (TARGET_THUMB1)
14884 int regno;
14886 /* Emit any call-via-reg trampolines that are needed for v4t support
14887 of call_reg and call_value_reg type insns. */
14888 for (regno = 0; regno < LR_REGNUM; regno++)
14890 rtx label = cfun->machine->call_via[regno];
14892 if (label != NULL)
14894 switch_to_section (function_section (current_function_decl));
14895 targetm.asm_out.internal_label (asm_out_file, "L",
14896 CODE_LABEL_NUMBER (label));
14897 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14901 /* ??? Probably not safe to set this here, since it assumes that a
14902 function will be emitted as assembly immediately after we generate
14903 RTL for it. This does not happen for inline functions. */
14904 cfun->machine->return_used_this_function = 0;
14906 else /* TARGET_32BIT */
14908 /* We need to take into account any stack-frame rounding. */
14909 offsets = arm_get_frame_offsets ();
14911 gcc_assert (!use_return_insn (FALSE, NULL)
14912 || (cfun->machine->return_used_this_function != 0)
14913 || offsets->saved_regs == offsets->outgoing_args
14914 || frame_pointer_needed);
14916 /* Reset the ARM-specific per-function variables. */
14917 after_arm_reorg = 0;
14921 /* Generate and emit an insn that we will recognize as a push_multi.
14922 Unfortunately, since this insn does not reflect very well the actual
14923 semantics of the operation, we need to annotate the insn for the benefit
14924 of DWARF2 frame unwind information. */
14925 static rtx
14926 emit_multi_reg_push (unsigned long mask)
14928 int num_regs = 0;
14929 int num_dwarf_regs;
14930 int i, j;
14931 rtx par;
14932 rtx dwarf;
14933 int dwarf_par_index;
14934 rtx tmp, reg;
14936 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14937 if (mask & (1 << i))
14938 num_regs++;
14940 gcc_assert (num_regs && num_regs <= 16);
14942 /* We don't record the PC in the dwarf frame information. */
14943 num_dwarf_regs = num_regs;
14944 if (mask & (1 << PC_REGNUM))
14945 num_dwarf_regs--;
14947 /* For the body of the insn we are going to generate an UNSPEC in
14948 parallel with several USEs. This allows the insn to be recognized
14949 by the push_multi pattern in the arm.md file.
14951 The body of the insn looks something like this:
14953 (parallel [
14954 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14955 (const_int:SI <num>)))
14956 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14957 (use (reg:SI XX))
14958 (use (reg:SI YY))
14962 For the frame note however, we try to be more explicit and actually
14963 show each register being stored into the stack frame, plus a (single)
14964 decrement of the stack pointer. We do it this way in order to be
14965 friendly to the stack unwinding code, which only wants to see a single
14966 stack decrement per instruction. The RTL we generate for the note looks
14967 something like this:
14969 (sequence [
14970 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14971 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14972 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14973 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14977 FIXME:: In an ideal world the PRE_MODIFY would not exist and
14978 instead we'd have a parallel expression detailing all
14979 the stores to the various memory addresses so that debug
14980 information is more up-to-date. Remember however while writing
14981 this to take care of the constraints with the push instruction.
14983 Note also that this has to be taken care of for the VFP registers.
14985 For more see PR43399. */
14987 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14988 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14989 dwarf_par_index = 1;
14991 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14993 if (mask & (1 << i))
14995 reg = gen_rtx_REG (SImode, i);
14997 XVECEXP (par, 0, 0)
14998 = gen_rtx_SET (VOIDmode,
14999 gen_frame_mem
15000 (BLKmode,
15001 gen_rtx_PRE_MODIFY (Pmode,
15002 stack_pointer_rtx,
15003 plus_constant
15004 (stack_pointer_rtx,
15005 -4 * num_regs))
15007 gen_rtx_UNSPEC (BLKmode,
15008 gen_rtvec (1, reg),
15009 UNSPEC_PUSH_MULT));
15011 if (i != PC_REGNUM)
15013 tmp = gen_rtx_SET (VOIDmode,
15014 gen_frame_mem (SImode, stack_pointer_rtx),
15015 reg);
15016 RTX_FRAME_RELATED_P (tmp) = 1;
15017 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15018 dwarf_par_index++;
15021 break;
15025 for (j = 1, i++; j < num_regs; i++)
15027 if (mask & (1 << i))
15029 reg = gen_rtx_REG (SImode, i);
15031 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15033 if (i != PC_REGNUM)
15036 = gen_rtx_SET (VOIDmode,
15037 gen_frame_mem
15038 (SImode,
15039 plus_constant (stack_pointer_rtx,
15040 4 * j)),
15041 reg);
15042 RTX_FRAME_RELATED_P (tmp) = 1;
15043 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15046 j++;
15050 par = emit_insn (par);
15052 tmp = gen_rtx_SET (VOIDmode,
15053 stack_pointer_rtx,
15054 plus_constant (stack_pointer_rtx, -4 * num_regs));
15055 RTX_FRAME_RELATED_P (tmp) = 1;
15056 XVECEXP (dwarf, 0, 0) = tmp;
15058 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15060 return par;
15063 /* Calculate the size of the return value that is passed in registers. */
15064 static unsigned
15065 arm_size_return_regs (void)
15067 enum machine_mode mode;
15069 if (crtl->return_rtx != 0)
15070 mode = GET_MODE (crtl->return_rtx);
15071 else
15072 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15074 return GET_MODE_SIZE (mode);
15077 static rtx
15078 emit_sfm (int base_reg, int count)
15080 rtx par;
15081 rtx dwarf;
15082 rtx tmp, reg;
15083 int i;
15085 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15086 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15088 reg = gen_rtx_REG (XFmode, base_reg++);
15090 XVECEXP (par, 0, 0)
15091 = gen_rtx_SET (VOIDmode,
15092 gen_frame_mem
15093 (BLKmode,
15094 gen_rtx_PRE_MODIFY (Pmode,
15095 stack_pointer_rtx,
15096 plus_constant
15097 (stack_pointer_rtx,
15098 -12 * count))
15100 gen_rtx_UNSPEC (BLKmode,
15101 gen_rtvec (1, reg),
15102 UNSPEC_PUSH_MULT));
15103 tmp = gen_rtx_SET (VOIDmode,
15104 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15105 RTX_FRAME_RELATED_P (tmp) = 1;
15106 XVECEXP (dwarf, 0, 1) = tmp;
15108 for (i = 1; i < count; i++)
15110 reg = gen_rtx_REG (XFmode, base_reg++);
15111 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15113 tmp = gen_rtx_SET (VOIDmode,
15114 gen_frame_mem (XFmode,
15115 plus_constant (stack_pointer_rtx,
15116 i * 12)),
15117 reg);
15118 RTX_FRAME_RELATED_P (tmp) = 1;
15119 XVECEXP (dwarf, 0, i + 1) = tmp;
15122 tmp = gen_rtx_SET (VOIDmode,
15123 stack_pointer_rtx,
15124 plus_constant (stack_pointer_rtx, -12 * count));
15126 RTX_FRAME_RELATED_P (tmp) = 1;
15127 XVECEXP (dwarf, 0, 0) = tmp;
15129 par = emit_insn (par);
15130 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15132 return par;
15136 /* Return true if the current function needs to save/restore LR. */
15138 static bool
15139 thumb_force_lr_save (void)
15141 return !cfun->machine->lr_save_eliminated
15142 && (!leaf_function_p ()
15143 || thumb_far_jump_used_p ()
15144 || df_regs_ever_live_p (LR_REGNUM));
15148 /* Compute the distance from register FROM to register TO.
15149 These can be the arg pointer (26), the soft frame pointer (25),
15150 the stack pointer (13) or the hard frame pointer (11).
15151 In thumb mode r7 is used as the soft frame pointer, if needed.
15152 Typical stack layout looks like this:
15154 old stack pointer -> | |
15155 ----
15156 | | \
15157 | | saved arguments for
15158 | | vararg functions
15159 | | /
15161 hard FP & arg pointer -> | | \
15162 | | stack
15163 | | frame
15164 | | /
15166 | | \
15167 | | call saved
15168 | | registers
15169 soft frame pointer -> | | /
15171 | | \
15172 | | local
15173 | | variables
15174 locals base pointer -> | | /
15176 | | \
15177 | | outgoing
15178 | | arguments
15179 current stack pointer -> | | /
15182 For a given function some or all of these stack components
15183 may not be needed, giving rise to the possibility of
15184 eliminating some of the registers.
15186 The values returned by this function must reflect the behavior
15187 of arm_expand_prologue() and arm_compute_save_reg_mask().
15189 The sign of the number returned reflects the direction of stack
15190 growth, so the values are positive for all eliminations except
15191 from the soft frame pointer to the hard frame pointer.
15193 SFP may point just inside the local variables block to ensure correct
15194 alignment. */
15197 /* Calculate stack offsets. These are used to calculate register elimination
15198 offsets and in prologue/epilogue code. Also calculates which registers
15199 should be saved. */
15201 static arm_stack_offsets *
15202 arm_get_frame_offsets (void)
15204 struct arm_stack_offsets *offsets;
15205 unsigned long func_type;
15206 int leaf;
15207 int saved;
15208 int core_saved;
15209 HOST_WIDE_INT frame_size;
15210 int i;
15212 offsets = &cfun->machine->stack_offsets;
15214 /* We need to know if we are a leaf function. Unfortunately, it
15215 is possible to be called after start_sequence has been called,
15216 which causes get_insns to return the insns for the sequence,
15217 not the function, which will cause leaf_function_p to return
15218 the incorrect result.
15220 to know about leaf functions once reload has completed, and the
15221 frame size cannot be changed after that time, so we can safely
15222 use the cached value. */
15224 if (reload_completed)
15225 return offsets;
15227 /* Initially this is the size of the local variables. It will translated
15228 into an offset once we have determined the size of preceding data. */
15229 frame_size = ROUND_UP_WORD (get_frame_size ());
15231 leaf = leaf_function_p ();
15233 /* Space for variadic functions. */
15234 offsets->saved_args = crtl->args.pretend_args_size;
15236 /* In Thumb mode this is incorrect, but never used. */
15237 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15238 arm_compute_static_chain_stack_bytes();
15240 if (TARGET_32BIT)
15242 unsigned int regno;
15244 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15245 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15246 saved = core_saved;
15248 /* We know that SP will be doubleword aligned on entry, and we must
15249 preserve that condition at any subroutine call. We also require the
15250 soft frame pointer to be doubleword aligned. */
15252 if (TARGET_REALLY_IWMMXT)
15254 /* Check for the call-saved iWMMXt registers. */
15255 for (regno = FIRST_IWMMXT_REGNUM;
15256 regno <= LAST_IWMMXT_REGNUM;
15257 regno++)
15258 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15259 saved += 8;
15262 func_type = arm_current_func_type ();
15263 if (! IS_VOLATILE (func_type))
15265 /* Space for saved FPA registers. */
15266 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15267 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15268 saved += 12;
15270 /* Space for saved VFP registers. */
15271 if (TARGET_HARD_FLOAT && TARGET_VFP)
15272 saved += arm_get_vfp_saved_size ();
15275 else /* TARGET_THUMB1 */
15277 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15278 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15279 saved = core_saved;
15280 if (TARGET_BACKTRACE)
15281 saved += 16;
15284 /* Saved registers include the stack frame. */
15285 offsets->saved_regs = offsets->saved_args + saved +
15286 arm_compute_static_chain_stack_bytes();
15287 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15288 /* A leaf function does not need any stack alignment if it has nothing
15289 on the stack. */
15290 if (leaf && frame_size == 0)
15292 offsets->outgoing_args = offsets->soft_frame;
15293 offsets->locals_base = offsets->soft_frame;
15294 return offsets;
15297 /* Ensure SFP has the correct alignment. */
15298 if (ARM_DOUBLEWORD_ALIGN
15299 && (offsets->soft_frame & 7))
15301 offsets->soft_frame += 4;
15302 /* Try to align stack by pushing an extra reg. Don't bother doing this
15303 when there is a stack frame as the alignment will be rolled into
15304 the normal stack adjustment. */
15305 if (frame_size + crtl->outgoing_args_size == 0)
15307 int reg = -1;
15309 /* If it is safe to use r3, then do so. This sometimes
15310 generates better code on Thumb-2 by avoiding the need to
15311 use 32-bit push/pop instructions. */
15312 if (!crtl->tail_call_emit
15313 && arm_size_return_regs () <= 12
15314 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15316 reg = 3;
15318 else
15319 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15321 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15323 reg = i;
15324 break;
15328 if (reg != -1)
15330 offsets->saved_regs += 4;
15331 offsets->saved_regs_mask |= (1 << reg);
15336 offsets->locals_base = offsets->soft_frame + frame_size;
15337 offsets->outgoing_args = (offsets->locals_base
15338 + crtl->outgoing_args_size);
15340 if (ARM_DOUBLEWORD_ALIGN)
15342 /* Ensure SP remains doubleword aligned. */
15343 if (offsets->outgoing_args & 7)
15344 offsets->outgoing_args += 4;
15345 gcc_assert (!(offsets->outgoing_args & 7));
15348 return offsets;
15352 /* Calculate the relative offsets for the different stack pointers. Positive
15353 offsets are in the direction of stack growth. */
15355 HOST_WIDE_INT
15356 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15358 arm_stack_offsets *offsets;
15360 offsets = arm_get_frame_offsets ();
15362 /* OK, now we have enough information to compute the distances.
15363 There must be an entry in these switch tables for each pair
15364 of registers in ELIMINABLE_REGS, even if some of the entries
15365 seem to be redundant or useless. */
15366 switch (from)
15368 case ARG_POINTER_REGNUM:
15369 switch (to)
15371 case THUMB_HARD_FRAME_POINTER_REGNUM:
15372 return 0;
15374 case FRAME_POINTER_REGNUM:
15375 /* This is the reverse of the soft frame pointer
15376 to hard frame pointer elimination below. */
15377 return offsets->soft_frame - offsets->saved_args;
15379 case ARM_HARD_FRAME_POINTER_REGNUM:
15380 /* This is only non-zero in the case where the static chain register
15381 is stored above the frame. */
15382 return offsets->frame - offsets->saved_args - 4;
15384 case STACK_POINTER_REGNUM:
15385 /* If nothing has been pushed on the stack at all
15386 then this will return -4. This *is* correct! */
15387 return offsets->outgoing_args - (offsets->saved_args + 4);
15389 default:
15390 gcc_unreachable ();
15392 gcc_unreachable ();
15394 case FRAME_POINTER_REGNUM:
15395 switch (to)
15397 case THUMB_HARD_FRAME_POINTER_REGNUM:
15398 return 0;
15400 case ARM_HARD_FRAME_POINTER_REGNUM:
15401 /* The hard frame pointer points to the top entry in the
15402 stack frame. The soft frame pointer to the bottom entry
15403 in the stack frame. If there is no stack frame at all,
15404 then they are identical. */
15406 return offsets->frame - offsets->soft_frame;
15408 case STACK_POINTER_REGNUM:
15409 return offsets->outgoing_args - offsets->soft_frame;
15411 default:
15412 gcc_unreachable ();
15414 gcc_unreachable ();
15416 default:
15417 /* You cannot eliminate from the stack pointer.
15418 In theory you could eliminate from the hard frame
15419 pointer to the stack pointer, but this will never
15420 happen, since if a stack frame is not needed the
15421 hard frame pointer will never be used. */
15422 gcc_unreachable ();
15426 /* Given FROM and TO register numbers, say whether this elimination is
15427 allowed. Frame pointer elimination is automatically handled.
15429 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15430 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15431 pointer, we must eliminate FRAME_POINTER_REGNUM into
15432 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15433 ARG_POINTER_REGNUM. */
15435 bool
15436 arm_can_eliminate (const int from, const int to)
15438 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15439 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15440 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15441 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15442 true);
15445 /* Emit RTL to save coprocessor registers on function entry. Returns the
15446 number of bytes pushed. */
15448 static int
15449 arm_save_coproc_regs(void)
15451 int saved_size = 0;
15452 unsigned reg;
15453 unsigned start_reg;
15454 rtx insn;
15456 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15457 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15459 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15460 insn = gen_rtx_MEM (V2SImode, insn);
15461 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15462 RTX_FRAME_RELATED_P (insn) = 1;
15463 saved_size += 8;
15466 /* Save any floating point call-saved registers used by this
15467 function. */
15468 if (TARGET_FPA_EMU2)
15470 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15471 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15473 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15474 insn = gen_rtx_MEM (XFmode, insn);
15475 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15476 RTX_FRAME_RELATED_P (insn) = 1;
15477 saved_size += 12;
15480 else
15482 start_reg = LAST_FPA_REGNUM;
15484 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15486 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15488 if (start_reg - reg == 3)
15490 insn = emit_sfm (reg, 4);
15491 RTX_FRAME_RELATED_P (insn) = 1;
15492 saved_size += 48;
15493 start_reg = reg - 1;
15496 else
15498 if (start_reg != reg)
15500 insn = emit_sfm (reg + 1, start_reg - reg);
15501 RTX_FRAME_RELATED_P (insn) = 1;
15502 saved_size += (start_reg - reg) * 12;
15504 start_reg = reg - 1;
15508 if (start_reg != reg)
15510 insn = emit_sfm (reg + 1, start_reg - reg);
15511 saved_size += (start_reg - reg) * 12;
15512 RTX_FRAME_RELATED_P (insn) = 1;
15515 if (TARGET_HARD_FLOAT && TARGET_VFP)
15517 start_reg = FIRST_VFP_REGNUM;
15519 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15521 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15522 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15524 if (start_reg != reg)
15525 saved_size += vfp_emit_fstmd (start_reg,
15526 (reg - start_reg) / 2);
15527 start_reg = reg + 2;
15530 if (start_reg != reg)
15531 saved_size += vfp_emit_fstmd (start_reg,
15532 (reg - start_reg) / 2);
15534 return saved_size;
15538 /* Set the Thumb frame pointer from the stack pointer. */
15540 static void
15541 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15543 HOST_WIDE_INT amount;
15544 rtx insn, dwarf;
15546 amount = offsets->outgoing_args - offsets->locals_base;
15547 if (amount < 1024)
15548 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15549 stack_pointer_rtx, GEN_INT (amount)));
15550 else
15552 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15553 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15554 expects the first two operands to be the same. */
15555 if (TARGET_THUMB2)
15557 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15558 stack_pointer_rtx,
15559 hard_frame_pointer_rtx));
15561 else
15563 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15564 hard_frame_pointer_rtx,
15565 stack_pointer_rtx));
15567 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15568 plus_constant (stack_pointer_rtx, amount));
15569 RTX_FRAME_RELATED_P (dwarf) = 1;
15570 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15573 RTX_FRAME_RELATED_P (insn) = 1;
15576 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15577 function. */
15578 void
15579 arm_expand_prologue (void)
15581 rtx amount;
15582 rtx insn;
15583 rtx ip_rtx;
15584 unsigned long live_regs_mask;
15585 unsigned long func_type;
15586 int fp_offset = 0;
15587 int saved_pretend_args = 0;
15588 int saved_regs = 0;
15589 unsigned HOST_WIDE_INT args_to_push;
15590 arm_stack_offsets *offsets;
15592 func_type = arm_current_func_type ();
15594 /* Naked functions don't have prologues. */
15595 if (IS_NAKED (func_type))
15596 return;
15598 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15599 args_to_push = crtl->args.pretend_args_size;
15601 /* Compute which register we will have to save onto the stack. */
15602 offsets = arm_get_frame_offsets ();
15603 live_regs_mask = offsets->saved_regs_mask;
15605 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15607 if (IS_STACKALIGN (func_type))
15609 rtx dwarf;
15610 rtx r0;
15611 rtx r1;
15612 /* Handle a word-aligned stack pointer. We generate the following:
15614 mov r0, sp
15615 bic r1, r0, #7
15616 mov sp, r1
15617 <save and restore r0 in normal prologue/epilogue>
15618 mov sp, r0
15619 bx lr
15621 The unwinder doesn't need to know about the stack realignment.
15622 Just tell it we saved SP in r0. */
15623 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15625 r0 = gen_rtx_REG (SImode, 0);
15626 r1 = gen_rtx_REG (SImode, 1);
15627 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15628 compiler won't choke. */
15629 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15630 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15631 insn = gen_movsi (r0, stack_pointer_rtx);
15632 RTX_FRAME_RELATED_P (insn) = 1;
15633 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15634 emit_insn (insn);
15635 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15636 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15639 /* For APCS frames, if IP register is clobbered
15640 when creating frame, save that register in a special
15641 way. */
15642 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15644 if (IS_INTERRUPT (func_type))
15646 /* Interrupt functions must not corrupt any registers.
15647 Creating a frame pointer however, corrupts the IP
15648 register, so we must push it first. */
15649 insn = emit_multi_reg_push (1 << IP_REGNUM);
15651 /* Do not set RTX_FRAME_RELATED_P on this insn.
15652 The dwarf stack unwinding code only wants to see one
15653 stack decrement per function, and this is not it. If
15654 this instruction is labeled as being part of the frame
15655 creation sequence then dwarf2out_frame_debug_expr will
15656 die when it encounters the assignment of IP to FP
15657 later on, since the use of SP here establishes SP as
15658 the CFA register and not IP.
15660 Anyway this instruction is not really part of the stack
15661 frame creation although it is part of the prologue. */
15663 else if (IS_NESTED (func_type))
15665 /* The Static chain register is the same as the IP register
15666 used as a scratch register during stack frame creation.
15667 To get around this need to find somewhere to store IP
15668 whilst the frame is being created. We try the following
15669 places in order:
15671 1. The last argument register.
15672 2. A slot on the stack above the frame. (This only
15673 works if the function is not a varargs function).
15674 3. Register r3, after pushing the argument registers
15675 onto the stack.
15677 Note - we only need to tell the dwarf2 backend about the SP
15678 adjustment in the second variant; the static chain register
15679 doesn't need to be unwound, as it doesn't contain a value
15680 inherited from the caller. */
15682 if (df_regs_ever_live_p (3) == false)
15683 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15684 else if (args_to_push == 0)
15686 rtx dwarf;
15688 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15689 saved_regs += 4;
15691 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15692 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15693 fp_offset = 4;
15695 /* Just tell the dwarf backend that we adjusted SP. */
15696 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15697 plus_constant (stack_pointer_rtx,
15698 -fp_offset));
15699 RTX_FRAME_RELATED_P (insn) = 1;
15700 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15702 else
15704 /* Store the args on the stack. */
15705 if (cfun->machine->uses_anonymous_args)
15706 insn = emit_multi_reg_push
15707 ((0xf0 >> (args_to_push / 4)) & 0xf);
15708 else
15709 insn = emit_insn
15710 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15711 GEN_INT (- args_to_push)));
15713 RTX_FRAME_RELATED_P (insn) = 1;
15715 saved_pretend_args = 1;
15716 fp_offset = args_to_push;
15717 args_to_push = 0;
15719 /* Now reuse r3 to preserve IP. */
15720 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15724 insn = emit_set_insn (ip_rtx,
15725 plus_constant (stack_pointer_rtx, fp_offset));
15726 RTX_FRAME_RELATED_P (insn) = 1;
15729 if (args_to_push)
15731 /* Push the argument registers, or reserve space for them. */
15732 if (cfun->machine->uses_anonymous_args)
15733 insn = emit_multi_reg_push
15734 ((0xf0 >> (args_to_push / 4)) & 0xf);
15735 else
15736 insn = emit_insn
15737 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15738 GEN_INT (- args_to_push)));
15739 RTX_FRAME_RELATED_P (insn) = 1;
15742 /* If this is an interrupt service routine, and the link register
15743 is going to be pushed, and we're not generating extra
15744 push of IP (needed when frame is needed and frame layout if apcs),
15745 subtracting four from LR now will mean that the function return
15746 can be done with a single instruction. */
15747 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15748 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15749 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15750 && TARGET_ARM)
15752 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15754 emit_set_insn (lr, plus_constant (lr, -4));
15757 if (live_regs_mask)
15759 saved_regs += bit_count (live_regs_mask) * 4;
15760 if (optimize_size && !frame_pointer_needed
15761 && saved_regs == offsets->saved_regs - offsets->saved_args)
15763 /* If no coprocessor registers are being pushed and we don't have
15764 to worry about a frame pointer then push extra registers to
15765 create the stack frame. This is done is a way that does not
15766 alter the frame layout, so is independent of the epilogue. */
15767 int n;
15768 int frame;
15769 n = 0;
15770 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15771 n++;
15772 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15773 if (frame && n * 4 >= frame)
15775 n = frame / 4;
15776 live_regs_mask |= (1 << n) - 1;
15777 saved_regs += frame;
15780 insn = emit_multi_reg_push (live_regs_mask);
15781 RTX_FRAME_RELATED_P (insn) = 1;
15784 if (! IS_VOLATILE (func_type))
15785 saved_regs += arm_save_coproc_regs ();
15787 if (frame_pointer_needed && TARGET_ARM)
15789 /* Create the new frame pointer. */
15790 if (TARGET_APCS_FRAME)
15792 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15793 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15794 RTX_FRAME_RELATED_P (insn) = 1;
15796 if (IS_NESTED (func_type))
15798 /* Recover the static chain register. */
15799 if (!df_regs_ever_live_p (3)
15800 || saved_pretend_args)
15801 insn = gen_rtx_REG (SImode, 3);
15802 else /* if (crtl->args.pretend_args_size == 0) */
15804 insn = plus_constant (hard_frame_pointer_rtx, 4);
15805 insn = gen_frame_mem (SImode, insn);
15807 emit_set_insn (ip_rtx, insn);
15808 /* Add a USE to stop propagate_one_insn() from barfing. */
15809 emit_insn (gen_prologue_use (ip_rtx));
15812 else
15814 insn = GEN_INT (saved_regs - 4);
15815 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15816 stack_pointer_rtx, insn));
15817 RTX_FRAME_RELATED_P (insn) = 1;
15821 if (flag_stack_usage)
15822 current_function_static_stack_size
15823 = offsets->outgoing_args - offsets->saved_args;
15825 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15827 /* This add can produce multiple insns for a large constant, so we
15828 need to get tricky. */
15829 rtx last = get_last_insn ();
15831 amount = GEN_INT (offsets->saved_args + saved_regs
15832 - offsets->outgoing_args);
15834 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15835 amount));
15838 last = last ? NEXT_INSN (last) : get_insns ();
15839 RTX_FRAME_RELATED_P (last) = 1;
15841 while (last != insn);
15843 /* If the frame pointer is needed, emit a special barrier that
15844 will prevent the scheduler from moving stores to the frame
15845 before the stack adjustment. */
15846 if (frame_pointer_needed)
15847 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15848 hard_frame_pointer_rtx));
15852 if (frame_pointer_needed && TARGET_THUMB2)
15853 thumb_set_frame_pointer (offsets);
15855 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15857 unsigned long mask;
15859 mask = live_regs_mask;
15860 mask &= THUMB2_WORK_REGS;
15861 if (!IS_NESTED (func_type))
15862 mask |= (1 << IP_REGNUM);
15863 arm_load_pic_register (mask);
15866 /* If we are profiling, make sure no instructions are scheduled before
15867 the call to mcount. Similarly if the user has requested no
15868 scheduling in the prolog. Similarly if we want non-call exceptions
15869 using the EABI unwinder, to prevent faulting instructions from being
15870 swapped with a stack adjustment. */
15871 if (crtl->profile || !TARGET_SCHED_PROLOG
15872 || (arm_except_unwind_info (&global_options) == UI_TARGET
15873 && cfun->can_throw_non_call_exceptions))
15874 emit_insn (gen_blockage ());
15876 /* If the link register is being kept alive, with the return address in it,
15877 then make sure that it does not get reused by the ce2 pass. */
15878 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15879 cfun->machine->lr_save_eliminated = 1;
15882 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15883 static void
15884 arm_print_condition (FILE *stream)
15886 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15888 /* Branch conversion is not implemented for Thumb-2. */
15889 if (TARGET_THUMB)
15891 output_operand_lossage ("predicated Thumb instruction");
15892 return;
15894 if (current_insn_predicate != NULL)
15896 output_operand_lossage
15897 ("predicated instruction in conditional sequence");
15898 return;
15901 fputs (arm_condition_codes[arm_current_cc], stream);
15903 else if (current_insn_predicate)
15905 enum arm_cond_code code;
15907 if (TARGET_THUMB1)
15909 output_operand_lossage ("predicated Thumb instruction");
15910 return;
15913 code = get_arm_condition_code (current_insn_predicate);
15914 fputs (arm_condition_codes[code], stream);
15919 /* If CODE is 'd', then the X is a condition operand and the instruction
15920 should only be executed if the condition is true.
15921 if CODE is 'D', then the X is a condition operand and the instruction
15922 should only be executed if the condition is false: however, if the mode
15923 of the comparison is CCFPEmode, then always execute the instruction -- we
15924 do this because in these circumstances !GE does not necessarily imply LT;
15925 in these cases the instruction pattern will take care to make sure that
15926 an instruction containing %d will follow, thereby undoing the effects of
15927 doing this instruction unconditionally.
15928 If CODE is 'N' then X is a floating point operand that must be negated
15929 before output.
15930 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15931 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15932 static void
15933 arm_print_operand (FILE *stream, rtx x, int code)
15935 switch (code)
15937 case '@':
15938 fputs (ASM_COMMENT_START, stream);
15939 return;
15941 case '_':
15942 fputs (user_label_prefix, stream);
15943 return;
15945 case '|':
15946 fputs (REGISTER_PREFIX, stream);
15947 return;
15949 case '?':
15950 arm_print_condition (stream);
15951 return;
15953 case '(':
15954 /* Nothing in unified syntax, otherwise the current condition code. */
15955 if (!TARGET_UNIFIED_ASM)
15956 arm_print_condition (stream);
15957 break;
15959 case ')':
15960 /* The current condition code in unified syntax, otherwise nothing. */
15961 if (TARGET_UNIFIED_ASM)
15962 arm_print_condition (stream);
15963 break;
15965 case '.':
15966 /* The current condition code for a condition code setting instruction.
15967 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15968 if (TARGET_UNIFIED_ASM)
15970 fputc('s', stream);
15971 arm_print_condition (stream);
15973 else
15975 arm_print_condition (stream);
15976 fputc('s', stream);
15978 return;
15980 case '!':
15981 /* If the instruction is conditionally executed then print
15982 the current condition code, otherwise print 's'. */
15983 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15984 if (current_insn_predicate)
15985 arm_print_condition (stream);
15986 else
15987 fputc('s', stream);
15988 break;
15990 /* %# is a "break" sequence. It doesn't output anything, but is used to
15991 separate e.g. operand numbers from following text, if that text consists
15992 of further digits which we don't want to be part of the operand
15993 number. */
15994 case '#':
15995 return;
15997 case 'N':
15999 REAL_VALUE_TYPE r;
16000 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16001 r = real_value_negate (&r);
16002 fprintf (stream, "%s", fp_const_from_val (&r));
16004 return;
16006 /* An integer or symbol address without a preceding # sign. */
16007 case 'c':
16008 switch (GET_CODE (x))
16010 case CONST_INT:
16011 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16012 break;
16014 case SYMBOL_REF:
16015 output_addr_const (stream, x);
16016 break;
16018 default:
16019 gcc_unreachable ();
16021 return;
16023 case 'B':
16024 if (GET_CODE (x) == CONST_INT)
16026 HOST_WIDE_INT val;
16027 val = ARM_SIGN_EXTEND (~INTVAL (x));
16028 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16030 else
16032 putc ('~', stream);
16033 output_addr_const (stream, x);
16035 return;
16037 case 'L':
16038 /* The low 16 bits of an immediate constant. */
16039 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16040 return;
16042 case 'i':
16043 fprintf (stream, "%s", arithmetic_instr (x, 1));
16044 return;
16046 /* Truncate Cirrus shift counts. */
16047 case 's':
16048 if (GET_CODE (x) == CONST_INT)
16050 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16051 return;
16053 arm_print_operand (stream, x, 0);
16054 return;
16056 case 'I':
16057 fprintf (stream, "%s", arithmetic_instr (x, 0));
16058 return;
16060 case 'S':
16062 HOST_WIDE_INT val;
16063 const char *shift;
16065 if (!shift_operator (x, SImode))
16067 output_operand_lossage ("invalid shift operand");
16068 break;
16071 shift = shift_op (x, &val);
16073 if (shift)
16075 fprintf (stream, ", %s ", shift);
16076 if (val == -1)
16077 arm_print_operand (stream, XEXP (x, 1), 0);
16078 else
16079 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16082 return;
16084 /* An explanation of the 'Q', 'R' and 'H' register operands:
16086 In a pair of registers containing a DI or DF value the 'Q'
16087 operand returns the register number of the register containing
16088 the least significant part of the value. The 'R' operand returns
16089 the register number of the register containing the most
16090 significant part of the value.
16092 The 'H' operand returns the higher of the two register numbers.
16093 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16094 same as the 'Q' operand, since the most significant part of the
16095 value is held in the lower number register. The reverse is true
16096 on systems where WORDS_BIG_ENDIAN is false.
16098 The purpose of these operands is to distinguish between cases
16099 where the endian-ness of the values is important (for example
16100 when they are added together), and cases where the endian-ness
16101 is irrelevant, but the order of register operations is important.
16102 For example when loading a value from memory into a register
16103 pair, the endian-ness does not matter. Provided that the value
16104 from the lower memory address is put into the lower numbered
16105 register, and the value from the higher address is put into the
16106 higher numbered register, the load will work regardless of whether
16107 the value being loaded is big-wordian or little-wordian. The
16108 order of the two register loads can matter however, if the address
16109 of the memory location is actually held in one of the registers
16110 being overwritten by the load.
16112 The 'Q' and 'R' constraints are also available for 64-bit
16113 constants. */
16114 case 'Q':
16115 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16117 rtx part = gen_lowpart (SImode, x);
16118 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16119 return;
16122 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16124 output_operand_lossage ("invalid operand for code '%c'", code);
16125 return;
16128 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16129 return;
16131 case 'R':
16132 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16134 enum machine_mode mode = GET_MODE (x);
16135 rtx part;
16137 if (mode == VOIDmode)
16138 mode = DImode;
16139 part = gen_highpart_mode (SImode, mode, x);
16140 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16141 return;
16144 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16146 output_operand_lossage ("invalid operand for code '%c'", code);
16147 return;
16150 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16151 return;
16153 case 'H':
16154 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16156 output_operand_lossage ("invalid operand for code '%c'", code);
16157 return;
16160 asm_fprintf (stream, "%r", REGNO (x) + 1);
16161 return;
16163 case 'J':
16164 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16166 output_operand_lossage ("invalid operand for code '%c'", code);
16167 return;
16170 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16171 return;
16173 case 'K':
16174 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16176 output_operand_lossage ("invalid operand for code '%c'", code);
16177 return;
16180 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16181 return;
16183 case 'm':
16184 asm_fprintf (stream, "%r",
16185 GET_CODE (XEXP (x, 0)) == REG
16186 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16187 return;
16189 case 'M':
16190 asm_fprintf (stream, "{%r-%r}",
16191 REGNO (x),
16192 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16193 return;
16195 /* Like 'M', but writing doubleword vector registers, for use by Neon
16196 insns. */
16197 case 'h':
16199 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16200 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16201 if (numregs == 1)
16202 asm_fprintf (stream, "{d%d}", regno);
16203 else
16204 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16206 return;
16208 case 'd':
16209 /* CONST_TRUE_RTX means always -- that's the default. */
16210 if (x == const_true_rtx)
16211 return;
16213 if (!COMPARISON_P (x))
16215 output_operand_lossage ("invalid operand for code '%c'", code);
16216 return;
16219 fputs (arm_condition_codes[get_arm_condition_code (x)],
16220 stream);
16221 return;
16223 case 'D':
16224 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16225 want to do that. */
16226 if (x == const_true_rtx)
16228 output_operand_lossage ("instruction never executed");
16229 return;
16231 if (!COMPARISON_P (x))
16233 output_operand_lossage ("invalid operand for code '%c'", code);
16234 return;
16237 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16238 (get_arm_condition_code (x))],
16239 stream);
16240 return;
16242 /* Cirrus registers can be accessed in a variety of ways:
16243 single floating point (f)
16244 double floating point (d)
16245 32bit integer (fx)
16246 64bit integer (dx). */
16247 case 'W': /* Cirrus register in F mode. */
16248 case 'X': /* Cirrus register in D mode. */
16249 case 'Y': /* Cirrus register in FX mode. */
16250 case 'Z': /* Cirrus register in DX mode. */
16251 gcc_assert (GET_CODE (x) == REG
16252 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16254 fprintf (stream, "mv%s%s",
16255 code == 'W' ? "f"
16256 : code == 'X' ? "d"
16257 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16259 return;
16261 /* Print cirrus register in the mode specified by the register's mode. */
16262 case 'V':
16264 int mode = GET_MODE (x);
16266 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16268 output_operand_lossage ("invalid operand for code '%c'", code);
16269 return;
16272 fprintf (stream, "mv%s%s",
16273 mode == DFmode ? "d"
16274 : mode == SImode ? "fx"
16275 : mode == DImode ? "dx"
16276 : "f", reg_names[REGNO (x)] + 2);
16278 return;
16281 case 'U':
16282 if (GET_CODE (x) != REG
16283 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16284 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16285 /* Bad value for wCG register number. */
16287 output_operand_lossage ("invalid operand for code '%c'", code);
16288 return;
16291 else
16292 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16293 return;
16295 /* Print an iWMMXt control register name. */
16296 case 'w':
16297 if (GET_CODE (x) != CONST_INT
16298 || INTVAL (x) < 0
16299 || INTVAL (x) >= 16)
16300 /* Bad value for wC register number. */
16302 output_operand_lossage ("invalid operand for code '%c'", code);
16303 return;
16306 else
16308 static const char * wc_reg_names [16] =
16310 "wCID", "wCon", "wCSSF", "wCASF",
16311 "wC4", "wC5", "wC6", "wC7",
16312 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16313 "wC12", "wC13", "wC14", "wC15"
16316 fprintf (stream, wc_reg_names [INTVAL (x)]);
16318 return;
16320 /* Print the high single-precision register of a VFP double-precision
16321 register. */
16322 case 'p':
16324 int mode = GET_MODE (x);
16325 int regno;
16327 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16329 output_operand_lossage ("invalid operand for code '%c'", code);
16330 return;
16333 regno = REGNO (x);
16334 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16336 output_operand_lossage ("invalid operand for code '%c'", code);
16337 return;
16340 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16342 return;
16344 /* Print a VFP/Neon double precision or quad precision register name. */
16345 case 'P':
16346 case 'q':
16348 int mode = GET_MODE (x);
16349 int is_quad = (code == 'q');
16350 int regno;
16352 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16354 output_operand_lossage ("invalid operand for code '%c'", code);
16355 return;
16358 if (GET_CODE (x) != REG
16359 || !IS_VFP_REGNUM (REGNO (x)))
16361 output_operand_lossage ("invalid operand for code '%c'", code);
16362 return;
16365 regno = REGNO (x);
16366 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16367 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16369 output_operand_lossage ("invalid operand for code '%c'", code);
16370 return;
16373 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16374 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16376 return;
16378 /* These two codes print the low/high doubleword register of a Neon quad
16379 register, respectively. For pair-structure types, can also print
16380 low/high quadword registers. */
16381 case 'e':
16382 case 'f':
16384 int mode = GET_MODE (x);
16385 int regno;
16387 if ((GET_MODE_SIZE (mode) != 16
16388 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16390 output_operand_lossage ("invalid operand for code '%c'", code);
16391 return;
16394 regno = REGNO (x);
16395 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16397 output_operand_lossage ("invalid operand for code '%c'", code);
16398 return;
16401 if (GET_MODE_SIZE (mode) == 16)
16402 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16403 + (code == 'f' ? 1 : 0));
16404 else
16405 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16406 + (code == 'f' ? 1 : 0));
16408 return;
16410 /* Print a VFPv3 floating-point constant, represented as an integer
16411 index. */
16412 case 'G':
16414 int index = vfp3_const_double_index (x);
16415 gcc_assert (index != -1);
16416 fprintf (stream, "%d", index);
16418 return;
16420 /* Print bits representing opcode features for Neon.
16422 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16423 and polynomials as unsigned.
16425 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16427 Bit 2 is 1 for rounding functions, 0 otherwise. */
16429 /* Identify the type as 's', 'u', 'p' or 'f'. */
16430 case 'T':
16432 HOST_WIDE_INT bits = INTVAL (x);
16433 fputc ("uspf"[bits & 3], stream);
16435 return;
16437 /* Likewise, but signed and unsigned integers are both 'i'. */
16438 case 'F':
16440 HOST_WIDE_INT bits = INTVAL (x);
16441 fputc ("iipf"[bits & 3], stream);
16443 return;
16445 /* As for 'T', but emit 'u' instead of 'p'. */
16446 case 't':
16448 HOST_WIDE_INT bits = INTVAL (x);
16449 fputc ("usuf"[bits & 3], stream);
16451 return;
16453 /* Bit 2: rounding (vs none). */
16454 case 'O':
16456 HOST_WIDE_INT bits = INTVAL (x);
16457 fputs ((bits & 4) != 0 ? "r" : "", stream);
16459 return;
16461 /* Memory operand for vld1/vst1 instruction. */
16462 case 'A':
16464 rtx addr;
16465 bool postinc = FALSE;
16466 unsigned align, modesize, align_bits;
16468 gcc_assert (GET_CODE (x) == MEM);
16469 addr = XEXP (x, 0);
16470 if (GET_CODE (addr) == POST_INC)
16472 postinc = 1;
16473 addr = XEXP (addr, 0);
16475 asm_fprintf (stream, "[%r", REGNO (addr));
16477 /* We know the alignment of this access, so we can emit a hint in the
16478 instruction (for some alignments) as an aid to the memory subsystem
16479 of the target. */
16480 align = MEM_ALIGN (x) >> 3;
16481 modesize = GET_MODE_SIZE (GET_MODE (x));
16483 /* Only certain alignment specifiers are supported by the hardware. */
16484 if (modesize == 16 && (align % 32) == 0)
16485 align_bits = 256;
16486 else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
16487 align_bits = 128;
16488 else if ((align % 8) == 0)
16489 align_bits = 64;
16490 else
16491 align_bits = 0;
16493 if (align_bits != 0)
16494 asm_fprintf (stream, ":%d", align_bits);
16496 asm_fprintf (stream, "]");
16498 if (postinc)
16499 fputs("!", stream);
16501 return;
16503 case 'C':
16505 rtx addr;
16507 gcc_assert (GET_CODE (x) == MEM);
16508 addr = XEXP (x, 0);
16509 gcc_assert (GET_CODE (addr) == REG);
16510 asm_fprintf (stream, "[%r]", REGNO (addr));
16512 return;
16514 /* Translate an S register number into a D register number and element index. */
16515 case 'y':
16517 int mode = GET_MODE (x);
16518 int regno;
16520 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16522 output_operand_lossage ("invalid operand for code '%c'", code);
16523 return;
16526 regno = REGNO (x);
16527 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16529 output_operand_lossage ("invalid operand for code '%c'", code);
16530 return;
16533 regno = regno - FIRST_VFP_REGNUM;
16534 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16536 return;
16538 /* Register specifier for vld1.16/vst1.16. Translate the S register
16539 number into a D register number and element index. */
16540 case 'z':
16542 int mode = GET_MODE (x);
16543 int regno;
16545 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16547 output_operand_lossage ("invalid operand for code '%c'", code);
16548 return;
16551 regno = REGNO (x);
16552 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16554 output_operand_lossage ("invalid operand for code '%c'", code);
16555 return;
16558 regno = regno - FIRST_VFP_REGNUM;
16559 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16561 return;
16563 default:
16564 if (x == 0)
16566 output_operand_lossage ("missing operand");
16567 return;
16570 switch (GET_CODE (x))
16572 case REG:
16573 asm_fprintf (stream, "%r", REGNO (x));
16574 break;
16576 case MEM:
16577 output_memory_reference_mode = GET_MODE (x);
16578 output_address (XEXP (x, 0));
16579 break;
16581 case CONST_DOUBLE:
16582 if (TARGET_NEON)
16584 char fpstr[20];
16585 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16586 sizeof (fpstr), 0, 1);
16587 fprintf (stream, "#%s", fpstr);
16589 else
16590 fprintf (stream, "#%s", fp_immediate_constant (x));
16591 break;
16593 default:
16594 gcc_assert (GET_CODE (x) != NEG);
16595 fputc ('#', stream);
16596 if (GET_CODE (x) == HIGH)
16598 fputs (":lower16:", stream);
16599 x = XEXP (x, 0);
16602 output_addr_const (stream, x);
16603 break;
16608 /* Target hook for printing a memory address. */
16609 static void
16610 arm_print_operand_address (FILE *stream, rtx x)
16612 if (TARGET_32BIT)
16614 int is_minus = GET_CODE (x) == MINUS;
16616 if (GET_CODE (x) == REG)
16617 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16618 else if (GET_CODE (x) == PLUS || is_minus)
16620 rtx base = XEXP (x, 0);
16621 rtx index = XEXP (x, 1);
16622 HOST_WIDE_INT offset = 0;
16623 if (GET_CODE (base) != REG
16624 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16626 /* Ensure that BASE is a register. */
16627 /* (one of them must be). */
16628 /* Also ensure the SP is not used as in index register. */
16629 rtx temp = base;
16630 base = index;
16631 index = temp;
16633 switch (GET_CODE (index))
16635 case CONST_INT:
16636 offset = INTVAL (index);
16637 if (is_minus)
16638 offset = -offset;
16639 asm_fprintf (stream, "[%r, #%wd]",
16640 REGNO (base), offset);
16641 break;
16643 case REG:
16644 asm_fprintf (stream, "[%r, %s%r]",
16645 REGNO (base), is_minus ? "-" : "",
16646 REGNO (index));
16647 break;
16649 case MULT:
16650 case ASHIFTRT:
16651 case LSHIFTRT:
16652 case ASHIFT:
16653 case ROTATERT:
16655 asm_fprintf (stream, "[%r, %s%r",
16656 REGNO (base), is_minus ? "-" : "",
16657 REGNO (XEXP (index, 0)));
16658 arm_print_operand (stream, index, 'S');
16659 fputs ("]", stream);
16660 break;
16663 default:
16664 gcc_unreachable ();
16667 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16668 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16670 extern enum machine_mode output_memory_reference_mode;
16672 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16674 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16675 asm_fprintf (stream, "[%r, #%s%d]!",
16676 REGNO (XEXP (x, 0)),
16677 GET_CODE (x) == PRE_DEC ? "-" : "",
16678 GET_MODE_SIZE (output_memory_reference_mode));
16679 else
16680 asm_fprintf (stream, "[%r], #%s%d",
16681 REGNO (XEXP (x, 0)),
16682 GET_CODE (x) == POST_DEC ? "-" : "",
16683 GET_MODE_SIZE (output_memory_reference_mode));
16685 else if (GET_CODE (x) == PRE_MODIFY)
16687 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16688 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16689 asm_fprintf (stream, "#%wd]!",
16690 INTVAL (XEXP (XEXP (x, 1), 1)));
16691 else
16692 asm_fprintf (stream, "%r]!",
16693 REGNO (XEXP (XEXP (x, 1), 1)));
16695 else if (GET_CODE (x) == POST_MODIFY)
16697 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16698 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16699 asm_fprintf (stream, "#%wd",
16700 INTVAL (XEXP (XEXP (x, 1), 1)));
16701 else
16702 asm_fprintf (stream, "%r",
16703 REGNO (XEXP (XEXP (x, 1), 1)));
16705 else output_addr_const (stream, x);
16707 else
16709 if (GET_CODE (x) == REG)
16710 asm_fprintf (stream, "[%r]", REGNO (x));
16711 else if (GET_CODE (x) == POST_INC)
16712 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16713 else if (GET_CODE (x) == PLUS)
16715 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16716 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16717 asm_fprintf (stream, "[%r, #%wd]",
16718 REGNO (XEXP (x, 0)),
16719 INTVAL (XEXP (x, 1)));
16720 else
16721 asm_fprintf (stream, "[%r, %r]",
16722 REGNO (XEXP (x, 0)),
16723 REGNO (XEXP (x, 1)));
16725 else
16726 output_addr_const (stream, x);
16730 /* Target hook for indicating whether a punctuation character for
16731 TARGET_PRINT_OPERAND is valid. */
16732 static bool
16733 arm_print_operand_punct_valid_p (unsigned char code)
16735 return (code == '@' || code == '|' || code == '.'
16736 || code == '(' || code == ')' || code == '#'
16737 || (TARGET_32BIT && (code == '?'))
16738 || (TARGET_THUMB2 && (code == '!'))
16739 || (TARGET_THUMB && (code == '_')));
16742 /* Target hook for assembling integer objects. The ARM version needs to
16743 handle word-sized values specially. */
16744 static bool
16745 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16747 enum machine_mode mode;
16749 if (size == UNITS_PER_WORD && aligned_p)
16751 fputs ("\t.word\t", asm_out_file);
16752 output_addr_const (asm_out_file, x);
16754 /* Mark symbols as position independent. We only do this in the
16755 .text segment, not in the .data segment. */
16756 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16757 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16759 /* See legitimize_pic_address for an explanation of the
16760 TARGET_VXWORKS_RTP check. */
16761 if (TARGET_VXWORKS_RTP
16762 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16763 fputs ("(GOT)", asm_out_file);
16764 else
16765 fputs ("(GOTOFF)", asm_out_file);
16767 fputc ('\n', asm_out_file);
16768 return true;
16771 mode = GET_MODE (x);
16773 if (arm_vector_mode_supported_p (mode))
16775 int i, units;
16777 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16779 units = CONST_VECTOR_NUNITS (x);
16780 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16782 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16783 for (i = 0; i < units; i++)
16785 rtx elt = CONST_VECTOR_ELT (x, i);
16786 assemble_integer
16787 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16789 else
16790 for (i = 0; i < units; i++)
16792 rtx elt = CONST_VECTOR_ELT (x, i);
16793 REAL_VALUE_TYPE rval;
16795 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16797 assemble_real
16798 (rval, GET_MODE_INNER (mode),
16799 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16802 return true;
16805 return default_assemble_integer (x, size, aligned_p);
16808 static void
16809 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16811 section *s;
16813 if (!TARGET_AAPCS_BASED)
16815 (is_ctor ?
16816 default_named_section_asm_out_constructor
16817 : default_named_section_asm_out_destructor) (symbol, priority);
16818 return;
16821 /* Put these in the .init_array section, using a special relocation. */
16822 if (priority != DEFAULT_INIT_PRIORITY)
16824 char buf[18];
16825 sprintf (buf, "%s.%.5u",
16826 is_ctor ? ".init_array" : ".fini_array",
16827 priority);
16828 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16830 else if (is_ctor)
16831 s = ctors_section;
16832 else
16833 s = dtors_section;
16835 switch_to_section (s);
16836 assemble_align (POINTER_SIZE);
16837 fputs ("\t.word\t", asm_out_file);
16838 output_addr_const (asm_out_file, symbol);
16839 fputs ("(target1)\n", asm_out_file);
16842 /* Add a function to the list of static constructors. */
16844 static void
16845 arm_elf_asm_constructor (rtx symbol, int priority)
16847 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16850 /* Add a function to the list of static destructors. */
16852 static void
16853 arm_elf_asm_destructor (rtx symbol, int priority)
16855 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16858 /* A finite state machine takes care of noticing whether or not instructions
16859 can be conditionally executed, and thus decrease execution time and code
16860 size by deleting branch instructions. The fsm is controlled by
16861 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16863 /* The state of the fsm controlling condition codes are:
16864 0: normal, do nothing special
16865 1: make ASM_OUTPUT_OPCODE not output this instruction
16866 2: make ASM_OUTPUT_OPCODE not output this instruction
16867 3: make instructions conditional
16868 4: make instructions conditional
16870 State transitions (state->state by whom under condition):
16871 0 -> 1 final_prescan_insn if the `target' is a label
16872 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16873 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16874 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16875 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16876 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16877 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16878 (the target insn is arm_target_insn).
16880 If the jump clobbers the conditions then we use states 2 and 4.
16882 A similar thing can be done with conditional return insns.
16884 XXX In case the `target' is an unconditional branch, this conditionalising
16885 of the instructions always reduces code size, but not always execution
16886 time. But then, I want to reduce the code size to somewhere near what
16887 /bin/cc produces. */
16889 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16890 instructions. When a COND_EXEC instruction is seen the subsequent
16891 instructions are scanned so that multiple conditional instructions can be
16892 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16893 specify the length and true/false mask for the IT block. These will be
16894 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
16896 /* Returns the index of the ARM condition code string in
16897 `arm_condition_codes'. COMPARISON should be an rtx like
16898 `(eq (...) (...))'. */
16899 static enum arm_cond_code
16900 get_arm_condition_code (rtx comparison)
16902 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16903 enum arm_cond_code code;
16904 enum rtx_code comp_code = GET_CODE (comparison);
16906 if (GET_MODE_CLASS (mode) != MODE_CC)
16907 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16908 XEXP (comparison, 1));
16910 switch (mode)
16912 case CC_DNEmode: code = ARM_NE; goto dominance;
16913 case CC_DEQmode: code = ARM_EQ; goto dominance;
16914 case CC_DGEmode: code = ARM_GE; goto dominance;
16915 case CC_DGTmode: code = ARM_GT; goto dominance;
16916 case CC_DLEmode: code = ARM_LE; goto dominance;
16917 case CC_DLTmode: code = ARM_LT; goto dominance;
16918 case CC_DGEUmode: code = ARM_CS; goto dominance;
16919 case CC_DGTUmode: code = ARM_HI; goto dominance;
16920 case CC_DLEUmode: code = ARM_LS; goto dominance;
16921 case CC_DLTUmode: code = ARM_CC;
16923 dominance:
16924 gcc_assert (comp_code == EQ || comp_code == NE);
16926 if (comp_code == EQ)
16927 return ARM_INVERSE_CONDITION_CODE (code);
16928 return code;
16930 case CC_NOOVmode:
16931 switch (comp_code)
16933 case NE: return ARM_NE;
16934 case EQ: return ARM_EQ;
16935 case GE: return ARM_PL;
16936 case LT: return ARM_MI;
16937 default: gcc_unreachable ();
16940 case CC_Zmode:
16941 switch (comp_code)
16943 case NE: return ARM_NE;
16944 case EQ: return ARM_EQ;
16945 default: gcc_unreachable ();
16948 case CC_Nmode:
16949 switch (comp_code)
16951 case NE: return ARM_MI;
16952 case EQ: return ARM_PL;
16953 default: gcc_unreachable ();
16956 case CCFPEmode:
16957 case CCFPmode:
16958 /* These encodings assume that AC=1 in the FPA system control
16959 byte. This allows us to handle all cases except UNEQ and
16960 LTGT. */
16961 switch (comp_code)
16963 case GE: return ARM_GE;
16964 case GT: return ARM_GT;
16965 case LE: return ARM_LS;
16966 case LT: return ARM_MI;
16967 case NE: return ARM_NE;
16968 case EQ: return ARM_EQ;
16969 case ORDERED: return ARM_VC;
16970 case UNORDERED: return ARM_VS;
16971 case UNLT: return ARM_LT;
16972 case UNLE: return ARM_LE;
16973 case UNGT: return ARM_HI;
16974 case UNGE: return ARM_PL;
16975 /* UNEQ and LTGT do not have a representation. */
16976 case UNEQ: /* Fall through. */
16977 case LTGT: /* Fall through. */
16978 default: gcc_unreachable ();
16981 case CC_SWPmode:
16982 switch (comp_code)
16984 case NE: return ARM_NE;
16985 case EQ: return ARM_EQ;
16986 case GE: return ARM_LE;
16987 case GT: return ARM_LT;
16988 case LE: return ARM_GE;
16989 case LT: return ARM_GT;
16990 case GEU: return ARM_LS;
16991 case GTU: return ARM_CC;
16992 case LEU: return ARM_CS;
16993 case LTU: return ARM_HI;
16994 default: gcc_unreachable ();
16997 case CC_Cmode:
16998 switch (comp_code)
17000 case LTU: return ARM_CS;
17001 case GEU: return ARM_CC;
17002 default: gcc_unreachable ();
17005 case CC_CZmode:
17006 switch (comp_code)
17008 case NE: return ARM_NE;
17009 case EQ: return ARM_EQ;
17010 case GEU: return ARM_CS;
17011 case GTU: return ARM_HI;
17012 case LEU: return ARM_LS;
17013 case LTU: return ARM_CC;
17014 default: gcc_unreachable ();
17017 case CC_NCVmode:
17018 switch (comp_code)
17020 case GE: return ARM_GE;
17021 case LT: return ARM_LT;
17022 case GEU: return ARM_CS;
17023 case LTU: return ARM_CC;
17024 default: gcc_unreachable ();
17027 case CCmode:
17028 switch (comp_code)
17030 case NE: return ARM_NE;
17031 case EQ: return ARM_EQ;
17032 case GE: return ARM_GE;
17033 case GT: return ARM_GT;
17034 case LE: return ARM_LE;
17035 case LT: return ARM_LT;
17036 case GEU: return ARM_CS;
17037 case GTU: return ARM_HI;
17038 case LEU: return ARM_LS;
17039 case LTU: return ARM_CC;
17040 default: gcc_unreachable ();
17043 default: gcc_unreachable ();
17047 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17048 instructions. */
17049 void
17050 thumb2_final_prescan_insn (rtx insn)
17052 rtx first_insn = insn;
17053 rtx body = PATTERN (insn);
17054 rtx predicate;
17055 enum arm_cond_code code;
17056 int n;
17057 int mask;
17059 /* Remove the previous insn from the count of insns to be output. */
17060 if (arm_condexec_count)
17061 arm_condexec_count--;
17063 /* Nothing to do if we are already inside a conditional block. */
17064 if (arm_condexec_count)
17065 return;
17067 if (GET_CODE (body) != COND_EXEC)
17068 return;
17070 /* Conditional jumps are implemented directly. */
17071 if (GET_CODE (insn) == JUMP_INSN)
17072 return;
17074 predicate = COND_EXEC_TEST (body);
17075 arm_current_cc = get_arm_condition_code (predicate);
17077 n = get_attr_ce_count (insn);
17078 arm_condexec_count = 1;
17079 arm_condexec_mask = (1 << n) - 1;
17080 arm_condexec_masklen = n;
17081 /* See if subsequent instructions can be combined into the same block. */
17082 for (;;)
17084 insn = next_nonnote_insn (insn);
17086 /* Jumping into the middle of an IT block is illegal, so a label or
17087 barrier terminates the block. */
17088 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17089 break;
17091 body = PATTERN (insn);
17092 /* USE and CLOBBER aren't really insns, so just skip them. */
17093 if (GET_CODE (body) == USE
17094 || GET_CODE (body) == CLOBBER)
17095 continue;
17097 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17098 if (GET_CODE (body) != COND_EXEC)
17099 break;
17100 /* Allow up to 4 conditionally executed instructions in a block. */
17101 n = get_attr_ce_count (insn);
17102 if (arm_condexec_masklen + n > 4)
17103 break;
17105 predicate = COND_EXEC_TEST (body);
17106 code = get_arm_condition_code (predicate);
17107 mask = (1 << n) - 1;
17108 if (arm_current_cc == code)
17109 arm_condexec_mask |= (mask << arm_condexec_masklen);
17110 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17111 break;
17113 arm_condexec_count++;
17114 arm_condexec_masklen += n;
17116 /* A jump must be the last instruction in a conditional block. */
17117 if (GET_CODE(insn) == JUMP_INSN)
17118 break;
17120 /* Restore recog_data (getting the attributes of other insns can
17121 destroy this array, but final.c assumes that it remains intact
17122 across this call). */
17123 extract_constrain_insn_cached (first_insn);
17126 void
17127 arm_final_prescan_insn (rtx insn)
17129 /* BODY will hold the body of INSN. */
17130 rtx body = PATTERN (insn);
17132 /* This will be 1 if trying to repeat the trick, and things need to be
17133 reversed if it appears to fail. */
17134 int reverse = 0;
17136 /* If we start with a return insn, we only succeed if we find another one. */
17137 int seeking_return = 0;
17139 /* START_INSN will hold the insn from where we start looking. This is the
17140 first insn after the following code_label if REVERSE is true. */
17141 rtx start_insn = insn;
17143 /* If in state 4, check if the target branch is reached, in order to
17144 change back to state 0. */
17145 if (arm_ccfsm_state == 4)
17147 if (insn == arm_target_insn)
17149 arm_target_insn = NULL;
17150 arm_ccfsm_state = 0;
17152 return;
17155 /* If in state 3, it is possible to repeat the trick, if this insn is an
17156 unconditional branch to a label, and immediately following this branch
17157 is the previous target label which is only used once, and the label this
17158 branch jumps to is not too far off. */
17159 if (arm_ccfsm_state == 3)
17161 if (simplejump_p (insn))
17163 start_insn = next_nonnote_insn (start_insn);
17164 if (GET_CODE (start_insn) == BARRIER)
17166 /* XXX Isn't this always a barrier? */
17167 start_insn = next_nonnote_insn (start_insn);
17169 if (GET_CODE (start_insn) == CODE_LABEL
17170 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17171 && LABEL_NUSES (start_insn) == 1)
17172 reverse = TRUE;
17173 else
17174 return;
17176 else if (GET_CODE (body) == RETURN)
17178 start_insn = next_nonnote_insn (start_insn);
17179 if (GET_CODE (start_insn) == BARRIER)
17180 start_insn = next_nonnote_insn (start_insn);
17181 if (GET_CODE (start_insn) == CODE_LABEL
17182 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17183 && LABEL_NUSES (start_insn) == 1)
17185 reverse = TRUE;
17186 seeking_return = 1;
17188 else
17189 return;
17191 else
17192 return;
17195 gcc_assert (!arm_ccfsm_state || reverse);
17196 if (GET_CODE (insn) != JUMP_INSN)
17197 return;
17199 /* This jump might be paralleled with a clobber of the condition codes
17200 the jump should always come first */
17201 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17202 body = XVECEXP (body, 0, 0);
17204 if (reverse
17205 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17206 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17208 int insns_skipped;
17209 int fail = FALSE, succeed = FALSE;
17210 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17211 int then_not_else = TRUE;
17212 rtx this_insn = start_insn, label = 0;
17214 /* Register the insn jumped to. */
17215 if (reverse)
17217 if (!seeking_return)
17218 label = XEXP (SET_SRC (body), 0);
17220 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17221 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17222 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17224 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17225 then_not_else = FALSE;
17227 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17228 seeking_return = 1;
17229 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17231 seeking_return = 1;
17232 then_not_else = FALSE;
17234 else
17235 gcc_unreachable ();
17237 /* See how many insns this branch skips, and what kind of insns. If all
17238 insns are okay, and the label or unconditional branch to the same
17239 label is not too far away, succeed. */
17240 for (insns_skipped = 0;
17241 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17243 rtx scanbody;
17245 this_insn = next_nonnote_insn (this_insn);
17246 if (!this_insn)
17247 break;
17249 switch (GET_CODE (this_insn))
17251 case CODE_LABEL:
17252 /* Succeed if it is the target label, otherwise fail since
17253 control falls in from somewhere else. */
17254 if (this_insn == label)
17256 arm_ccfsm_state = 1;
17257 succeed = TRUE;
17259 else
17260 fail = TRUE;
17261 break;
17263 case BARRIER:
17264 /* Succeed if the following insn is the target label.
17265 Otherwise fail.
17266 If return insns are used then the last insn in a function
17267 will be a barrier. */
17268 this_insn = next_nonnote_insn (this_insn);
17269 if (this_insn && this_insn == label)
17271 arm_ccfsm_state = 1;
17272 succeed = TRUE;
17274 else
17275 fail = TRUE;
17276 break;
17278 case CALL_INSN:
17279 /* The AAPCS says that conditional calls should not be
17280 used since they make interworking inefficient (the
17281 linker can't transform BL<cond> into BLX). That's
17282 only a problem if the machine has BLX. */
17283 if (arm_arch5)
17285 fail = TRUE;
17286 break;
17289 /* Succeed if the following insn is the target label, or
17290 if the following two insns are a barrier and the
17291 target label. */
17292 this_insn = next_nonnote_insn (this_insn);
17293 if (this_insn && GET_CODE (this_insn) == BARRIER)
17294 this_insn = next_nonnote_insn (this_insn);
17296 if (this_insn && this_insn == label
17297 && insns_skipped < max_insns_skipped)
17299 arm_ccfsm_state = 1;
17300 succeed = TRUE;
17302 else
17303 fail = TRUE;
17304 break;
17306 case JUMP_INSN:
17307 /* If this is an unconditional branch to the same label, succeed.
17308 If it is to another label, do nothing. If it is conditional,
17309 fail. */
17310 /* XXX Probably, the tests for SET and the PC are
17311 unnecessary. */
17313 scanbody = PATTERN (this_insn);
17314 if (GET_CODE (scanbody) == SET
17315 && GET_CODE (SET_DEST (scanbody)) == PC)
17317 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17318 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17320 arm_ccfsm_state = 2;
17321 succeed = TRUE;
17323 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17324 fail = TRUE;
17326 /* Fail if a conditional return is undesirable (e.g. on a
17327 StrongARM), but still allow this if optimizing for size. */
17328 else if (GET_CODE (scanbody) == RETURN
17329 && !use_return_insn (TRUE, NULL)
17330 && !optimize_size)
17331 fail = TRUE;
17332 else if (GET_CODE (scanbody) == RETURN
17333 && seeking_return)
17335 arm_ccfsm_state = 2;
17336 succeed = TRUE;
17338 else if (GET_CODE (scanbody) == PARALLEL)
17340 switch (get_attr_conds (this_insn))
17342 case CONDS_NOCOND:
17343 break;
17344 default:
17345 fail = TRUE;
17346 break;
17349 else
17350 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17352 break;
17354 case INSN:
17355 /* Instructions using or affecting the condition codes make it
17356 fail. */
17357 scanbody = PATTERN (this_insn);
17358 if (!(GET_CODE (scanbody) == SET
17359 || GET_CODE (scanbody) == PARALLEL)
17360 || get_attr_conds (this_insn) != CONDS_NOCOND)
17361 fail = TRUE;
17363 /* A conditional cirrus instruction must be followed by
17364 a non Cirrus instruction. However, since we
17365 conditionalize instructions in this function and by
17366 the time we get here we can't add instructions
17367 (nops), because shorten_branches() has already been
17368 called, we will disable conditionalizing Cirrus
17369 instructions to be safe. */
17370 if (GET_CODE (scanbody) != USE
17371 && GET_CODE (scanbody) != CLOBBER
17372 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17373 fail = TRUE;
17374 break;
17376 default:
17377 break;
17380 if (succeed)
17382 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17383 arm_target_label = CODE_LABEL_NUMBER (label);
17384 else
17386 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17388 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17390 this_insn = next_nonnote_insn (this_insn);
17391 gcc_assert (!this_insn
17392 || (GET_CODE (this_insn) != BARRIER
17393 && GET_CODE (this_insn) != CODE_LABEL));
17395 if (!this_insn)
17397 /* Oh, dear! we ran off the end.. give up. */
17398 extract_constrain_insn_cached (insn);
17399 arm_ccfsm_state = 0;
17400 arm_target_insn = NULL;
17401 return;
17403 arm_target_insn = this_insn;
17406 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17407 what it was. */
17408 if (!reverse)
17409 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17411 if (reverse || then_not_else)
17412 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17415 /* Restore recog_data (getting the attributes of other insns can
17416 destroy this array, but final.c assumes that it remains intact
17417 across this call. */
17418 extract_constrain_insn_cached (insn);
17422 /* Output IT instructions. */
17423 void
17424 thumb2_asm_output_opcode (FILE * stream)
17426 char buff[5];
17427 int n;
17429 if (arm_condexec_mask)
17431 for (n = 0; n < arm_condexec_masklen; n++)
17432 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17433 buff[n] = 0;
17434 asm_fprintf(stream, "i%s\t%s\n\t", buff,
17435 arm_condition_codes[arm_current_cc]);
17436 arm_condexec_mask = 0;
17440 /* Returns true if REGNO is a valid register
17441 for holding a quantity of type MODE. */
17443 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17445 if (GET_MODE_CLASS (mode) == MODE_CC)
17446 return (regno == CC_REGNUM
17447 || (TARGET_HARD_FLOAT && TARGET_VFP
17448 && regno == VFPCC_REGNUM));
17450 if (TARGET_THUMB1)
17451 /* For the Thumb we only allow values bigger than SImode in
17452 registers 0 - 6, so that there is always a second low
17453 register available to hold the upper part of the value.
17454 We probably we ought to ensure that the register is the
17455 start of an even numbered register pair. */
17456 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17458 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17459 && IS_CIRRUS_REGNUM (regno))
17460 /* We have outlawed SI values in Cirrus registers because they
17461 reside in the lower 32 bits, but SF values reside in the
17462 upper 32 bits. This causes gcc all sorts of grief. We can't
17463 even split the registers into pairs because Cirrus SI values
17464 get sign extended to 64bits-- aldyh. */
17465 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17467 if (TARGET_HARD_FLOAT && TARGET_VFP
17468 && IS_VFP_REGNUM (regno))
17470 if (mode == SFmode || mode == SImode)
17471 return VFP_REGNO_OK_FOR_SINGLE (regno);
17473 if (mode == DFmode)
17474 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17476 /* VFP registers can hold HFmode values, but there is no point in
17477 putting them there unless we have hardware conversion insns. */
17478 if (mode == HFmode)
17479 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17481 if (TARGET_NEON)
17482 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17483 || (VALID_NEON_QREG_MODE (mode)
17484 && NEON_REGNO_OK_FOR_QUAD (regno))
17485 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17486 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17487 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17488 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17489 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17491 return FALSE;
17494 if (TARGET_REALLY_IWMMXT)
17496 if (IS_IWMMXT_GR_REGNUM (regno))
17497 return mode == SImode;
17499 if (IS_IWMMXT_REGNUM (regno))
17500 return VALID_IWMMXT_REG_MODE (mode);
17503 /* We allow almost any value to be stored in the general registers.
17504 Restrict doubleword quantities to even register pairs so that we can
17505 use ldrd. Do not allow very large Neon structure opaque modes in
17506 general registers; they would use too many. */
17507 if (regno <= LAST_ARM_REGNUM)
17508 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17509 && ARM_NUM_REGS (mode) <= 4;
17511 if (regno == FRAME_POINTER_REGNUM
17512 || regno == ARG_POINTER_REGNUM)
17513 /* We only allow integers in the fake hard registers. */
17514 return GET_MODE_CLASS (mode) == MODE_INT;
17516 /* The only registers left are the FPA registers
17517 which we only allow to hold FP values. */
17518 return (TARGET_HARD_FLOAT && TARGET_FPA
17519 && GET_MODE_CLASS (mode) == MODE_FLOAT
17520 && regno >= FIRST_FPA_REGNUM
17521 && regno <= LAST_FPA_REGNUM);
17524 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17525 not used in arm mode. */
17527 enum reg_class
17528 arm_regno_class (int regno)
17530 if (TARGET_THUMB1)
17532 if (regno == STACK_POINTER_REGNUM)
17533 return STACK_REG;
17534 if (regno == CC_REGNUM)
17535 return CC_REG;
17536 if (regno < 8)
17537 return LO_REGS;
17538 return HI_REGS;
17541 if (TARGET_THUMB2 && regno < 8)
17542 return LO_REGS;
17544 if ( regno <= LAST_ARM_REGNUM
17545 || regno == FRAME_POINTER_REGNUM
17546 || regno == ARG_POINTER_REGNUM)
17547 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17549 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17550 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17552 if (IS_CIRRUS_REGNUM (regno))
17553 return CIRRUS_REGS;
17555 if (IS_VFP_REGNUM (regno))
17557 if (regno <= D7_VFP_REGNUM)
17558 return VFP_D0_D7_REGS;
17559 else if (regno <= LAST_LO_VFP_REGNUM)
17560 return VFP_LO_REGS;
17561 else
17562 return VFP_HI_REGS;
17565 if (IS_IWMMXT_REGNUM (regno))
17566 return IWMMXT_REGS;
17568 if (IS_IWMMXT_GR_REGNUM (regno))
17569 return IWMMXT_GR_REGS;
17571 return FPA_REGS;
17574 /* Handle a special case when computing the offset
17575 of an argument from the frame pointer. */
17577 arm_debugger_arg_offset (int value, rtx addr)
17579 rtx insn;
17581 /* We are only interested if dbxout_parms() failed to compute the offset. */
17582 if (value != 0)
17583 return 0;
17585 /* We can only cope with the case where the address is held in a register. */
17586 if (GET_CODE (addr) != REG)
17587 return 0;
17589 /* If we are using the frame pointer to point at the argument, then
17590 an offset of 0 is correct. */
17591 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17592 return 0;
17594 /* If we are using the stack pointer to point at the
17595 argument, then an offset of 0 is correct. */
17596 /* ??? Check this is consistent with thumb2 frame layout. */
17597 if ((TARGET_THUMB || !frame_pointer_needed)
17598 && REGNO (addr) == SP_REGNUM)
17599 return 0;
17601 /* Oh dear. The argument is pointed to by a register rather
17602 than being held in a register, or being stored at a known
17603 offset from the frame pointer. Since GDB only understands
17604 those two kinds of argument we must translate the address
17605 held in the register into an offset from the frame pointer.
17606 We do this by searching through the insns for the function
17607 looking to see where this register gets its value. If the
17608 register is initialized from the frame pointer plus an offset
17609 then we are in luck and we can continue, otherwise we give up.
17611 This code is exercised by producing debugging information
17612 for a function with arguments like this:
17614 double func (double a, double b, int c, double d) {return d;}
17616 Without this code the stab for parameter 'd' will be set to
17617 an offset of 0 from the frame pointer, rather than 8. */
17619 /* The if() statement says:
17621 If the insn is a normal instruction
17622 and if the insn is setting the value in a register
17623 and if the register being set is the register holding the address of the argument
17624 and if the address is computing by an addition
17625 that involves adding to a register
17626 which is the frame pointer
17627 a constant integer
17629 then... */
17631 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17633 if ( GET_CODE (insn) == INSN
17634 && GET_CODE (PATTERN (insn)) == SET
17635 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17636 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17637 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17638 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17639 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17642 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17644 break;
17648 if (value == 0)
17650 debug_rtx (addr);
17651 warning (0, "unable to compute real location of stacked parameter");
17652 value = 8; /* XXX magic hack */
17655 return value;
17658 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17659 do \
17661 if ((MASK) & insn_flags) \
17662 add_builtin_function ((NAME), (TYPE), (CODE), \
17663 BUILT_IN_MD, NULL, NULL_TREE); \
17665 while (0)
17667 struct builtin_description
17669 const unsigned int mask;
17670 const enum insn_code icode;
17671 const char * const name;
17672 const enum arm_builtins code;
17673 const enum rtx_code comparison;
17674 const unsigned int flag;
17677 static const struct builtin_description bdesc_2arg[] =
17679 #define IWMMXT_BUILTIN(code, string, builtin) \
17680 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17681 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17683 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17684 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17685 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17686 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17687 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17688 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17689 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17690 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17691 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17692 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17693 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17694 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17695 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17696 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17697 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17698 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17699 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17700 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17701 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17702 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17703 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17704 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17705 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17706 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17707 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17708 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17709 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17710 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17711 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17712 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17713 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17714 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17715 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17716 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17717 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17718 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17719 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17720 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17721 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17722 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17723 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17724 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17725 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17726 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17727 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17728 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17729 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17730 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17731 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17732 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17733 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17734 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17735 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17736 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17737 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17738 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17739 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17740 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17742 #define IWMMXT_BUILTIN2(code, builtin) \
17743 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17745 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17746 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17747 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17748 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17749 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17750 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17751 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17752 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17753 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17754 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17755 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17756 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17757 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17758 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17759 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17760 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17761 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17762 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17763 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17764 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17765 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17766 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17767 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17768 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17769 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17770 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17771 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17772 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17773 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17774 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17775 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17776 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
17779 static const struct builtin_description bdesc_1arg[] =
17781 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17782 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17783 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17784 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17785 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17786 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17787 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17788 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17789 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17790 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17791 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17792 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17793 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17794 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17795 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17796 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17797 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17798 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17801 /* Set up all the iWMMXt builtins. This is
17802 not called if TARGET_IWMMXT is zero. */
17804 static void
17805 arm_init_iwmmxt_builtins (void)
17807 const struct builtin_description * d;
17808 size_t i;
17809 tree endlink = void_list_node;
17811 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17812 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17813 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
17815 tree int_ftype_int
17816 = build_function_type (integer_type_node,
17817 tree_cons (NULL_TREE, integer_type_node, endlink));
17818 tree v8qi_ftype_v8qi_v8qi_int
17819 = build_function_type (V8QI_type_node,
17820 tree_cons (NULL_TREE, V8QI_type_node,
17821 tree_cons (NULL_TREE, V8QI_type_node,
17822 tree_cons (NULL_TREE,
17823 integer_type_node,
17824 endlink))));
17825 tree v4hi_ftype_v4hi_int
17826 = build_function_type (V4HI_type_node,
17827 tree_cons (NULL_TREE, V4HI_type_node,
17828 tree_cons (NULL_TREE, integer_type_node,
17829 endlink)));
17830 tree v2si_ftype_v2si_int
17831 = build_function_type (V2SI_type_node,
17832 tree_cons (NULL_TREE, V2SI_type_node,
17833 tree_cons (NULL_TREE, integer_type_node,
17834 endlink)));
17835 tree v2si_ftype_di_di
17836 = build_function_type (V2SI_type_node,
17837 tree_cons (NULL_TREE, long_long_integer_type_node,
17838 tree_cons (NULL_TREE, long_long_integer_type_node,
17839 endlink)));
17840 tree di_ftype_di_int
17841 = build_function_type (long_long_integer_type_node,
17842 tree_cons (NULL_TREE, long_long_integer_type_node,
17843 tree_cons (NULL_TREE, integer_type_node,
17844 endlink)));
17845 tree di_ftype_di_int_int
17846 = build_function_type (long_long_integer_type_node,
17847 tree_cons (NULL_TREE, long_long_integer_type_node,
17848 tree_cons (NULL_TREE, integer_type_node,
17849 tree_cons (NULL_TREE,
17850 integer_type_node,
17851 endlink))));
17852 tree int_ftype_v8qi
17853 = build_function_type (integer_type_node,
17854 tree_cons (NULL_TREE, V8QI_type_node,
17855 endlink));
17856 tree int_ftype_v4hi
17857 = build_function_type (integer_type_node,
17858 tree_cons (NULL_TREE, V4HI_type_node,
17859 endlink));
17860 tree int_ftype_v2si
17861 = build_function_type (integer_type_node,
17862 tree_cons (NULL_TREE, V2SI_type_node,
17863 endlink));
17864 tree int_ftype_v8qi_int
17865 = build_function_type (integer_type_node,
17866 tree_cons (NULL_TREE, V8QI_type_node,
17867 tree_cons (NULL_TREE, integer_type_node,
17868 endlink)));
17869 tree int_ftype_v4hi_int
17870 = build_function_type (integer_type_node,
17871 tree_cons (NULL_TREE, V4HI_type_node,
17872 tree_cons (NULL_TREE, integer_type_node,
17873 endlink)));
17874 tree int_ftype_v2si_int
17875 = build_function_type (integer_type_node,
17876 tree_cons (NULL_TREE, V2SI_type_node,
17877 tree_cons (NULL_TREE, integer_type_node,
17878 endlink)));
17879 tree v8qi_ftype_v8qi_int_int
17880 = build_function_type (V8QI_type_node,
17881 tree_cons (NULL_TREE, V8QI_type_node,
17882 tree_cons (NULL_TREE, integer_type_node,
17883 tree_cons (NULL_TREE,
17884 integer_type_node,
17885 endlink))));
17886 tree v4hi_ftype_v4hi_int_int
17887 = build_function_type (V4HI_type_node,
17888 tree_cons (NULL_TREE, V4HI_type_node,
17889 tree_cons (NULL_TREE, integer_type_node,
17890 tree_cons (NULL_TREE,
17891 integer_type_node,
17892 endlink))));
17893 tree v2si_ftype_v2si_int_int
17894 = build_function_type (V2SI_type_node,
17895 tree_cons (NULL_TREE, V2SI_type_node,
17896 tree_cons (NULL_TREE, integer_type_node,
17897 tree_cons (NULL_TREE,
17898 integer_type_node,
17899 endlink))));
17900 /* Miscellaneous. */
17901 tree v8qi_ftype_v4hi_v4hi
17902 = build_function_type (V8QI_type_node,
17903 tree_cons (NULL_TREE, V4HI_type_node,
17904 tree_cons (NULL_TREE, V4HI_type_node,
17905 endlink)));
17906 tree v4hi_ftype_v2si_v2si
17907 = build_function_type (V4HI_type_node,
17908 tree_cons (NULL_TREE, V2SI_type_node,
17909 tree_cons (NULL_TREE, V2SI_type_node,
17910 endlink)));
17911 tree v2si_ftype_v4hi_v4hi
17912 = build_function_type (V2SI_type_node,
17913 tree_cons (NULL_TREE, V4HI_type_node,
17914 tree_cons (NULL_TREE, V4HI_type_node,
17915 endlink)));
17916 tree v2si_ftype_v8qi_v8qi
17917 = build_function_type (V2SI_type_node,
17918 tree_cons (NULL_TREE, V8QI_type_node,
17919 tree_cons (NULL_TREE, V8QI_type_node,
17920 endlink)));
17921 tree v4hi_ftype_v4hi_di
17922 = build_function_type (V4HI_type_node,
17923 tree_cons (NULL_TREE, V4HI_type_node,
17924 tree_cons (NULL_TREE,
17925 long_long_integer_type_node,
17926 endlink)));
17927 tree v2si_ftype_v2si_di
17928 = build_function_type (V2SI_type_node,
17929 tree_cons (NULL_TREE, V2SI_type_node,
17930 tree_cons (NULL_TREE,
17931 long_long_integer_type_node,
17932 endlink)));
17933 tree void_ftype_int_int
17934 = build_function_type (void_type_node,
17935 tree_cons (NULL_TREE, integer_type_node,
17936 tree_cons (NULL_TREE, integer_type_node,
17937 endlink)));
17938 tree di_ftype_void
17939 = build_function_type (long_long_unsigned_type_node, endlink);
17940 tree di_ftype_v8qi
17941 = build_function_type (long_long_integer_type_node,
17942 tree_cons (NULL_TREE, V8QI_type_node,
17943 endlink));
17944 tree di_ftype_v4hi
17945 = build_function_type (long_long_integer_type_node,
17946 tree_cons (NULL_TREE, V4HI_type_node,
17947 endlink));
17948 tree di_ftype_v2si
17949 = build_function_type (long_long_integer_type_node,
17950 tree_cons (NULL_TREE, V2SI_type_node,
17951 endlink));
17952 tree v2si_ftype_v4hi
17953 = build_function_type (V2SI_type_node,
17954 tree_cons (NULL_TREE, V4HI_type_node,
17955 endlink));
17956 tree v4hi_ftype_v8qi
17957 = build_function_type (V4HI_type_node,
17958 tree_cons (NULL_TREE, V8QI_type_node,
17959 endlink));
17961 tree di_ftype_di_v4hi_v4hi
17962 = build_function_type (long_long_unsigned_type_node,
17963 tree_cons (NULL_TREE,
17964 long_long_unsigned_type_node,
17965 tree_cons (NULL_TREE, V4HI_type_node,
17966 tree_cons (NULL_TREE,
17967 V4HI_type_node,
17968 endlink))));
17970 tree di_ftype_v4hi_v4hi
17971 = build_function_type (long_long_unsigned_type_node,
17972 tree_cons (NULL_TREE, V4HI_type_node,
17973 tree_cons (NULL_TREE, V4HI_type_node,
17974 endlink)));
17976 /* Normal vector binops. */
17977 tree v8qi_ftype_v8qi_v8qi
17978 = build_function_type (V8QI_type_node,
17979 tree_cons (NULL_TREE, V8QI_type_node,
17980 tree_cons (NULL_TREE, V8QI_type_node,
17981 endlink)));
17982 tree v4hi_ftype_v4hi_v4hi
17983 = build_function_type (V4HI_type_node,
17984 tree_cons (NULL_TREE, V4HI_type_node,
17985 tree_cons (NULL_TREE, V4HI_type_node,
17986 endlink)));
17987 tree v2si_ftype_v2si_v2si
17988 = build_function_type (V2SI_type_node,
17989 tree_cons (NULL_TREE, V2SI_type_node,
17990 tree_cons (NULL_TREE, V2SI_type_node,
17991 endlink)));
17992 tree di_ftype_di_di
17993 = build_function_type (long_long_unsigned_type_node,
17994 tree_cons (NULL_TREE, long_long_unsigned_type_node,
17995 tree_cons (NULL_TREE,
17996 long_long_unsigned_type_node,
17997 endlink)));
17999 /* Add all builtins that are more or less simple operations on two
18000 operands. */
18001 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18003 /* Use one of the operands; the target can have a different mode for
18004 mask-generating compares. */
18005 enum machine_mode mode;
18006 tree type;
18008 if (d->name == 0)
18009 continue;
18011 mode = insn_data[d->icode].operand[1].mode;
18013 switch (mode)
18015 case V8QImode:
18016 type = v8qi_ftype_v8qi_v8qi;
18017 break;
18018 case V4HImode:
18019 type = v4hi_ftype_v4hi_v4hi;
18020 break;
18021 case V2SImode:
18022 type = v2si_ftype_v2si_v2si;
18023 break;
18024 case DImode:
18025 type = di_ftype_di_di;
18026 break;
18028 default:
18029 gcc_unreachable ();
18032 def_mbuiltin (d->mask, d->name, type, d->code);
18035 /* Add the remaining MMX insns with somewhat more complicated types. */
18036 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
18037 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
18038 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
18040 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
18041 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
18042 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
18043 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
18044 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
18045 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
18047 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
18048 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
18049 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
18050 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
18051 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
18052 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
18054 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
18055 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
18056 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
18057 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
18058 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
18059 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
18061 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
18062 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
18063 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
18064 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
18065 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
18066 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
18068 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
18070 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
18071 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
18072 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
18073 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
18075 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
18076 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
18077 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
18078 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
18079 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
18080 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
18081 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
18082 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
18083 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
18085 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
18086 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
18087 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
18089 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
18090 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
18091 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
18093 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
18094 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
18095 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
18096 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
18097 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
18098 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
18100 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
18101 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
18102 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
18103 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
18104 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
18105 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
18106 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
18107 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
18108 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
18109 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
18110 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
18111 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
18113 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
18114 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
18115 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
18116 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
18118 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
18119 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
18120 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
18121 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
18122 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
18123 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
18124 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
18127 static void
18128 arm_init_tls_builtins (void)
18130 tree ftype, decl;
18132 ftype = build_function_type (ptr_type_node, void_list_node);
18133 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
18134 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
18135 NULL, NULL_TREE);
18136 TREE_NOTHROW (decl) = 1;
18137 TREE_READONLY (decl) = 1;
18140 enum neon_builtin_type_bits {
18141 T_V8QI = 0x0001,
18142 T_V4HI = 0x0002,
18143 T_V2SI = 0x0004,
18144 T_V2SF = 0x0008,
18145 T_DI = 0x0010,
18146 T_V16QI = 0x0020,
18147 T_V8HI = 0x0040,
18148 T_V4SI = 0x0080,
18149 T_V4SF = 0x0100,
18150 T_V2DI = 0x0200,
18151 T_TI = 0x0400,
18152 T_EI = 0x0800,
18153 T_OI = 0x1000
18156 #define v8qi_UP T_V8QI
18157 #define v4hi_UP T_V4HI
18158 #define v2si_UP T_V2SI
18159 #define v2sf_UP T_V2SF
18160 #define di_UP T_DI
18161 #define v16qi_UP T_V16QI
18162 #define v8hi_UP T_V8HI
18163 #define v4si_UP T_V4SI
18164 #define v4sf_UP T_V4SF
18165 #define v2di_UP T_V2DI
18166 #define ti_UP T_TI
18167 #define ei_UP T_EI
18168 #define oi_UP T_OI
18170 #define UP(X) X##_UP
18172 #define T_MAX 13
18174 typedef enum {
18175 NEON_BINOP,
18176 NEON_TERNOP,
18177 NEON_UNOP,
18178 NEON_GETLANE,
18179 NEON_SETLANE,
18180 NEON_CREATE,
18181 NEON_DUP,
18182 NEON_DUPLANE,
18183 NEON_COMBINE,
18184 NEON_SPLIT,
18185 NEON_LANEMUL,
18186 NEON_LANEMULL,
18187 NEON_LANEMULH,
18188 NEON_LANEMAC,
18189 NEON_SCALARMUL,
18190 NEON_SCALARMULL,
18191 NEON_SCALARMULH,
18192 NEON_SCALARMAC,
18193 NEON_CONVERT,
18194 NEON_FIXCONV,
18195 NEON_SELECT,
18196 NEON_RESULTPAIR,
18197 NEON_REINTERP,
18198 NEON_VTBL,
18199 NEON_VTBX,
18200 NEON_LOAD1,
18201 NEON_LOAD1LANE,
18202 NEON_STORE1,
18203 NEON_STORE1LANE,
18204 NEON_LOADSTRUCT,
18205 NEON_LOADSTRUCTLANE,
18206 NEON_STORESTRUCT,
18207 NEON_STORESTRUCTLANE,
18208 NEON_LOGICBINOP,
18209 NEON_SHIFTINSERT,
18210 NEON_SHIFTIMM,
18211 NEON_SHIFTACC
18212 } neon_itype;
18214 typedef struct {
18215 const char *name;
18216 const neon_itype itype;
18217 const int bits;
18218 const enum insn_code codes[T_MAX];
18219 const unsigned int num_vars;
18220 unsigned int base_fcode;
18221 } neon_builtin_datum;
18223 #define CF(N,X) CODE_FOR_neon_##N##X
18225 #define VAR1(T, N, A) \
18226 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
18227 #define VAR2(T, N, A, B) \
18228 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
18229 #define VAR3(T, N, A, B, C) \
18230 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
18231 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
18232 #define VAR4(T, N, A, B, C, D) \
18233 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
18234 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
18235 #define VAR5(T, N, A, B, C, D, E) \
18236 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
18237 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
18238 #define VAR6(T, N, A, B, C, D, E, F) \
18239 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
18240 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
18241 #define VAR7(T, N, A, B, C, D, E, F, G) \
18242 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
18243 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18244 CF (N, G) }, 7, 0
18245 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18246 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18247 | UP (H), \
18248 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18249 CF (N, G), CF (N, H) }, 8, 0
18250 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18251 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18252 | UP (H) | UP (I), \
18253 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18254 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
18255 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18256 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18257 | UP (H) | UP (I) | UP (J), \
18258 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18259 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
18261 /* The mode entries in the following table correspond to the "key" type of the
18262 instruction variant, i.e. equivalent to that which would be specified after
18263 the assembler mnemonic, which usually refers to the last vector operand.
18264 (Signed/unsigned/polynomial types are not differentiated between though, and
18265 are all mapped onto the same mode for a given element size.) The modes
18266 listed per instruction should be the same as those defined for that
18267 instruction's pattern in neon.md.
18268 WARNING: Variants should be listed in the same increasing order as
18269 neon_builtin_type_bits. */
18271 static neon_builtin_datum neon_builtin_data[] =
18273 { VAR10 (BINOP, vadd,
18274 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18275 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
18276 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
18277 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18278 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18279 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
18280 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18281 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18282 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
18283 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18284 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
18285 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
18286 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
18287 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
18288 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
18289 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
18290 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
18291 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
18292 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
18293 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
18294 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
18295 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
18296 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18297 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18298 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18299 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
18300 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
18301 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
18302 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18303 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18304 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18305 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
18306 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18307 { VAR10 (BINOP, vsub,
18308 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18309 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
18310 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
18311 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18312 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18313 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
18314 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18315 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18316 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18317 { VAR2 (BINOP, vcage, v2sf, v4sf) },
18318 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
18319 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18320 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18321 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
18322 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18323 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
18324 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18325 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18326 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
18327 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18328 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18329 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
18330 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
18331 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
18332 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
18333 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18334 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18335 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18336 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18337 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18338 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18339 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18340 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18341 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
18342 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
18343 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
18344 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18345 /* FIXME: vget_lane supports more variants than this! */
18346 { VAR10 (GETLANE, vget_lane,
18347 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18348 { VAR10 (SETLANE, vset_lane,
18349 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18350 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
18351 { VAR10 (DUP, vdup_n,
18352 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18353 { VAR10 (DUPLANE, vdup_lane,
18354 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18355 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
18356 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
18357 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
18358 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
18359 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
18360 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
18361 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
18362 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18363 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18364 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
18365 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
18366 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18367 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
18368 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
18369 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18370 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18371 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
18372 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
18373 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18374 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
18375 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
18376 { VAR10 (BINOP, vext,
18377 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18378 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18379 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
18380 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
18381 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
18382 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
18383 { VAR10 (SELECT, vbsl,
18384 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18385 { VAR1 (VTBL, vtbl1, v8qi) },
18386 { VAR1 (VTBL, vtbl2, v8qi) },
18387 { VAR1 (VTBL, vtbl3, v8qi) },
18388 { VAR1 (VTBL, vtbl4, v8qi) },
18389 { VAR1 (VTBX, vtbx1, v8qi) },
18390 { VAR1 (VTBX, vtbx2, v8qi) },
18391 { VAR1 (VTBX, vtbx3, v8qi) },
18392 { VAR1 (VTBX, vtbx4, v8qi) },
18393 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18394 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18395 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18396 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
18397 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
18398 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
18399 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
18400 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
18401 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
18402 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
18403 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
18404 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
18405 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
18406 { VAR10 (LOAD1, vld1,
18407 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18408 { VAR10 (LOAD1LANE, vld1_lane,
18409 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18410 { VAR10 (LOAD1, vld1_dup,
18411 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18412 { VAR10 (STORE1, vst1,
18413 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18414 { VAR10 (STORE1LANE, vst1_lane,
18415 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18416 { VAR9 (LOADSTRUCT,
18417 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18418 { VAR7 (LOADSTRUCTLANE, vld2_lane,
18419 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18420 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
18421 { VAR9 (STORESTRUCT, vst2,
18422 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18423 { VAR7 (STORESTRUCTLANE, vst2_lane,
18424 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18425 { VAR9 (LOADSTRUCT,
18426 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18427 { VAR7 (LOADSTRUCTLANE, vld3_lane,
18428 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18429 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
18430 { VAR9 (STORESTRUCT, vst3,
18431 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18432 { VAR7 (STORESTRUCTLANE, vst3_lane,
18433 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18434 { VAR9 (LOADSTRUCT, vld4,
18435 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18436 { VAR7 (LOADSTRUCTLANE, vld4_lane,
18437 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18438 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
18439 { VAR9 (STORESTRUCT, vst4,
18440 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18441 { VAR7 (STORESTRUCTLANE, vst4_lane,
18442 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18443 { VAR10 (LOGICBINOP, vand,
18444 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18445 { VAR10 (LOGICBINOP, vorr,
18446 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18447 { VAR10 (BINOP, veor,
18448 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18449 { VAR10 (LOGICBINOP, vbic,
18450 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18451 { VAR10 (LOGICBINOP, vorn,
18452 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
18455 #undef CF
18456 #undef VAR1
18457 #undef VAR2
18458 #undef VAR3
18459 #undef VAR4
18460 #undef VAR5
18461 #undef VAR6
18462 #undef VAR7
18463 #undef VAR8
18464 #undef VAR9
18465 #undef VAR10
18467 static void
18468 arm_init_neon_builtins (void)
18470 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
18472 tree neon_intQI_type_node;
18473 tree neon_intHI_type_node;
18474 tree neon_polyQI_type_node;
18475 tree neon_polyHI_type_node;
18476 tree neon_intSI_type_node;
18477 tree neon_intDI_type_node;
18478 tree neon_float_type_node;
18480 tree intQI_pointer_node;
18481 tree intHI_pointer_node;
18482 tree intSI_pointer_node;
18483 tree intDI_pointer_node;
18484 tree float_pointer_node;
18486 tree const_intQI_node;
18487 tree const_intHI_node;
18488 tree const_intSI_node;
18489 tree const_intDI_node;
18490 tree const_float_node;
18492 tree const_intQI_pointer_node;
18493 tree const_intHI_pointer_node;
18494 tree const_intSI_pointer_node;
18495 tree const_intDI_pointer_node;
18496 tree const_float_pointer_node;
18498 tree V8QI_type_node;
18499 tree V4HI_type_node;
18500 tree V2SI_type_node;
18501 tree V2SF_type_node;
18502 tree V16QI_type_node;
18503 tree V8HI_type_node;
18504 tree V4SI_type_node;
18505 tree V4SF_type_node;
18506 tree V2DI_type_node;
18508 tree intUQI_type_node;
18509 tree intUHI_type_node;
18510 tree intUSI_type_node;
18511 tree intUDI_type_node;
18513 tree intEI_type_node;
18514 tree intOI_type_node;
18515 tree intCI_type_node;
18516 tree intXI_type_node;
18518 tree V8QI_pointer_node;
18519 tree V4HI_pointer_node;
18520 tree V2SI_pointer_node;
18521 tree V2SF_pointer_node;
18522 tree V16QI_pointer_node;
18523 tree V8HI_pointer_node;
18524 tree V4SI_pointer_node;
18525 tree V4SF_pointer_node;
18526 tree V2DI_pointer_node;
18528 tree void_ftype_pv8qi_v8qi_v8qi;
18529 tree void_ftype_pv4hi_v4hi_v4hi;
18530 tree void_ftype_pv2si_v2si_v2si;
18531 tree void_ftype_pv2sf_v2sf_v2sf;
18532 tree void_ftype_pdi_di_di;
18533 tree void_ftype_pv16qi_v16qi_v16qi;
18534 tree void_ftype_pv8hi_v8hi_v8hi;
18535 tree void_ftype_pv4si_v4si_v4si;
18536 tree void_ftype_pv4sf_v4sf_v4sf;
18537 tree void_ftype_pv2di_v2di_v2di;
18539 tree reinterp_ftype_dreg[5][5];
18540 tree reinterp_ftype_qreg[5][5];
18541 tree dreg_types[5], qreg_types[5];
18543 /* Create distinguished type nodes for NEON vector element types,
18544 and pointers to values of such types, so we can detect them later. */
18545 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18546 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18547 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18548 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18549 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18550 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18551 neon_float_type_node = make_node (REAL_TYPE);
18552 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18553 layout_type (neon_float_type_node);
18555 /* Define typedefs which exactly correspond to the modes we are basing vector
18556 types on. If you change these names you'll need to change
18557 the table used by arm_mangle_type too. */
18558 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18559 "__builtin_neon_qi");
18560 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18561 "__builtin_neon_hi");
18562 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18563 "__builtin_neon_si");
18564 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18565 "__builtin_neon_sf");
18566 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18567 "__builtin_neon_di");
18568 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18569 "__builtin_neon_poly8");
18570 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18571 "__builtin_neon_poly16");
18573 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18574 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18575 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18576 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18577 float_pointer_node = build_pointer_type (neon_float_type_node);
18579 /* Next create constant-qualified versions of the above types. */
18580 const_intQI_node = build_qualified_type (neon_intQI_type_node,
18581 TYPE_QUAL_CONST);
18582 const_intHI_node = build_qualified_type (neon_intHI_type_node,
18583 TYPE_QUAL_CONST);
18584 const_intSI_node = build_qualified_type (neon_intSI_type_node,
18585 TYPE_QUAL_CONST);
18586 const_intDI_node = build_qualified_type (neon_intDI_type_node,
18587 TYPE_QUAL_CONST);
18588 const_float_node = build_qualified_type (neon_float_type_node,
18589 TYPE_QUAL_CONST);
18591 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18592 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18593 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18594 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18595 const_float_pointer_node = build_pointer_type (const_float_node);
18597 /* Now create vector types based on our NEON element types. */
18598 /* 64-bit vectors. */
18599 V8QI_type_node =
18600 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18601 V4HI_type_node =
18602 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18603 V2SI_type_node =
18604 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18605 V2SF_type_node =
18606 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
18607 /* 128-bit vectors. */
18608 V16QI_type_node =
18609 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
18610 V8HI_type_node =
18611 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
18612 V4SI_type_node =
18613 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
18614 V4SF_type_node =
18615 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
18616 V2DI_type_node =
18617 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18619 /* Unsigned integer types for various mode sizes. */
18620 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18621 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18622 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18623 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18625 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18626 "__builtin_neon_uqi");
18627 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18628 "__builtin_neon_uhi");
18629 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18630 "__builtin_neon_usi");
18631 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18632 "__builtin_neon_udi");
18634 /* Opaque integer types for structures of vectors. */
18635 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18636 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18637 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18638 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18640 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18641 "__builtin_neon_ti");
18642 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18643 "__builtin_neon_ei");
18644 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18645 "__builtin_neon_oi");
18646 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18647 "__builtin_neon_ci");
18648 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18649 "__builtin_neon_xi");
18651 /* Pointers to vector types. */
18652 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18653 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18654 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18655 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18656 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18657 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18658 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18659 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18660 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18662 /* Operations which return results as pairs. */
18663 void_ftype_pv8qi_v8qi_v8qi =
18664 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18665 V8QI_type_node, NULL);
18666 void_ftype_pv4hi_v4hi_v4hi =
18667 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18668 V4HI_type_node, NULL);
18669 void_ftype_pv2si_v2si_v2si =
18670 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18671 V2SI_type_node, NULL);
18672 void_ftype_pv2sf_v2sf_v2sf =
18673 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18674 V2SF_type_node, NULL);
18675 void_ftype_pdi_di_di =
18676 build_function_type_list (void_type_node, intDI_pointer_node,
18677 neon_intDI_type_node, neon_intDI_type_node, NULL);
18678 void_ftype_pv16qi_v16qi_v16qi =
18679 build_function_type_list (void_type_node, V16QI_pointer_node,
18680 V16QI_type_node, V16QI_type_node, NULL);
18681 void_ftype_pv8hi_v8hi_v8hi =
18682 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18683 V8HI_type_node, NULL);
18684 void_ftype_pv4si_v4si_v4si =
18685 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18686 V4SI_type_node, NULL);
18687 void_ftype_pv4sf_v4sf_v4sf =
18688 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18689 V4SF_type_node, NULL);
18690 void_ftype_pv2di_v2di_v2di =
18691 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18692 V2DI_type_node, NULL);
18694 dreg_types[0] = V8QI_type_node;
18695 dreg_types[1] = V4HI_type_node;
18696 dreg_types[2] = V2SI_type_node;
18697 dreg_types[3] = V2SF_type_node;
18698 dreg_types[4] = neon_intDI_type_node;
18700 qreg_types[0] = V16QI_type_node;
18701 qreg_types[1] = V8HI_type_node;
18702 qreg_types[2] = V4SI_type_node;
18703 qreg_types[3] = V4SF_type_node;
18704 qreg_types[4] = V2DI_type_node;
18706 for (i = 0; i < 5; i++)
18708 int j;
18709 for (j = 0; j < 5; j++)
18711 reinterp_ftype_dreg[i][j]
18712 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18713 reinterp_ftype_qreg[i][j]
18714 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
18718 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
18720 neon_builtin_datum *d = &neon_builtin_data[i];
18721 unsigned int j, codeidx = 0;
18723 d->base_fcode = fcode;
18725 for (j = 0; j < T_MAX; j++)
18727 const char* const modenames[] = {
18728 "v8qi", "v4hi", "v2si", "v2sf", "di",
18729 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
18731 char namebuf[60];
18732 tree ftype = NULL;
18733 enum insn_code icode;
18734 int is_load = 0, is_store = 0;
18736 if ((d->bits & (1 << j)) == 0)
18737 continue;
18739 icode = d->codes[codeidx++];
18741 switch (d->itype)
18743 case NEON_LOAD1:
18744 case NEON_LOAD1LANE:
18745 case NEON_LOADSTRUCT:
18746 case NEON_LOADSTRUCTLANE:
18747 is_load = 1;
18748 /* Fall through. */
18749 case NEON_STORE1:
18750 case NEON_STORE1LANE:
18751 case NEON_STORESTRUCT:
18752 case NEON_STORESTRUCTLANE:
18753 if (!is_load)
18754 is_store = 1;
18755 /* Fall through. */
18756 case NEON_UNOP:
18757 case NEON_BINOP:
18758 case NEON_LOGICBINOP:
18759 case NEON_SHIFTINSERT:
18760 case NEON_TERNOP:
18761 case NEON_GETLANE:
18762 case NEON_SETLANE:
18763 case NEON_CREATE:
18764 case NEON_DUP:
18765 case NEON_DUPLANE:
18766 case NEON_SHIFTIMM:
18767 case NEON_SHIFTACC:
18768 case NEON_COMBINE:
18769 case NEON_SPLIT:
18770 case NEON_CONVERT:
18771 case NEON_FIXCONV:
18772 case NEON_LANEMUL:
18773 case NEON_LANEMULL:
18774 case NEON_LANEMULH:
18775 case NEON_LANEMAC:
18776 case NEON_SCALARMUL:
18777 case NEON_SCALARMULL:
18778 case NEON_SCALARMULH:
18779 case NEON_SCALARMAC:
18780 case NEON_SELECT:
18781 case NEON_VTBL:
18782 case NEON_VTBX:
18784 int k;
18785 tree return_type = void_type_node, args = void_list_node;
18787 /* Build a function type directly from the insn_data for this
18788 builtin. The build_function_type() function takes care of
18789 removing duplicates for us. */
18790 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
18792 tree eltype;
18794 if (is_load && k == 1)
18796 /* Neon load patterns always have the memory operand
18797 (a SImode pointer) in the operand 1 position. We
18798 want a const pointer to the element type in that
18799 position. */
18800 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18802 switch (1 << j)
18804 case T_V8QI:
18805 case T_V16QI:
18806 eltype = const_intQI_pointer_node;
18807 break;
18809 case T_V4HI:
18810 case T_V8HI:
18811 eltype = const_intHI_pointer_node;
18812 break;
18814 case T_V2SI:
18815 case T_V4SI:
18816 eltype = const_intSI_pointer_node;
18817 break;
18819 case T_V2SF:
18820 case T_V4SF:
18821 eltype = const_float_pointer_node;
18822 break;
18824 case T_DI:
18825 case T_V2DI:
18826 eltype = const_intDI_pointer_node;
18827 break;
18829 default: gcc_unreachable ();
18832 else if (is_store && k == 0)
18834 /* Similarly, Neon store patterns use operand 0 as
18835 the memory location to store to (a SImode pointer).
18836 Use a pointer to the element type of the store in
18837 that position. */
18838 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18840 switch (1 << j)
18842 case T_V8QI:
18843 case T_V16QI:
18844 eltype = intQI_pointer_node;
18845 break;
18847 case T_V4HI:
18848 case T_V8HI:
18849 eltype = intHI_pointer_node;
18850 break;
18852 case T_V2SI:
18853 case T_V4SI:
18854 eltype = intSI_pointer_node;
18855 break;
18857 case T_V2SF:
18858 case T_V4SF:
18859 eltype = float_pointer_node;
18860 break;
18862 case T_DI:
18863 case T_V2DI:
18864 eltype = intDI_pointer_node;
18865 break;
18867 default: gcc_unreachable ();
18870 else
18872 switch (insn_data[icode].operand[k].mode)
18874 case VOIDmode: eltype = void_type_node; break;
18875 /* Scalars. */
18876 case QImode: eltype = neon_intQI_type_node; break;
18877 case HImode: eltype = neon_intHI_type_node; break;
18878 case SImode: eltype = neon_intSI_type_node; break;
18879 case SFmode: eltype = neon_float_type_node; break;
18880 case DImode: eltype = neon_intDI_type_node; break;
18881 case TImode: eltype = intTI_type_node; break;
18882 case EImode: eltype = intEI_type_node; break;
18883 case OImode: eltype = intOI_type_node; break;
18884 case CImode: eltype = intCI_type_node; break;
18885 case XImode: eltype = intXI_type_node; break;
18886 /* 64-bit vectors. */
18887 case V8QImode: eltype = V8QI_type_node; break;
18888 case V4HImode: eltype = V4HI_type_node; break;
18889 case V2SImode: eltype = V2SI_type_node; break;
18890 case V2SFmode: eltype = V2SF_type_node; break;
18891 /* 128-bit vectors. */
18892 case V16QImode: eltype = V16QI_type_node; break;
18893 case V8HImode: eltype = V8HI_type_node; break;
18894 case V4SImode: eltype = V4SI_type_node; break;
18895 case V4SFmode: eltype = V4SF_type_node; break;
18896 case V2DImode: eltype = V2DI_type_node; break;
18897 default: gcc_unreachable ();
18901 if (k == 0 && !is_store)
18902 return_type = eltype;
18903 else
18904 args = tree_cons (NULL_TREE, eltype, args);
18907 ftype = build_function_type (return_type, args);
18909 break;
18911 case NEON_RESULTPAIR:
18913 switch (insn_data[icode].operand[1].mode)
18915 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
18916 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
18917 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
18918 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
18919 case DImode: ftype = void_ftype_pdi_di_di; break;
18920 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
18921 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
18922 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
18923 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
18924 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
18925 default: gcc_unreachable ();
18928 break;
18930 case NEON_REINTERP:
18932 /* We iterate over 5 doubleword types, then 5 quadword
18933 types. */
18934 int rhs = j % 5;
18935 switch (insn_data[icode].operand[0].mode)
18937 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
18938 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
18939 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
18940 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
18941 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
18942 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
18943 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
18944 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
18945 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
18946 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
18947 default: gcc_unreachable ();
18950 break;
18952 default:
18953 gcc_unreachable ();
18956 gcc_assert (ftype != NULL);
18958 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
18960 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
18961 NULL_TREE);
18966 static void
18967 arm_init_fp16_builtins (void)
18969 tree fp16_type = make_node (REAL_TYPE);
18970 TYPE_PRECISION (fp16_type) = 16;
18971 layout_type (fp16_type);
18972 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
18975 static void
18976 arm_init_builtins (void)
18978 arm_init_tls_builtins ();
18980 if (TARGET_REALLY_IWMMXT)
18981 arm_init_iwmmxt_builtins ();
18983 if (TARGET_NEON)
18984 arm_init_neon_builtins ();
18986 if (arm_fp16_format)
18987 arm_init_fp16_builtins ();
18990 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
18992 static const char *
18993 arm_invalid_parameter_type (const_tree t)
18995 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18996 return N_("function parameters cannot have __fp16 type");
18997 return NULL;
19000 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19002 static const char *
19003 arm_invalid_return_type (const_tree t)
19005 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19006 return N_("functions cannot return __fp16 type");
19007 return NULL;
19010 /* Implement TARGET_PROMOTED_TYPE. */
19012 static tree
19013 arm_promoted_type (const_tree t)
19015 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19016 return float_type_node;
19017 return NULL_TREE;
19020 /* Implement TARGET_CONVERT_TO_TYPE.
19021 Specifically, this hook implements the peculiarity of the ARM
19022 half-precision floating-point C semantics that requires conversions between
19023 __fp16 to or from double to do an intermediate conversion to float. */
19025 static tree
19026 arm_convert_to_type (tree type, tree expr)
19028 tree fromtype = TREE_TYPE (expr);
19029 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
19030 return NULL_TREE;
19031 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
19032 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
19033 return convert (type, convert (float_type_node, expr));
19034 return NULL_TREE;
19037 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19038 This simply adds HFmode as a supported mode; even though we don't
19039 implement arithmetic on this type directly, it's supported by
19040 optabs conversions, much the way the double-word arithmetic is
19041 special-cased in the default hook. */
19043 static bool
19044 arm_scalar_mode_supported_p (enum machine_mode mode)
19046 if (mode == HFmode)
19047 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
19048 else
19049 return default_scalar_mode_supported_p (mode);
19052 /* Errors in the source file can cause expand_expr to return const0_rtx
19053 where we expect a vector. To avoid crashing, use one of the vector
19054 clear instructions. */
19056 static rtx
19057 safe_vector_operand (rtx x, enum machine_mode mode)
19059 if (x != const0_rtx)
19060 return x;
19061 x = gen_reg_rtx (mode);
19063 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
19064 : gen_rtx_SUBREG (DImode, x, 0)));
19065 return x;
19068 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19070 static rtx
19071 arm_expand_binop_builtin (enum insn_code icode,
19072 tree exp, rtx target)
19074 rtx pat;
19075 tree arg0 = CALL_EXPR_ARG (exp, 0);
19076 tree arg1 = CALL_EXPR_ARG (exp, 1);
19077 rtx op0 = expand_normal (arg0);
19078 rtx op1 = expand_normal (arg1);
19079 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19080 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19081 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19083 if (VECTOR_MODE_P (mode0))
19084 op0 = safe_vector_operand (op0, mode0);
19085 if (VECTOR_MODE_P (mode1))
19086 op1 = safe_vector_operand (op1, mode1);
19088 if (! target
19089 || GET_MODE (target) != tmode
19090 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19091 target = gen_reg_rtx (tmode);
19093 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
19095 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19096 op0 = copy_to_mode_reg (mode0, op0);
19097 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19098 op1 = copy_to_mode_reg (mode1, op1);
19100 pat = GEN_FCN (icode) (target, op0, op1);
19101 if (! pat)
19102 return 0;
19103 emit_insn (pat);
19104 return target;
19107 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19109 static rtx
19110 arm_expand_unop_builtin (enum insn_code icode,
19111 tree exp, rtx target, int do_load)
19113 rtx pat;
19114 tree arg0 = CALL_EXPR_ARG (exp, 0);
19115 rtx op0 = expand_normal (arg0);
19116 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19117 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19119 if (! target
19120 || GET_MODE (target) != tmode
19121 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19122 target = gen_reg_rtx (tmode);
19123 if (do_load)
19124 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19125 else
19127 if (VECTOR_MODE_P (mode0))
19128 op0 = safe_vector_operand (op0, mode0);
19130 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19131 op0 = copy_to_mode_reg (mode0, op0);
19134 pat = GEN_FCN (icode) (target, op0);
19135 if (! pat)
19136 return 0;
19137 emit_insn (pat);
19138 return target;
19141 static int
19142 neon_builtin_compare (const void *a, const void *b)
19144 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
19145 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
19146 unsigned int soughtcode = key->base_fcode;
19148 if (soughtcode >= memb->base_fcode
19149 && soughtcode < memb->base_fcode + memb->num_vars)
19150 return 0;
19151 else if (soughtcode < memb->base_fcode)
19152 return -1;
19153 else
19154 return 1;
19157 static enum insn_code
19158 locate_neon_builtin_icode (int fcode, neon_itype *itype)
19160 neon_builtin_datum key
19161 = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
19162 neon_builtin_datum *found;
19163 int idx;
19165 key.base_fcode = fcode;
19166 found = (neon_builtin_datum *)
19167 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
19168 sizeof (neon_builtin_data[0]), neon_builtin_compare);
19169 gcc_assert (found);
19170 idx = fcode - (int) found->base_fcode;
19171 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
19173 if (itype)
19174 *itype = found->itype;
19176 return found->codes[idx];
19179 typedef enum {
19180 NEON_ARG_COPY_TO_REG,
19181 NEON_ARG_CONSTANT,
19182 NEON_ARG_STOP
19183 } builtin_arg;
19185 #define NEON_MAX_BUILTIN_ARGS 5
19187 /* Expand a Neon builtin. */
19188 static rtx
19189 arm_expand_neon_args (rtx target, int icode, int have_retval,
19190 tree exp, ...)
19192 va_list ap;
19193 rtx pat;
19194 tree arg[NEON_MAX_BUILTIN_ARGS];
19195 rtx op[NEON_MAX_BUILTIN_ARGS];
19196 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19197 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
19198 int argc = 0;
19200 if (have_retval
19201 && (!target
19202 || GET_MODE (target) != tmode
19203 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
19204 target = gen_reg_rtx (tmode);
19206 va_start (ap, exp);
19208 for (;;)
19210 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
19212 if (thisarg == NEON_ARG_STOP)
19213 break;
19214 else
19216 arg[argc] = CALL_EXPR_ARG (exp, argc);
19217 op[argc] = expand_normal (arg[argc]);
19218 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
19220 switch (thisarg)
19222 case NEON_ARG_COPY_TO_REG:
19223 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19224 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19225 (op[argc], mode[argc]))
19226 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
19227 break;
19229 case NEON_ARG_CONSTANT:
19230 /* FIXME: This error message is somewhat unhelpful. */
19231 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19232 (op[argc], mode[argc]))
19233 error ("argument must be a constant");
19234 break;
19236 case NEON_ARG_STOP:
19237 gcc_unreachable ();
19240 argc++;
19244 va_end (ap);
19246 if (have_retval)
19247 switch (argc)
19249 case 1:
19250 pat = GEN_FCN (icode) (target, op[0]);
19251 break;
19253 case 2:
19254 pat = GEN_FCN (icode) (target, op[0], op[1]);
19255 break;
19257 case 3:
19258 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
19259 break;
19261 case 4:
19262 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19263 break;
19265 case 5:
19266 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19267 break;
19269 default:
19270 gcc_unreachable ();
19272 else
19273 switch (argc)
19275 case 1:
19276 pat = GEN_FCN (icode) (op[0]);
19277 break;
19279 case 2:
19280 pat = GEN_FCN (icode) (op[0], op[1]);
19281 break;
19283 case 3:
19284 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19285 break;
19287 case 4:
19288 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19289 break;
19291 case 5:
19292 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19293 break;
19295 default:
19296 gcc_unreachable ();
19299 if (!pat)
19300 return 0;
19302 emit_insn (pat);
19304 return target;
19307 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19308 constants defined per-instruction or per instruction-variant. Instead, the
19309 required info is looked up in the table neon_builtin_data. */
19310 static rtx
19311 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
19313 neon_itype itype;
19314 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
19316 switch (itype)
19318 case NEON_UNOP:
19319 case NEON_CONVERT:
19320 case NEON_DUPLANE:
19321 return arm_expand_neon_args (target, icode, 1, exp,
19322 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19324 case NEON_BINOP:
19325 case NEON_SETLANE:
19326 case NEON_SCALARMUL:
19327 case NEON_SCALARMULL:
19328 case NEON_SCALARMULH:
19329 case NEON_SHIFTINSERT:
19330 case NEON_LOGICBINOP:
19331 return arm_expand_neon_args (target, icode, 1, exp,
19332 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19333 NEON_ARG_STOP);
19335 case NEON_TERNOP:
19336 return arm_expand_neon_args (target, icode, 1, exp,
19337 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19338 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19340 case NEON_GETLANE:
19341 case NEON_FIXCONV:
19342 case NEON_SHIFTIMM:
19343 return arm_expand_neon_args (target, icode, 1, exp,
19344 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19345 NEON_ARG_STOP);
19347 case NEON_CREATE:
19348 return arm_expand_neon_args (target, icode, 1, exp,
19349 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19351 case NEON_DUP:
19352 case NEON_SPLIT:
19353 case NEON_REINTERP:
19354 return arm_expand_neon_args (target, icode, 1, exp,
19355 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19357 case NEON_COMBINE:
19358 case NEON_VTBL:
19359 return arm_expand_neon_args (target, icode, 1, exp,
19360 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19362 case NEON_RESULTPAIR:
19363 return arm_expand_neon_args (target, icode, 0, exp,
19364 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19365 NEON_ARG_STOP);
19367 case NEON_LANEMUL:
19368 case NEON_LANEMULL:
19369 case NEON_LANEMULH:
19370 return arm_expand_neon_args (target, icode, 1, exp,
19371 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19372 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19374 case NEON_LANEMAC:
19375 return arm_expand_neon_args (target, icode, 1, exp,
19376 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19377 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19379 case NEON_SHIFTACC:
19380 return arm_expand_neon_args (target, icode, 1, exp,
19381 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19382 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19384 case NEON_SCALARMAC:
19385 return arm_expand_neon_args (target, icode, 1, exp,
19386 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19387 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19389 case NEON_SELECT:
19390 case NEON_VTBX:
19391 return arm_expand_neon_args (target, icode, 1, exp,
19392 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19393 NEON_ARG_STOP);
19395 case NEON_LOAD1:
19396 case NEON_LOADSTRUCT:
19397 return arm_expand_neon_args (target, icode, 1, exp,
19398 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19400 case NEON_LOAD1LANE:
19401 case NEON_LOADSTRUCTLANE:
19402 return arm_expand_neon_args (target, icode, 1, exp,
19403 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19404 NEON_ARG_STOP);
19406 case NEON_STORE1:
19407 case NEON_STORESTRUCT:
19408 return arm_expand_neon_args (target, icode, 0, exp,
19409 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19411 case NEON_STORE1LANE:
19412 case NEON_STORESTRUCTLANE:
19413 return arm_expand_neon_args (target, icode, 0, exp,
19414 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19415 NEON_ARG_STOP);
19418 gcc_unreachable ();
19421 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19422 void
19423 neon_reinterpret (rtx dest, rtx src)
19425 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
19428 /* Emit code to place a Neon pair result in memory locations (with equal
19429 registers). */
19430 void
19431 neon_emit_pair_result_insn (enum machine_mode mode,
19432 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19433 rtx op1, rtx op2)
19435 rtx mem = gen_rtx_MEM (mode, destaddr);
19436 rtx tmp1 = gen_reg_rtx (mode);
19437 rtx tmp2 = gen_reg_rtx (mode);
19439 emit_insn (intfn (tmp1, op1, tmp2, op2));
19441 emit_move_insn (mem, tmp1);
19442 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19443 emit_move_insn (mem, tmp2);
19446 /* Set up operands for a register copy from src to dest, taking care not to
19447 clobber registers in the process.
19448 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19449 be called with a large N, so that should be OK. */
19451 void
19452 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
19454 unsigned int copied = 0, opctr = 0;
19455 unsigned int done = (1 << count) - 1;
19456 unsigned int i, j;
19458 while (copied != done)
19460 for (i = 0; i < count; i++)
19462 int good = 1;
19464 for (j = 0; good && j < count; j++)
19465 if (i != j && (copied & (1 << j)) == 0
19466 && reg_overlap_mentioned_p (src[j], dest[i]))
19467 good = 0;
19469 if (good)
19471 operands[opctr++] = dest[i];
19472 operands[opctr++] = src[i];
19473 copied |= 1 << i;
19478 gcc_assert (opctr == count * 2);
19481 /* Expand an expression EXP that calls a built-in function,
19482 with result going to TARGET if that's convenient
19483 (and in mode MODE if that's convenient).
19484 SUBTARGET may be used as the target for computing one of EXP's operands.
19485 IGNORE is nonzero if the value is to be ignored. */
19487 static rtx
19488 arm_expand_builtin (tree exp,
19489 rtx target,
19490 rtx subtarget ATTRIBUTE_UNUSED,
19491 enum machine_mode mode ATTRIBUTE_UNUSED,
19492 int ignore ATTRIBUTE_UNUSED)
19494 const struct builtin_description * d;
19495 enum insn_code icode;
19496 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19497 tree arg0;
19498 tree arg1;
19499 tree arg2;
19500 rtx op0;
19501 rtx op1;
19502 rtx op2;
19503 rtx pat;
19504 int fcode = DECL_FUNCTION_CODE (fndecl);
19505 size_t i;
19506 enum machine_mode tmode;
19507 enum machine_mode mode0;
19508 enum machine_mode mode1;
19509 enum machine_mode mode2;
19511 if (fcode >= ARM_BUILTIN_NEON_BASE)
19512 return arm_expand_neon_builtin (fcode, exp, target);
19514 switch (fcode)
19516 case ARM_BUILTIN_TEXTRMSB:
19517 case ARM_BUILTIN_TEXTRMUB:
19518 case ARM_BUILTIN_TEXTRMSH:
19519 case ARM_BUILTIN_TEXTRMUH:
19520 case ARM_BUILTIN_TEXTRMSW:
19521 case ARM_BUILTIN_TEXTRMUW:
19522 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19523 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19524 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19525 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19526 : CODE_FOR_iwmmxt_textrmw);
19528 arg0 = CALL_EXPR_ARG (exp, 0);
19529 arg1 = CALL_EXPR_ARG (exp, 1);
19530 op0 = expand_normal (arg0);
19531 op1 = expand_normal (arg1);
19532 tmode = insn_data[icode].operand[0].mode;
19533 mode0 = insn_data[icode].operand[1].mode;
19534 mode1 = insn_data[icode].operand[2].mode;
19536 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19537 op0 = copy_to_mode_reg (mode0, op0);
19538 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19540 /* @@@ better error message */
19541 error ("selector must be an immediate");
19542 return gen_reg_rtx (tmode);
19544 if (target == 0
19545 || GET_MODE (target) != tmode
19546 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19547 target = gen_reg_rtx (tmode);
19548 pat = GEN_FCN (icode) (target, op0, op1);
19549 if (! pat)
19550 return 0;
19551 emit_insn (pat);
19552 return target;
19554 case ARM_BUILTIN_TINSRB:
19555 case ARM_BUILTIN_TINSRH:
19556 case ARM_BUILTIN_TINSRW:
19557 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19558 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19559 : CODE_FOR_iwmmxt_tinsrw);
19560 arg0 = CALL_EXPR_ARG (exp, 0);
19561 arg1 = CALL_EXPR_ARG (exp, 1);
19562 arg2 = CALL_EXPR_ARG (exp, 2);
19563 op0 = expand_normal (arg0);
19564 op1 = expand_normal (arg1);
19565 op2 = expand_normal (arg2);
19566 tmode = insn_data[icode].operand[0].mode;
19567 mode0 = insn_data[icode].operand[1].mode;
19568 mode1 = insn_data[icode].operand[2].mode;
19569 mode2 = insn_data[icode].operand[3].mode;
19571 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19572 op0 = copy_to_mode_reg (mode0, op0);
19573 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19574 op1 = copy_to_mode_reg (mode1, op1);
19575 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19577 /* @@@ better error message */
19578 error ("selector must be an immediate");
19579 return const0_rtx;
19581 if (target == 0
19582 || GET_MODE (target) != tmode
19583 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19584 target = gen_reg_rtx (tmode);
19585 pat = GEN_FCN (icode) (target, op0, op1, op2);
19586 if (! pat)
19587 return 0;
19588 emit_insn (pat);
19589 return target;
19591 case ARM_BUILTIN_SETWCX:
19592 arg0 = CALL_EXPR_ARG (exp, 0);
19593 arg1 = CALL_EXPR_ARG (exp, 1);
19594 op0 = force_reg (SImode, expand_normal (arg0));
19595 op1 = expand_normal (arg1);
19596 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19597 return 0;
19599 case ARM_BUILTIN_GETWCX:
19600 arg0 = CALL_EXPR_ARG (exp, 0);
19601 op0 = expand_normal (arg0);
19602 target = gen_reg_rtx (SImode);
19603 emit_insn (gen_iwmmxt_tmrc (target, op0));
19604 return target;
19606 case ARM_BUILTIN_WSHUFH:
19607 icode = CODE_FOR_iwmmxt_wshufh;
19608 arg0 = CALL_EXPR_ARG (exp, 0);
19609 arg1 = CALL_EXPR_ARG (exp, 1);
19610 op0 = expand_normal (arg0);
19611 op1 = expand_normal (arg1);
19612 tmode = insn_data[icode].operand[0].mode;
19613 mode1 = insn_data[icode].operand[1].mode;
19614 mode2 = insn_data[icode].operand[2].mode;
19616 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19617 op0 = copy_to_mode_reg (mode1, op0);
19618 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19620 /* @@@ better error message */
19621 error ("mask must be an immediate");
19622 return const0_rtx;
19624 if (target == 0
19625 || GET_MODE (target) != tmode
19626 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19627 target = gen_reg_rtx (tmode);
19628 pat = GEN_FCN (icode) (target, op0, op1);
19629 if (! pat)
19630 return 0;
19631 emit_insn (pat);
19632 return target;
19634 case ARM_BUILTIN_WSADB:
19635 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19636 case ARM_BUILTIN_WSADH:
19637 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19638 case ARM_BUILTIN_WSADBZ:
19639 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19640 case ARM_BUILTIN_WSADHZ:
19641 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19643 /* Several three-argument builtins. */
19644 case ARM_BUILTIN_WMACS:
19645 case ARM_BUILTIN_WMACU:
19646 case ARM_BUILTIN_WALIGN:
19647 case ARM_BUILTIN_TMIA:
19648 case ARM_BUILTIN_TMIAPH:
19649 case ARM_BUILTIN_TMIATT:
19650 case ARM_BUILTIN_TMIATB:
19651 case ARM_BUILTIN_TMIABT:
19652 case ARM_BUILTIN_TMIABB:
19653 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19654 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19655 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19656 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19657 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19658 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19659 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19660 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19661 : CODE_FOR_iwmmxt_walign);
19662 arg0 = CALL_EXPR_ARG (exp, 0);
19663 arg1 = CALL_EXPR_ARG (exp, 1);
19664 arg2 = CALL_EXPR_ARG (exp, 2);
19665 op0 = expand_normal (arg0);
19666 op1 = expand_normal (arg1);
19667 op2 = expand_normal (arg2);
19668 tmode = insn_data[icode].operand[0].mode;
19669 mode0 = insn_data[icode].operand[1].mode;
19670 mode1 = insn_data[icode].operand[2].mode;
19671 mode2 = insn_data[icode].operand[3].mode;
19673 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19674 op0 = copy_to_mode_reg (mode0, op0);
19675 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19676 op1 = copy_to_mode_reg (mode1, op1);
19677 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19678 op2 = copy_to_mode_reg (mode2, op2);
19679 if (target == 0
19680 || GET_MODE (target) != tmode
19681 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19682 target = gen_reg_rtx (tmode);
19683 pat = GEN_FCN (icode) (target, op0, op1, op2);
19684 if (! pat)
19685 return 0;
19686 emit_insn (pat);
19687 return target;
19689 case ARM_BUILTIN_WZERO:
19690 target = gen_reg_rtx (DImode);
19691 emit_insn (gen_iwmmxt_clrdi (target));
19692 return target;
19694 case ARM_BUILTIN_THREAD_POINTER:
19695 return arm_load_tp (target);
19697 default:
19698 break;
19701 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19702 if (d->code == (const enum arm_builtins) fcode)
19703 return arm_expand_binop_builtin (d->icode, exp, target);
19705 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19706 if (d->code == (const enum arm_builtins) fcode)
19707 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19709 /* @@@ Should really do something sensible here. */
19710 return NULL_RTX;
19713 /* Return the number (counting from 0) of
19714 the least significant set bit in MASK. */
19716 inline static int
19717 number_of_first_bit_set (unsigned mask)
19719 int bit;
19721 for (bit = 0;
19722 (mask & (1 << bit)) == 0;
19723 ++bit)
19724 continue;
19726 return bit;
19729 /* Emit code to push or pop registers to or from the stack. F is the
19730 assembly file. MASK is the registers to push or pop. PUSH is
19731 nonzero if we should push, and zero if we should pop. For debugging
19732 output, if pushing, adjust CFA_OFFSET by the amount of space added
19733 to the stack. REAL_REGS should have the same number of bits set as
19734 MASK, and will be used instead (in the same order) to describe which
19735 registers were saved - this is used to mark the save slots when we
19736 push high registers after moving them to low registers. */
19737 static void
19738 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19739 unsigned long real_regs)
19741 int regno;
19742 int lo_mask = mask & 0xFF;
19743 int pushed_words = 0;
19745 gcc_assert (mask);
19747 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19749 /* Special case. Do not generate a POP PC statement here, do it in
19750 thumb_exit() */
19751 thumb_exit (f, -1);
19752 return;
19755 if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
19757 fprintf (f, "\t.save\t{");
19758 for (regno = 0; regno < 15; regno++)
19760 if (real_regs & (1 << regno))
19762 if (real_regs & ((1 << regno) -1))
19763 fprintf (f, ", ");
19764 asm_fprintf (f, "%r", regno);
19767 fprintf (f, "}\n");
19770 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19772 /* Look at the low registers first. */
19773 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19775 if (lo_mask & 1)
19777 asm_fprintf (f, "%r", regno);
19779 if ((lo_mask & ~1) != 0)
19780 fprintf (f, ", ");
19782 pushed_words++;
19786 if (push && (mask & (1 << LR_REGNUM)))
19788 /* Catch pushing the LR. */
19789 if (mask & 0xFF)
19790 fprintf (f, ", ");
19792 asm_fprintf (f, "%r", LR_REGNUM);
19794 pushed_words++;
19796 else if (!push && (mask & (1 << PC_REGNUM)))
19798 /* Catch popping the PC. */
19799 if (TARGET_INTERWORK || TARGET_BACKTRACE
19800 || crtl->calls_eh_return)
19802 /* The PC is never poped directly, instead
19803 it is popped into r3 and then BX is used. */
19804 fprintf (f, "}\n");
19806 thumb_exit (f, -1);
19808 return;
19810 else
19812 if (mask & 0xFF)
19813 fprintf (f, ", ");
19815 asm_fprintf (f, "%r", PC_REGNUM);
19819 fprintf (f, "}\n");
19821 if (push && pushed_words && dwarf2out_do_frame ())
19823 char *l = dwarf2out_cfi_label (false);
19824 int pushed_mask = real_regs;
19826 *cfa_offset += pushed_words * 4;
19827 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19829 pushed_words = 0;
19830 pushed_mask = real_regs;
19831 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19833 if (pushed_mask & 1)
19834 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
19839 /* Generate code to return from a thumb function.
19840 If 'reg_containing_return_addr' is -1, then the return address is
19841 actually on the stack, at the stack pointer. */
19842 static void
19843 thumb_exit (FILE *f, int reg_containing_return_addr)
19845 unsigned regs_available_for_popping;
19846 unsigned regs_to_pop;
19847 int pops_needed;
19848 unsigned available;
19849 unsigned required;
19850 int mode;
19851 int size;
19852 int restore_a4 = FALSE;
19854 /* Compute the registers we need to pop. */
19855 regs_to_pop = 0;
19856 pops_needed = 0;
19858 if (reg_containing_return_addr == -1)
19860 regs_to_pop |= 1 << LR_REGNUM;
19861 ++pops_needed;
19864 if (TARGET_BACKTRACE)
19866 /* Restore the (ARM) frame pointer and stack pointer. */
19867 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
19868 pops_needed += 2;
19871 /* If there is nothing to pop then just emit the BX instruction and
19872 return. */
19873 if (pops_needed == 0)
19875 if (crtl->calls_eh_return)
19876 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19878 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19879 return;
19881 /* Otherwise if we are not supporting interworking and we have not created
19882 a backtrace structure and the function was not entered in ARM mode then
19883 just pop the return address straight into the PC. */
19884 else if (!TARGET_INTERWORK
19885 && !TARGET_BACKTRACE
19886 && !is_called_in_ARM_mode (current_function_decl)
19887 && !crtl->calls_eh_return)
19889 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
19890 return;
19893 /* Find out how many of the (return) argument registers we can corrupt. */
19894 regs_available_for_popping = 0;
19896 /* If returning via __builtin_eh_return, the bottom three registers
19897 all contain information needed for the return. */
19898 if (crtl->calls_eh_return)
19899 size = 12;
19900 else
19902 /* If we can deduce the registers used from the function's
19903 return value. This is more reliable that examining
19904 df_regs_ever_live_p () because that will be set if the register is
19905 ever used in the function, not just if the register is used
19906 to hold a return value. */
19908 if (crtl->return_rtx != 0)
19909 mode = GET_MODE (crtl->return_rtx);
19910 else
19911 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19913 size = GET_MODE_SIZE (mode);
19915 if (size == 0)
19917 /* In a void function we can use any argument register.
19918 In a function that returns a structure on the stack
19919 we can use the second and third argument registers. */
19920 if (mode == VOIDmode)
19921 regs_available_for_popping =
19922 (1 << ARG_REGISTER (1))
19923 | (1 << ARG_REGISTER (2))
19924 | (1 << ARG_REGISTER (3));
19925 else
19926 regs_available_for_popping =
19927 (1 << ARG_REGISTER (2))
19928 | (1 << ARG_REGISTER (3));
19930 else if (size <= 4)
19931 regs_available_for_popping =
19932 (1 << ARG_REGISTER (2))
19933 | (1 << ARG_REGISTER (3));
19934 else if (size <= 8)
19935 regs_available_for_popping =
19936 (1 << ARG_REGISTER (3));
19939 /* Match registers to be popped with registers into which we pop them. */
19940 for (available = regs_available_for_popping,
19941 required = regs_to_pop;
19942 required != 0 && available != 0;
19943 available &= ~(available & - available),
19944 required &= ~(required & - required))
19945 -- pops_needed;
19947 /* If we have any popping registers left over, remove them. */
19948 if (available > 0)
19949 regs_available_for_popping &= ~available;
19951 /* Otherwise if we need another popping register we can use
19952 the fourth argument register. */
19953 else if (pops_needed)
19955 /* If we have not found any free argument registers and
19956 reg a4 contains the return address, we must move it. */
19957 if (regs_available_for_popping == 0
19958 && reg_containing_return_addr == LAST_ARG_REGNUM)
19960 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19961 reg_containing_return_addr = LR_REGNUM;
19963 else if (size > 12)
19965 /* Register a4 is being used to hold part of the return value,
19966 but we have dire need of a free, low register. */
19967 restore_a4 = TRUE;
19969 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
19972 if (reg_containing_return_addr != LAST_ARG_REGNUM)
19974 /* The fourth argument register is available. */
19975 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
19977 --pops_needed;
19981 /* Pop as many registers as we can. */
19982 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19983 regs_available_for_popping);
19985 /* Process the registers we popped. */
19986 if (reg_containing_return_addr == -1)
19988 /* The return address was popped into the lowest numbered register. */
19989 regs_to_pop &= ~(1 << LR_REGNUM);
19991 reg_containing_return_addr =
19992 number_of_first_bit_set (regs_available_for_popping);
19994 /* Remove this register for the mask of available registers, so that
19995 the return address will not be corrupted by further pops. */
19996 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
19999 /* If we popped other registers then handle them here. */
20000 if (regs_available_for_popping)
20002 int frame_pointer;
20004 /* Work out which register currently contains the frame pointer. */
20005 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20007 /* Move it into the correct place. */
20008 asm_fprintf (f, "\tmov\t%r, %r\n",
20009 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20011 /* (Temporarily) remove it from the mask of popped registers. */
20012 regs_available_for_popping &= ~(1 << frame_pointer);
20013 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20015 if (regs_available_for_popping)
20017 int stack_pointer;
20019 /* We popped the stack pointer as well,
20020 find the register that contains it. */
20021 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20023 /* Move it into the stack register. */
20024 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20026 /* At this point we have popped all necessary registers, so
20027 do not worry about restoring regs_available_for_popping
20028 to its correct value:
20030 assert (pops_needed == 0)
20031 assert (regs_available_for_popping == (1 << frame_pointer))
20032 assert (regs_to_pop == (1 << STACK_POINTER)) */
20034 else
20036 /* Since we have just move the popped value into the frame
20037 pointer, the popping register is available for reuse, and
20038 we know that we still have the stack pointer left to pop. */
20039 regs_available_for_popping |= (1 << frame_pointer);
20043 /* If we still have registers left on the stack, but we no longer have
20044 any registers into which we can pop them, then we must move the return
20045 address into the link register and make available the register that
20046 contained it. */
20047 if (regs_available_for_popping == 0 && pops_needed > 0)
20049 regs_available_for_popping |= 1 << reg_containing_return_addr;
20051 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20052 reg_containing_return_addr);
20054 reg_containing_return_addr = LR_REGNUM;
20057 /* If we have registers left on the stack then pop some more.
20058 We know that at most we will want to pop FP and SP. */
20059 if (pops_needed > 0)
20061 int popped_into;
20062 int move_to;
20064 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20065 regs_available_for_popping);
20067 /* We have popped either FP or SP.
20068 Move whichever one it is into the correct register. */
20069 popped_into = number_of_first_bit_set (regs_available_for_popping);
20070 move_to = number_of_first_bit_set (regs_to_pop);
20072 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20074 regs_to_pop &= ~(1 << move_to);
20076 --pops_needed;
20079 /* If we still have not popped everything then we must have only
20080 had one register available to us and we are now popping the SP. */
20081 if (pops_needed > 0)
20083 int popped_into;
20085 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20086 regs_available_for_popping);
20088 popped_into = number_of_first_bit_set (regs_available_for_popping);
20090 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
20092 assert (regs_to_pop == (1 << STACK_POINTER))
20093 assert (pops_needed == 1)
20097 /* If necessary restore the a4 register. */
20098 if (restore_a4)
20100 if (reg_containing_return_addr != LR_REGNUM)
20102 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20103 reg_containing_return_addr = LR_REGNUM;
20106 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
20109 if (crtl->calls_eh_return)
20110 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20112 /* Return to caller. */
20113 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20116 /* Scan INSN just before assembler is output for it.
20117 For Thumb-1, we track the status of the condition codes; this
20118 information is used in the cbranchsi4_insn pattern. */
20119 void
20120 thumb1_final_prescan_insn (rtx insn)
20122 if (flag_print_asm_name)
20123 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
20124 INSN_ADDRESSES (INSN_UID (insn)));
20125 /* Don't overwrite the previous setter when we get to a cbranch. */
20126 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20128 enum attr_conds conds;
20130 if (cfun->machine->thumb1_cc_insn)
20132 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20133 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20134 CC_STATUS_INIT;
20136 conds = get_attr_conds (insn);
20137 if (conds == CONDS_SET)
20139 rtx set = single_set (insn);
20140 cfun->machine->thumb1_cc_insn = insn;
20141 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20142 cfun->machine->thumb1_cc_op1 = const0_rtx;
20143 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20144 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20146 rtx src1 = XEXP (SET_SRC (set), 1);
20147 if (src1 == const0_rtx)
20148 cfun->machine->thumb1_cc_mode = CCmode;
20151 else if (conds != CONDS_NOCOND)
20152 cfun->machine->thumb1_cc_insn = NULL_RTX;
20157 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20159 unsigned HOST_WIDE_INT mask = 0xff;
20160 int i;
20162 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20163 if (val == 0) /* XXX */
20164 return 0;
20166 for (i = 0; i < 25; i++)
20167 if ((val & (mask << i)) == val)
20168 return 1;
20170 return 0;
20173 /* Returns nonzero if the current function contains,
20174 or might contain a far jump. */
20175 static int
20176 thumb_far_jump_used_p (void)
20178 rtx insn;
20180 /* This test is only important for leaf functions. */
20181 /* assert (!leaf_function_p ()); */
20183 /* If we have already decided that far jumps may be used,
20184 do not bother checking again, and always return true even if
20185 it turns out that they are not being used. Once we have made
20186 the decision that far jumps are present (and that hence the link
20187 register will be pushed onto the stack) we cannot go back on it. */
20188 if (cfun->machine->far_jump_used)
20189 return 1;
20191 /* If this function is not being called from the prologue/epilogue
20192 generation code then it must be being called from the
20193 INITIAL_ELIMINATION_OFFSET macro. */
20194 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20196 /* In this case we know that we are being asked about the elimination
20197 of the arg pointer register. If that register is not being used,
20198 then there are no arguments on the stack, and we do not have to
20199 worry that a far jump might force the prologue to push the link
20200 register, changing the stack offsets. In this case we can just
20201 return false, since the presence of far jumps in the function will
20202 not affect stack offsets.
20204 If the arg pointer is live (or if it was live, but has now been
20205 eliminated and so set to dead) then we do have to test to see if
20206 the function might contain a far jump. This test can lead to some
20207 false negatives, since before reload is completed, then length of
20208 branch instructions is not known, so gcc defaults to returning their
20209 longest length, which in turn sets the far jump attribute to true.
20211 A false negative will not result in bad code being generated, but it
20212 will result in a needless push and pop of the link register. We
20213 hope that this does not occur too often.
20215 If we need doubleword stack alignment this could affect the other
20216 elimination offsets so we can't risk getting it wrong. */
20217 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20218 cfun->machine->arg_pointer_live = 1;
20219 else if (!cfun->machine->arg_pointer_live)
20220 return 0;
20223 /* Check to see if the function contains a branch
20224 insn with the far jump attribute set. */
20225 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20227 if (GET_CODE (insn) == JUMP_INSN
20228 /* Ignore tablejump patterns. */
20229 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20230 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20231 && get_attr_far_jump (insn) == FAR_JUMP_YES
20234 /* Record the fact that we have decided that
20235 the function does use far jumps. */
20236 cfun->machine->far_jump_used = 1;
20237 return 1;
20241 return 0;
20244 /* Return nonzero if FUNC must be entered in ARM mode. */
20246 is_called_in_ARM_mode (tree func)
20248 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20250 /* Ignore the problem about functions whose address is taken. */
20251 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20252 return TRUE;
20254 #ifdef ARM_PE
20255 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20256 #else
20257 return FALSE;
20258 #endif
20261 /* Given the stack offsets and register mask in OFFSETS, decide how
20262 many additional registers to push instead of subtracting a constant
20263 from SP. For epilogues the principle is the same except we use pop.
20264 FOR_PROLOGUE indicates which we're generating. */
20265 static int
20266 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20268 HOST_WIDE_INT amount;
20269 unsigned long live_regs_mask = offsets->saved_regs_mask;
20270 /* Extract a mask of the ones we can give to the Thumb's push/pop
20271 instruction. */
20272 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20273 /* Then count how many other high registers will need to be pushed. */
20274 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20275 int n_free, reg_base;
20277 if (!for_prologue && frame_pointer_needed)
20278 amount = offsets->locals_base - offsets->saved_regs;
20279 else
20280 amount = offsets->outgoing_args - offsets->saved_regs;
20282 /* If the stack frame size is 512 exactly, we can save one load
20283 instruction, which should make this a win even when optimizing
20284 for speed. */
20285 if (!optimize_size && amount != 512)
20286 return 0;
20288 /* Can't do this if there are high registers to push. */
20289 if (high_regs_pushed != 0)
20290 return 0;
20292 /* Shouldn't do it in the prologue if no registers would normally
20293 be pushed at all. In the epilogue, also allow it if we'll have
20294 a pop insn for the PC. */
20295 if (l_mask == 0
20296 && (for_prologue
20297 || TARGET_BACKTRACE
20298 || (live_regs_mask & 1 << LR_REGNUM) == 0
20299 || TARGET_INTERWORK
20300 || crtl->args.pretend_args_size != 0))
20301 return 0;
20303 /* Don't do this if thumb_expand_prologue wants to emit instructions
20304 between the push and the stack frame allocation. */
20305 if (for_prologue
20306 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20307 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20308 return 0;
20310 reg_base = 0;
20311 n_free = 0;
20312 if (!for_prologue)
20314 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20315 live_regs_mask >>= reg_base;
20318 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20319 && (for_prologue || call_used_regs[reg_base + n_free]))
20321 live_regs_mask >>= 1;
20322 n_free++;
20325 if (n_free == 0)
20326 return 0;
20327 gcc_assert (amount / 4 * 4 == amount);
20329 if (amount >= 512 && (amount - n_free * 4) < 512)
20330 return (amount - 508) / 4;
20331 if (amount <= n_free * 4)
20332 return amount / 4;
20333 return 0;
20336 /* The bits which aren't usefully expanded as rtl. */
20337 const char *
20338 thumb_unexpanded_epilogue (void)
20340 arm_stack_offsets *offsets;
20341 int regno;
20342 unsigned long live_regs_mask = 0;
20343 int high_regs_pushed = 0;
20344 int extra_pop;
20345 int had_to_push_lr;
20346 int size;
20348 if (cfun->machine->return_used_this_function != 0)
20349 return "";
20351 if (IS_NAKED (arm_current_func_type ()))
20352 return "";
20354 offsets = arm_get_frame_offsets ();
20355 live_regs_mask = offsets->saved_regs_mask;
20356 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20358 /* If we can deduce the registers used from the function's return value.
20359 This is more reliable that examining df_regs_ever_live_p () because that
20360 will be set if the register is ever used in the function, not just if
20361 the register is used to hold a return value. */
20362 size = arm_size_return_regs ();
20364 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20365 if (extra_pop > 0)
20367 unsigned long extra_mask = (1 << extra_pop) - 1;
20368 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20371 /* The prolog may have pushed some high registers to use as
20372 work registers. e.g. the testsuite file:
20373 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20374 compiles to produce:
20375 push {r4, r5, r6, r7, lr}
20376 mov r7, r9
20377 mov r6, r8
20378 push {r6, r7}
20379 as part of the prolog. We have to undo that pushing here. */
20381 if (high_regs_pushed)
20383 unsigned long mask = live_regs_mask & 0xff;
20384 int next_hi_reg;
20386 /* The available low registers depend on the size of the value we are
20387 returning. */
20388 if (size <= 12)
20389 mask |= 1 << 3;
20390 if (size <= 8)
20391 mask |= 1 << 2;
20393 if (mask == 0)
20394 /* Oh dear! We have no low registers into which we can pop
20395 high registers! */
20396 internal_error
20397 ("no low registers available for popping high registers");
20399 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20400 if (live_regs_mask & (1 << next_hi_reg))
20401 break;
20403 while (high_regs_pushed)
20405 /* Find lo register(s) into which the high register(s) can
20406 be popped. */
20407 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20409 if (mask & (1 << regno))
20410 high_regs_pushed--;
20411 if (high_regs_pushed == 0)
20412 break;
20415 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20417 /* Pop the values into the low register(s). */
20418 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20420 /* Move the value(s) into the high registers. */
20421 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20423 if (mask & (1 << regno))
20425 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20426 regno);
20428 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20429 if (live_regs_mask & (1 << next_hi_reg))
20430 break;
20434 live_regs_mask &= ~0x0f00;
20437 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20438 live_regs_mask &= 0xff;
20440 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20442 /* Pop the return address into the PC. */
20443 if (had_to_push_lr)
20444 live_regs_mask |= 1 << PC_REGNUM;
20446 /* Either no argument registers were pushed or a backtrace
20447 structure was created which includes an adjusted stack
20448 pointer, so just pop everything. */
20449 if (live_regs_mask)
20450 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20451 live_regs_mask);
20453 /* We have either just popped the return address into the
20454 PC or it is was kept in LR for the entire function.
20455 Note that thumb_pushpop has already called thumb_exit if the
20456 PC was in the list. */
20457 if (!had_to_push_lr)
20458 thumb_exit (asm_out_file, LR_REGNUM);
20460 else
20462 /* Pop everything but the return address. */
20463 if (live_regs_mask)
20464 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20465 live_regs_mask);
20467 if (had_to_push_lr)
20469 if (size > 12)
20471 /* We have no free low regs, so save one. */
20472 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20473 LAST_ARG_REGNUM);
20476 /* Get the return address into a temporary register. */
20477 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20478 1 << LAST_ARG_REGNUM);
20480 if (size > 12)
20482 /* Move the return address to lr. */
20483 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20484 LAST_ARG_REGNUM);
20485 /* Restore the low register. */
20486 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20487 IP_REGNUM);
20488 regno = LR_REGNUM;
20490 else
20491 regno = LAST_ARG_REGNUM;
20493 else
20494 regno = LR_REGNUM;
20496 /* Remove the argument registers that were pushed onto the stack. */
20497 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20498 SP_REGNUM, SP_REGNUM,
20499 crtl->args.pretend_args_size);
20501 thumb_exit (asm_out_file, regno);
20504 return "";
20507 /* Functions to save and restore machine-specific function data. */
20508 static struct machine_function *
20509 arm_init_machine_status (void)
20511 struct machine_function *machine;
20512 machine = ggc_alloc_cleared_machine_function ();
20514 #if ARM_FT_UNKNOWN != 0
20515 machine->func_type = ARM_FT_UNKNOWN;
20516 #endif
20517 return machine;
20520 /* Return an RTX indicating where the return address to the
20521 calling function can be found. */
20523 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20525 if (count != 0)
20526 return NULL_RTX;
20528 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20531 /* Do anything needed before RTL is emitted for each function. */
20532 void
20533 arm_init_expanders (void)
20535 /* Arrange to initialize and mark the machine per-function status. */
20536 init_machine_status = arm_init_machine_status;
20538 /* This is to stop the combine pass optimizing away the alignment
20539 adjustment of va_arg. */
20540 /* ??? It is claimed that this should not be necessary. */
20541 if (cfun)
20542 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20546 /* Like arm_compute_initial_elimination offset. Simpler because there
20547 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20548 to point at the base of the local variables after static stack
20549 space for a function has been allocated. */
20551 HOST_WIDE_INT
20552 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20554 arm_stack_offsets *offsets;
20556 offsets = arm_get_frame_offsets ();
20558 switch (from)
20560 case ARG_POINTER_REGNUM:
20561 switch (to)
20563 case STACK_POINTER_REGNUM:
20564 return offsets->outgoing_args - offsets->saved_args;
20566 case FRAME_POINTER_REGNUM:
20567 return offsets->soft_frame - offsets->saved_args;
20569 case ARM_HARD_FRAME_POINTER_REGNUM:
20570 return offsets->saved_regs - offsets->saved_args;
20572 case THUMB_HARD_FRAME_POINTER_REGNUM:
20573 return offsets->locals_base - offsets->saved_args;
20575 default:
20576 gcc_unreachable ();
20578 break;
20580 case FRAME_POINTER_REGNUM:
20581 switch (to)
20583 case STACK_POINTER_REGNUM:
20584 return offsets->outgoing_args - offsets->soft_frame;
20586 case ARM_HARD_FRAME_POINTER_REGNUM:
20587 return offsets->saved_regs - offsets->soft_frame;
20589 case THUMB_HARD_FRAME_POINTER_REGNUM:
20590 return offsets->locals_base - offsets->soft_frame;
20592 default:
20593 gcc_unreachable ();
20595 break;
20597 default:
20598 gcc_unreachable ();
20602 /* Generate the rest of a function's prologue. */
20603 void
20604 thumb1_expand_prologue (void)
20606 rtx insn, dwarf;
20608 HOST_WIDE_INT amount;
20609 arm_stack_offsets *offsets;
20610 unsigned long func_type;
20611 int regno;
20612 unsigned long live_regs_mask;
20614 func_type = arm_current_func_type ();
20616 /* Naked functions don't have prologues. */
20617 if (IS_NAKED (func_type))
20618 return;
20620 if (IS_INTERRUPT (func_type))
20622 error ("interrupt Service Routines cannot be coded in Thumb mode");
20623 return;
20626 offsets = arm_get_frame_offsets ();
20627 live_regs_mask = offsets->saved_regs_mask;
20628 /* Load the pic register before setting the frame pointer,
20629 so we can use r7 as a temporary work register. */
20630 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20631 arm_load_pic_register (live_regs_mask);
20633 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20634 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20635 stack_pointer_rtx);
20637 if (flag_stack_usage)
20638 current_function_static_stack_size
20639 = offsets->outgoing_args - offsets->saved_args;
20641 amount = offsets->outgoing_args - offsets->saved_regs;
20642 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20643 if (amount)
20645 if (amount < 512)
20647 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20648 GEN_INT (- amount)));
20649 RTX_FRAME_RELATED_P (insn) = 1;
20651 else
20653 rtx reg;
20655 /* The stack decrement is too big for an immediate value in a single
20656 insn. In theory we could issue multiple subtracts, but after
20657 three of them it becomes more space efficient to place the full
20658 value in the constant pool and load into a register. (Also the
20659 ARM debugger really likes to see only one stack decrement per
20660 function). So instead we look for a scratch register into which
20661 we can load the decrement, and then we subtract this from the
20662 stack pointer. Unfortunately on the thumb the only available
20663 scratch registers are the argument registers, and we cannot use
20664 these as they may hold arguments to the function. Instead we
20665 attempt to locate a call preserved register which is used by this
20666 function. If we can find one, then we know that it will have
20667 been pushed at the start of the prologue and so we can corrupt
20668 it now. */
20669 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20670 if (live_regs_mask & (1 << regno))
20671 break;
20673 gcc_assert(regno <= LAST_LO_REGNUM);
20675 reg = gen_rtx_REG (SImode, regno);
20677 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20679 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20680 stack_pointer_rtx, reg));
20681 RTX_FRAME_RELATED_P (insn) = 1;
20682 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20683 plus_constant (stack_pointer_rtx,
20684 -amount));
20685 RTX_FRAME_RELATED_P (dwarf) = 1;
20686 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20690 if (frame_pointer_needed)
20691 thumb_set_frame_pointer (offsets);
20693 /* If we are profiling, make sure no instructions are scheduled before
20694 the call to mcount. Similarly if the user has requested no
20695 scheduling in the prolog. Similarly if we want non-call exceptions
20696 using the EABI unwinder, to prevent faulting instructions from being
20697 swapped with a stack adjustment. */
20698 if (crtl->profile || !TARGET_SCHED_PROLOG
20699 || (arm_except_unwind_info (&global_options) == UI_TARGET
20700 && cfun->can_throw_non_call_exceptions))
20701 emit_insn (gen_blockage ());
20703 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20704 if (live_regs_mask & 0xff)
20705 cfun->machine->lr_save_eliminated = 0;
20709 void
20710 thumb1_expand_epilogue (void)
20712 HOST_WIDE_INT amount;
20713 arm_stack_offsets *offsets;
20714 int regno;
20716 /* Naked functions don't have prologues. */
20717 if (IS_NAKED (arm_current_func_type ()))
20718 return;
20720 offsets = arm_get_frame_offsets ();
20721 amount = offsets->outgoing_args - offsets->saved_regs;
20723 if (frame_pointer_needed)
20725 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20726 amount = offsets->locals_base - offsets->saved_regs;
20728 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20730 gcc_assert (amount >= 0);
20731 if (amount)
20733 if (amount < 512)
20734 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20735 GEN_INT (amount)));
20736 else
20738 /* r3 is always free in the epilogue. */
20739 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20741 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20742 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20746 /* Emit a USE (stack_pointer_rtx), so that
20747 the stack adjustment will not be deleted. */
20748 emit_insn (gen_prologue_use (stack_pointer_rtx));
20750 if (crtl->profile || !TARGET_SCHED_PROLOG)
20751 emit_insn (gen_blockage ());
20753 /* Emit a clobber for each insn that will be restored in the epilogue,
20754 so that flow2 will get register lifetimes correct. */
20755 for (regno = 0; regno < 13; regno++)
20756 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20757 emit_clobber (gen_rtx_REG (SImode, regno));
20759 if (! df_regs_ever_live_p (LR_REGNUM))
20760 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20763 static void
20764 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20766 arm_stack_offsets *offsets;
20767 unsigned long live_regs_mask = 0;
20768 unsigned long l_mask;
20769 unsigned high_regs_pushed = 0;
20770 int cfa_offset = 0;
20771 int regno;
20773 if (IS_NAKED (arm_current_func_type ()))
20774 return;
20776 if (is_called_in_ARM_mode (current_function_decl))
20778 const char * name;
20780 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20781 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20782 == SYMBOL_REF);
20783 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20785 /* Generate code sequence to switch us into Thumb mode. */
20786 /* The .code 32 directive has already been emitted by
20787 ASM_DECLARE_FUNCTION_NAME. */
20788 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20789 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20791 /* Generate a label, so that the debugger will notice the
20792 change in instruction sets. This label is also used by
20793 the assembler to bypass the ARM code when this function
20794 is called from a Thumb encoded function elsewhere in the
20795 same file. Hence the definition of STUB_NAME here must
20796 agree with the definition in gas/config/tc-arm.c. */
20798 #define STUB_NAME ".real_start_of"
20800 fprintf (f, "\t.code\t16\n");
20801 #ifdef ARM_PE
20802 if (arm_dllexport_name_p (name))
20803 name = arm_strip_name_encoding (name);
20804 #endif
20805 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20806 fprintf (f, "\t.thumb_func\n");
20807 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20810 if (crtl->args.pretend_args_size)
20812 /* Output unwind directive for the stack adjustment. */
20813 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20814 fprintf (f, "\t.pad #%d\n",
20815 crtl->args.pretend_args_size);
20817 if (cfun->machine->uses_anonymous_args)
20819 int num_pushes;
20821 fprintf (f, "\tpush\t{");
20823 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20825 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20826 regno <= LAST_ARG_REGNUM;
20827 regno++)
20828 asm_fprintf (f, "%r%s", regno,
20829 regno == LAST_ARG_REGNUM ? "" : ", ");
20831 fprintf (f, "}\n");
20833 else
20834 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20835 SP_REGNUM, SP_REGNUM,
20836 crtl->args.pretend_args_size);
20838 /* We don't need to record the stores for unwinding (would it
20839 help the debugger any if we did?), but record the change in
20840 the stack pointer. */
20841 if (dwarf2out_do_frame ())
20843 char *l = dwarf2out_cfi_label (false);
20845 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20846 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20850 /* Get the registers we are going to push. */
20851 offsets = arm_get_frame_offsets ();
20852 live_regs_mask = offsets->saved_regs_mask;
20853 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20854 l_mask = live_regs_mask & 0x40ff;
20855 /* Then count how many other high registers will need to be pushed. */
20856 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20858 if (TARGET_BACKTRACE)
20860 unsigned offset;
20861 unsigned work_register;
20863 /* We have been asked to create a stack backtrace structure.
20864 The code looks like this:
20866 0 .align 2
20867 0 func:
20868 0 sub SP, #16 Reserve space for 4 registers.
20869 2 push {R7} Push low registers.
20870 4 add R7, SP, #20 Get the stack pointer before the push.
20871 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20872 8 mov R7, PC Get hold of the start of this code plus 12.
20873 10 str R7, [SP, #16] Store it.
20874 12 mov R7, FP Get hold of the current frame pointer.
20875 14 str R7, [SP, #4] Store it.
20876 16 mov R7, LR Get hold of the current return address.
20877 18 str R7, [SP, #12] Store it.
20878 20 add R7, SP, #16 Point at the start of the backtrace structure.
20879 22 mov FP, R7 Put this value into the frame pointer. */
20881 work_register = thumb_find_work_register (live_regs_mask);
20883 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20884 asm_fprintf (f, "\t.pad #16\n");
20886 asm_fprintf
20887 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20888 SP_REGNUM, SP_REGNUM);
20890 if (dwarf2out_do_frame ())
20892 char *l = dwarf2out_cfi_label (false);
20894 cfa_offset = cfa_offset + 16;
20895 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20898 if (l_mask)
20900 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
20901 offset = bit_count (l_mask) * UNITS_PER_WORD;
20903 else
20904 offset = 0;
20906 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20907 offset + 16 + crtl->args.pretend_args_size);
20909 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20910 offset + 4);
20912 /* Make sure that the instruction fetching the PC is in the right place
20913 to calculate "start of backtrace creation code + 12". */
20914 if (l_mask)
20916 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20917 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20918 offset + 12);
20919 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20920 ARM_HARD_FRAME_POINTER_REGNUM);
20921 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20922 offset);
20924 else
20926 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20927 ARM_HARD_FRAME_POINTER_REGNUM);
20928 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20929 offset);
20930 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20931 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20932 offset + 12);
20935 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
20936 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20937 offset + 8);
20938 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20939 offset + 12);
20940 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
20941 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
20943 /* Optimization: If we are not pushing any low registers but we are going
20944 to push some high registers then delay our first push. This will just
20945 be a push of LR and we can combine it with the push of the first high
20946 register. */
20947 else if ((l_mask & 0xff) != 0
20948 || (high_regs_pushed == 0 && l_mask))
20950 unsigned long mask = l_mask;
20951 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
20952 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
20955 if (high_regs_pushed)
20957 unsigned pushable_regs;
20958 unsigned next_hi_reg;
20960 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
20961 if (live_regs_mask & (1 << next_hi_reg))
20962 break;
20964 pushable_regs = l_mask & 0xff;
20966 if (pushable_regs == 0)
20967 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
20969 while (high_regs_pushed > 0)
20971 unsigned long real_regs_mask = 0;
20973 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
20975 if (pushable_regs & (1 << regno))
20977 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
20979 high_regs_pushed --;
20980 real_regs_mask |= (1 << next_hi_reg);
20982 if (high_regs_pushed)
20984 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
20985 next_hi_reg --)
20986 if (live_regs_mask & (1 << next_hi_reg))
20987 break;
20989 else
20991 pushable_regs &= ~((1 << regno) - 1);
20992 break;
20997 /* If we had to find a work register and we have not yet
20998 saved the LR then add it to the list of regs to push. */
20999 if (l_mask == (1 << LR_REGNUM))
21001 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
21002 1, &cfa_offset,
21003 real_regs_mask | (1 << LR_REGNUM));
21004 l_mask = 0;
21006 else
21007 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
21012 /* Handle the case of a double word load into a low register from
21013 a computed memory address. The computed address may involve a
21014 register which is overwritten by the load. */
21015 const char *
21016 thumb_load_double_from_address (rtx *operands)
21018 rtx addr;
21019 rtx base;
21020 rtx offset;
21021 rtx arg1;
21022 rtx arg2;
21024 gcc_assert (GET_CODE (operands[0]) == REG);
21025 gcc_assert (GET_CODE (operands[1]) == MEM);
21027 /* Get the memory address. */
21028 addr = XEXP (operands[1], 0);
21030 /* Work out how the memory address is computed. */
21031 switch (GET_CODE (addr))
21033 case REG:
21034 operands[2] = adjust_address (operands[1], SImode, 4);
21036 if (REGNO (operands[0]) == REGNO (addr))
21038 output_asm_insn ("ldr\t%H0, %2", operands);
21039 output_asm_insn ("ldr\t%0, %1", operands);
21041 else
21043 output_asm_insn ("ldr\t%0, %1", operands);
21044 output_asm_insn ("ldr\t%H0, %2", operands);
21046 break;
21048 case CONST:
21049 /* Compute <address> + 4 for the high order load. */
21050 operands[2] = adjust_address (operands[1], SImode, 4);
21052 output_asm_insn ("ldr\t%0, %1", operands);
21053 output_asm_insn ("ldr\t%H0, %2", operands);
21054 break;
21056 case PLUS:
21057 arg1 = XEXP (addr, 0);
21058 arg2 = XEXP (addr, 1);
21060 if (CONSTANT_P (arg1))
21061 base = arg2, offset = arg1;
21062 else
21063 base = arg1, offset = arg2;
21065 gcc_assert (GET_CODE (base) == REG);
21067 /* Catch the case of <address> = <reg> + <reg> */
21068 if (GET_CODE (offset) == REG)
21070 int reg_offset = REGNO (offset);
21071 int reg_base = REGNO (base);
21072 int reg_dest = REGNO (operands[0]);
21074 /* Add the base and offset registers together into the
21075 higher destination register. */
21076 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21077 reg_dest + 1, reg_base, reg_offset);
21079 /* Load the lower destination register from the address in
21080 the higher destination register. */
21081 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21082 reg_dest, reg_dest + 1);
21084 /* Load the higher destination register from its own address
21085 plus 4. */
21086 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21087 reg_dest + 1, reg_dest + 1);
21089 else
21091 /* Compute <address> + 4 for the high order load. */
21092 operands[2] = adjust_address (operands[1], SImode, 4);
21094 /* If the computed address is held in the low order register
21095 then load the high order register first, otherwise always
21096 load the low order register first. */
21097 if (REGNO (operands[0]) == REGNO (base))
21099 output_asm_insn ("ldr\t%H0, %2", operands);
21100 output_asm_insn ("ldr\t%0, %1", operands);
21102 else
21104 output_asm_insn ("ldr\t%0, %1", operands);
21105 output_asm_insn ("ldr\t%H0, %2", operands);
21108 break;
21110 case LABEL_REF:
21111 /* With no registers to worry about we can just load the value
21112 directly. */
21113 operands[2] = adjust_address (operands[1], SImode, 4);
21115 output_asm_insn ("ldr\t%H0, %2", operands);
21116 output_asm_insn ("ldr\t%0, %1", operands);
21117 break;
21119 default:
21120 gcc_unreachable ();
21123 return "";
21126 const char *
21127 thumb_output_move_mem_multiple (int n, rtx *operands)
21129 rtx tmp;
21131 switch (n)
21133 case 2:
21134 if (REGNO (operands[4]) > REGNO (operands[5]))
21136 tmp = operands[4];
21137 operands[4] = operands[5];
21138 operands[5] = tmp;
21140 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21141 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21142 break;
21144 case 3:
21145 if (REGNO (operands[4]) > REGNO (operands[5]))
21147 tmp = operands[4];
21148 operands[4] = operands[5];
21149 operands[5] = tmp;
21151 if (REGNO (operands[5]) > REGNO (operands[6]))
21153 tmp = operands[5];
21154 operands[5] = operands[6];
21155 operands[6] = tmp;
21157 if (REGNO (operands[4]) > REGNO (operands[5]))
21159 tmp = operands[4];
21160 operands[4] = operands[5];
21161 operands[5] = tmp;
21164 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21165 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21166 break;
21168 default:
21169 gcc_unreachable ();
21172 return "";
21175 /* Output a call-via instruction for thumb state. */
21176 const char *
21177 thumb_call_via_reg (rtx reg)
21179 int regno = REGNO (reg);
21180 rtx *labelp;
21182 gcc_assert (regno < LR_REGNUM);
21184 /* If we are in the normal text section we can use a single instance
21185 per compilation unit. If we are doing function sections, then we need
21186 an entry per section, since we can't rely on reachability. */
21187 if (in_section == text_section)
21189 thumb_call_reg_needed = 1;
21191 if (thumb_call_via_label[regno] == NULL)
21192 thumb_call_via_label[regno] = gen_label_rtx ();
21193 labelp = thumb_call_via_label + regno;
21195 else
21197 if (cfun->machine->call_via[regno] == NULL)
21198 cfun->machine->call_via[regno] = gen_label_rtx ();
21199 labelp = cfun->machine->call_via + regno;
21202 output_asm_insn ("bl\t%a0", labelp);
21203 return "";
21206 /* Routines for generating rtl. */
21207 void
21208 thumb_expand_movmemqi (rtx *operands)
21210 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21211 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21212 HOST_WIDE_INT len = INTVAL (operands[2]);
21213 HOST_WIDE_INT offset = 0;
21215 while (len >= 12)
21217 emit_insn (gen_movmem12b (out, in, out, in));
21218 len -= 12;
21221 if (len >= 8)
21223 emit_insn (gen_movmem8b (out, in, out, in));
21224 len -= 8;
21227 if (len >= 4)
21229 rtx reg = gen_reg_rtx (SImode);
21230 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21231 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21232 len -= 4;
21233 offset += 4;
21236 if (len >= 2)
21238 rtx reg = gen_reg_rtx (HImode);
21239 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21240 plus_constant (in, offset))));
21241 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21242 reg));
21243 len -= 2;
21244 offset += 2;
21247 if (len)
21249 rtx reg = gen_reg_rtx (QImode);
21250 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21251 plus_constant (in, offset))));
21252 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
21253 reg));
21257 void
21258 thumb_reload_out_hi (rtx *operands)
21260 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21263 /* Handle reading a half-word from memory during reload. */
21264 void
21265 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21267 gcc_unreachable ();
21270 /* Return the length of a function name prefix
21271 that starts with the character 'c'. */
21272 static int
21273 arm_get_strip_length (int c)
21275 switch (c)
21277 ARM_NAME_ENCODING_LENGTHS
21278 default: return 0;
21282 /* Return a pointer to a function's name with any
21283 and all prefix encodings stripped from it. */
21284 const char *
21285 arm_strip_name_encoding (const char *name)
21287 int skip;
21289 while ((skip = arm_get_strip_length (* name)))
21290 name += skip;
21292 return name;
21295 /* If there is a '*' anywhere in the name's prefix, then
21296 emit the stripped name verbatim, otherwise prepend an
21297 underscore if leading underscores are being used. */
21298 void
21299 arm_asm_output_labelref (FILE *stream, const char *name)
21301 int skip;
21302 int verbatim = 0;
21304 while ((skip = arm_get_strip_length (* name)))
21306 verbatim |= (*name == '*');
21307 name += skip;
21310 if (verbatim)
21311 fputs (name, stream);
21312 else
21313 asm_fprintf (stream, "%U%s", name);
21316 static void
21317 arm_file_start (void)
21319 int val;
21321 if (TARGET_UNIFIED_ASM)
21322 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21324 if (TARGET_BPABI)
21326 const char *fpu_name;
21327 if (arm_selected_arch)
21328 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21329 else
21330 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21332 if (TARGET_SOFT_FLOAT)
21334 if (TARGET_VFP)
21335 fpu_name = "softvfp";
21336 else
21337 fpu_name = "softfpa";
21339 else
21341 fpu_name = arm_fpu_desc->name;
21342 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21344 if (TARGET_HARD_FLOAT)
21345 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21346 if (TARGET_HARD_FLOAT_ABI)
21347 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21350 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21352 /* Some of these attributes only apply when the corresponding features
21353 are used. However we don't have any easy way of figuring this out.
21354 Conservatively record the setting that would have been used. */
21356 /* Tag_ABI_FP_rounding. */
21357 if (flag_rounding_math)
21358 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21359 if (!flag_unsafe_math_optimizations)
21361 /* Tag_ABI_FP_denomal. */
21362 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21363 /* Tag_ABI_FP_exceptions. */
21364 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21366 /* Tag_ABI_FP_user_exceptions. */
21367 if (flag_signaling_nans)
21368 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21369 /* Tag_ABI_FP_number_model. */
21370 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21371 flag_finite_math_only ? 1 : 3);
21373 /* Tag_ABI_align8_needed. */
21374 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21375 /* Tag_ABI_align8_preserved. */
21376 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21377 /* Tag_ABI_enum_size. */
21378 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21379 flag_short_enums ? 1 : 2);
21381 /* Tag_ABI_optimization_goals. */
21382 if (optimize_size)
21383 val = 4;
21384 else if (optimize >= 2)
21385 val = 2;
21386 else if (optimize)
21387 val = 1;
21388 else
21389 val = 6;
21390 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21392 /* Tag_ABI_FP_16bit_format. */
21393 if (arm_fp16_format)
21394 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21395 (int)arm_fp16_format);
21397 if (arm_lang_output_object_attributes_hook)
21398 arm_lang_output_object_attributes_hook();
21400 default_file_start();
21403 static void
21404 arm_file_end (void)
21406 int regno;
21408 if (NEED_INDICATE_EXEC_STACK)
21409 /* Add .note.GNU-stack. */
21410 file_end_indicate_exec_stack ();
21412 if (! thumb_call_reg_needed)
21413 return;
21415 switch_to_section (text_section);
21416 asm_fprintf (asm_out_file, "\t.code 16\n");
21417 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21419 for (regno = 0; regno < LR_REGNUM; regno++)
21421 rtx label = thumb_call_via_label[regno];
21423 if (label != 0)
21425 targetm.asm_out.internal_label (asm_out_file, "L",
21426 CODE_LABEL_NUMBER (label));
21427 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21432 #ifndef ARM_PE
21433 /* Symbols in the text segment can be accessed without indirecting via the
21434 constant pool; it may take an extra binary operation, but this is still
21435 faster than indirecting via memory. Don't do this when not optimizing,
21436 since we won't be calculating al of the offsets necessary to do this
21437 simplification. */
21439 static void
21440 arm_encode_section_info (tree decl, rtx rtl, int first)
21442 if (optimize > 0 && TREE_CONSTANT (decl))
21443 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21445 default_encode_section_info (decl, rtl, first);
21447 #endif /* !ARM_PE */
21449 static void
21450 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21452 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21453 && !strcmp (prefix, "L"))
21455 arm_ccfsm_state = 0;
21456 arm_target_insn = NULL;
21458 default_internal_label (stream, prefix, labelno);
21461 /* Output code to add DELTA to the first argument, and then jump
21462 to FUNCTION. Used for C++ multiple inheritance. */
21463 static void
21464 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21465 HOST_WIDE_INT delta,
21466 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21467 tree function)
21469 static int thunk_label = 0;
21470 char label[256];
21471 char labelpc[256];
21472 int mi_delta = delta;
21473 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21474 int shift = 0;
21475 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21476 ? 1 : 0);
21477 if (mi_delta < 0)
21478 mi_delta = - mi_delta;
21480 if (TARGET_THUMB1)
21482 int labelno = thunk_label++;
21483 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21484 /* Thunks are entered in arm mode when avaiable. */
21485 if (TARGET_THUMB1_ONLY)
21487 /* push r3 so we can use it as a temporary. */
21488 /* TODO: Omit this save if r3 is not used. */
21489 fputs ("\tpush {r3}\n", file);
21490 fputs ("\tldr\tr3, ", file);
21492 else
21494 fputs ("\tldr\tr12, ", file);
21496 assemble_name (file, label);
21497 fputc ('\n', file);
21498 if (flag_pic)
21500 /* If we are generating PIC, the ldr instruction below loads
21501 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21502 the address of the add + 8, so we have:
21504 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21505 = target + 1.
21507 Note that we have "+ 1" because some versions of GNU ld
21508 don't set the low bit of the result for R_ARM_REL32
21509 relocations against thumb function symbols.
21510 On ARMv6M this is +4, not +8. */
21511 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21512 assemble_name (file, labelpc);
21513 fputs (":\n", file);
21514 if (TARGET_THUMB1_ONLY)
21516 /* This is 2 insns after the start of the thunk, so we know it
21517 is 4-byte aligned. */
21518 fputs ("\tadd\tr3, pc, r3\n", file);
21519 fputs ("\tmov r12, r3\n", file);
21521 else
21522 fputs ("\tadd\tr12, pc, r12\n", file);
21524 else if (TARGET_THUMB1_ONLY)
21525 fputs ("\tmov r12, r3\n", file);
21527 if (TARGET_THUMB1_ONLY)
21529 if (mi_delta > 255)
21531 fputs ("\tldr\tr3, ", file);
21532 assemble_name (file, label);
21533 fputs ("+4\n", file);
21534 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21535 mi_op, this_regno, this_regno);
21537 else if (mi_delta != 0)
21539 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21540 mi_op, this_regno, this_regno,
21541 mi_delta);
21544 else
21546 /* TODO: Use movw/movt for large constants when available. */
21547 while (mi_delta != 0)
21549 if ((mi_delta & (3 << shift)) == 0)
21550 shift += 2;
21551 else
21553 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21554 mi_op, this_regno, this_regno,
21555 mi_delta & (0xff << shift));
21556 mi_delta &= ~(0xff << shift);
21557 shift += 8;
21561 if (TARGET_THUMB1)
21563 if (TARGET_THUMB1_ONLY)
21564 fputs ("\tpop\t{r3}\n", file);
21566 fprintf (file, "\tbx\tr12\n");
21567 ASM_OUTPUT_ALIGN (file, 2);
21568 assemble_name (file, label);
21569 fputs (":\n", file);
21570 if (flag_pic)
21572 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21573 rtx tem = XEXP (DECL_RTL (function), 0);
21574 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21575 tem = gen_rtx_MINUS (GET_MODE (tem),
21576 tem,
21577 gen_rtx_SYMBOL_REF (Pmode,
21578 ggc_strdup (labelpc)));
21579 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21581 else
21582 /* Output ".word .LTHUNKn". */
21583 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21585 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21586 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21588 else
21590 fputs ("\tb\t", file);
21591 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21592 if (NEED_PLT_RELOC)
21593 fputs ("(PLT)", file);
21594 fputc ('\n', file);
21599 arm_emit_vector_const (FILE *file, rtx x)
21601 int i;
21602 const char * pattern;
21604 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21606 switch (GET_MODE (x))
21608 case V2SImode: pattern = "%08x"; break;
21609 case V4HImode: pattern = "%04x"; break;
21610 case V8QImode: pattern = "%02x"; break;
21611 default: gcc_unreachable ();
21614 fprintf (file, "0x");
21615 for (i = CONST_VECTOR_NUNITS (x); i--;)
21617 rtx element;
21619 element = CONST_VECTOR_ELT (x, i);
21620 fprintf (file, pattern, INTVAL (element));
21623 return 1;
21626 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
21627 HFmode constant pool entries are actually loaded with ldr. */
21628 void
21629 arm_emit_fp16_const (rtx c)
21631 REAL_VALUE_TYPE r;
21632 long bits;
21634 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21635 bits = real_to_target (NULL, &r, HFmode);
21636 if (WORDS_BIG_ENDIAN)
21637 assemble_zeros (2);
21638 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21639 if (!WORDS_BIG_ENDIAN)
21640 assemble_zeros (2);
21643 const char *
21644 arm_output_load_gr (rtx *operands)
21646 rtx reg;
21647 rtx offset;
21648 rtx wcgr;
21649 rtx sum;
21651 if (GET_CODE (operands [1]) != MEM
21652 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21653 || GET_CODE (reg = XEXP (sum, 0)) != REG
21654 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21655 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21656 return "wldrw%?\t%0, %1";
21658 /* Fix up an out-of-range load of a GR register. */
21659 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21660 wcgr = operands[0];
21661 operands[0] = reg;
21662 output_asm_insn ("ldr%?\t%0, %1", operands);
21664 operands[0] = wcgr;
21665 operands[1] = reg;
21666 output_asm_insn ("tmcr%?\t%0, %1", operands);
21667 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21669 return "";
21672 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21674 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21675 named arg and all anonymous args onto the stack.
21676 XXX I know the prologue shouldn't be pushing registers, but it is faster
21677 that way. */
21679 static void
21680 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21681 enum machine_mode mode,
21682 tree type,
21683 int *pretend_size,
21684 int second_time ATTRIBUTE_UNUSED)
21686 int nregs;
21688 cfun->machine->uses_anonymous_args = 1;
21689 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21691 nregs = pcum->aapcs_ncrn;
21692 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21693 nregs++;
21695 else
21696 nregs = pcum->nregs;
21698 if (nregs < NUM_ARG_REGS)
21699 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
21702 /* Return nonzero if the CONSUMER instruction (a store) does not need
21703 PRODUCER's value to calculate the address. */
21706 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21708 rtx value = PATTERN (producer);
21709 rtx addr = PATTERN (consumer);
21711 if (GET_CODE (value) == COND_EXEC)
21712 value = COND_EXEC_CODE (value);
21713 if (GET_CODE (value) == PARALLEL)
21714 value = XVECEXP (value, 0, 0);
21715 value = XEXP (value, 0);
21716 if (GET_CODE (addr) == COND_EXEC)
21717 addr = COND_EXEC_CODE (addr);
21718 if (GET_CODE (addr) == PARALLEL)
21719 addr = XVECEXP (addr, 0, 0);
21720 addr = XEXP (addr, 0);
21722 return !reg_overlap_mentioned_p (value, addr);
21725 /* Return nonzero if the CONSUMER instruction (a store) does need
21726 PRODUCER's value to calculate the address. */
21729 arm_early_store_addr_dep (rtx producer, rtx consumer)
21731 return !arm_no_early_store_addr_dep (producer, consumer);
21734 /* Return nonzero if the CONSUMER instruction (a load) does need
21735 PRODUCER's value to calculate the address. */
21738 arm_early_load_addr_dep (rtx producer, rtx consumer)
21740 rtx value = PATTERN (producer);
21741 rtx addr = PATTERN (consumer);
21743 if (GET_CODE (value) == COND_EXEC)
21744 value = COND_EXEC_CODE (value);
21745 if (GET_CODE (value) == PARALLEL)
21746 value = XVECEXP (value, 0, 0);
21747 value = XEXP (value, 0);
21748 if (GET_CODE (addr) == COND_EXEC)
21749 addr = COND_EXEC_CODE (addr);
21750 if (GET_CODE (addr) == PARALLEL)
21751 addr = XVECEXP (addr, 0, 0);
21752 addr = XEXP (addr, 1);
21754 return reg_overlap_mentioned_p (value, addr);
21757 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21758 have an early register shift value or amount dependency on the
21759 result of PRODUCER. */
21762 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21764 rtx value = PATTERN (producer);
21765 rtx op = PATTERN (consumer);
21766 rtx early_op;
21768 if (GET_CODE (value) == COND_EXEC)
21769 value = COND_EXEC_CODE (value);
21770 if (GET_CODE (value) == PARALLEL)
21771 value = XVECEXP (value, 0, 0);
21772 value = XEXP (value, 0);
21773 if (GET_CODE (op) == COND_EXEC)
21774 op = COND_EXEC_CODE (op);
21775 if (GET_CODE (op) == PARALLEL)
21776 op = XVECEXP (op, 0, 0);
21777 op = XEXP (op, 1);
21779 early_op = XEXP (op, 0);
21780 /* This is either an actual independent shift, or a shift applied to
21781 the first operand of another operation. We want the whole shift
21782 operation. */
21783 if (GET_CODE (early_op) == REG)
21784 early_op = op;
21786 return !reg_overlap_mentioned_p (value, early_op);
21789 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21790 have an early register shift value dependency on the result of
21791 PRODUCER. */
21794 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21796 rtx value = PATTERN (producer);
21797 rtx op = PATTERN (consumer);
21798 rtx early_op;
21800 if (GET_CODE (value) == COND_EXEC)
21801 value = COND_EXEC_CODE (value);
21802 if (GET_CODE (value) == PARALLEL)
21803 value = XVECEXP (value, 0, 0);
21804 value = XEXP (value, 0);
21805 if (GET_CODE (op) == COND_EXEC)
21806 op = COND_EXEC_CODE (op);
21807 if (GET_CODE (op) == PARALLEL)
21808 op = XVECEXP (op, 0, 0);
21809 op = XEXP (op, 1);
21811 early_op = XEXP (op, 0);
21813 /* This is either an actual independent shift, or a shift applied to
21814 the first operand of another operation. We want the value being
21815 shifted, in either case. */
21816 if (GET_CODE (early_op) != REG)
21817 early_op = XEXP (early_op, 0);
21819 return !reg_overlap_mentioned_p (value, early_op);
21822 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21823 have an early register mult dependency on the result of
21824 PRODUCER. */
21827 arm_no_early_mul_dep (rtx producer, rtx consumer)
21829 rtx value = PATTERN (producer);
21830 rtx op = PATTERN (consumer);
21832 if (GET_CODE (value) == COND_EXEC)
21833 value = COND_EXEC_CODE (value);
21834 if (GET_CODE (value) == PARALLEL)
21835 value = XVECEXP (value, 0, 0);
21836 value = XEXP (value, 0);
21837 if (GET_CODE (op) == COND_EXEC)
21838 op = COND_EXEC_CODE (op);
21839 if (GET_CODE (op) == PARALLEL)
21840 op = XVECEXP (op, 0, 0);
21841 op = XEXP (op, 1);
21843 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
21845 if (GET_CODE (XEXP (op, 0)) == MULT)
21846 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
21847 else
21848 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
21851 return 0;
21854 /* We can't rely on the caller doing the proper promotion when
21855 using APCS or ATPCS. */
21857 static bool
21858 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21860 return !TARGET_AAPCS_BASED;
21863 static enum machine_mode
21864 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
21865 enum machine_mode mode,
21866 int *punsignedp ATTRIBUTE_UNUSED,
21867 const_tree fntype ATTRIBUTE_UNUSED,
21868 int for_return ATTRIBUTE_UNUSED)
21870 if (GET_MODE_CLASS (mode) == MODE_INT
21871 && GET_MODE_SIZE (mode) < 4)
21872 return SImode;
21874 return mode;
21877 /* AAPCS based ABIs use short enums by default. */
21879 static bool
21880 arm_default_short_enums (void)
21882 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
21886 /* AAPCS requires that anonymous bitfields affect structure alignment. */
21888 static bool
21889 arm_align_anon_bitfield (void)
21891 return TARGET_AAPCS_BASED;
21895 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
21897 static tree
21898 arm_cxx_guard_type (void)
21900 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
21903 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21904 has an accumulator dependency on the result of the producer (a
21905 multiplication instruction) and no other dependency on that result. */
21907 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
21909 rtx mul = PATTERN (producer);
21910 rtx mac = PATTERN (consumer);
21911 rtx mul_result;
21912 rtx mac_op0, mac_op1, mac_acc;
21914 if (GET_CODE (mul) == COND_EXEC)
21915 mul = COND_EXEC_CODE (mul);
21916 if (GET_CODE (mac) == COND_EXEC)
21917 mac = COND_EXEC_CODE (mac);
21919 /* Check that mul is of the form (set (...) (mult ...))
21920 and mla is of the form (set (...) (plus (mult ...) (...))). */
21921 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
21922 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
21923 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
21924 return 0;
21926 mul_result = XEXP (mul, 0);
21927 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
21928 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
21929 mac_acc = XEXP (XEXP (mac, 1), 1);
21931 return (reg_overlap_mentioned_p (mul_result, mac_acc)
21932 && !reg_overlap_mentioned_p (mul_result, mac_op0)
21933 && !reg_overlap_mentioned_p (mul_result, mac_op1));
21937 /* The EABI says test the least significant bit of a guard variable. */
21939 static bool
21940 arm_cxx_guard_mask_bit (void)
21942 return TARGET_AAPCS_BASED;
21946 /* The EABI specifies that all array cookies are 8 bytes long. */
21948 static tree
21949 arm_get_cookie_size (tree type)
21951 tree size;
21953 if (!TARGET_AAPCS_BASED)
21954 return default_cxx_get_cookie_size (type);
21956 size = build_int_cst (sizetype, 8);
21957 return size;
21961 /* The EABI says that array cookies should also contain the element size. */
21963 static bool
21964 arm_cookie_has_size (void)
21966 return TARGET_AAPCS_BASED;
21970 /* The EABI says constructors and destructors should return a pointer to
21971 the object constructed/destroyed. */
21973 static bool
21974 arm_cxx_cdtor_returns_this (void)
21976 return TARGET_AAPCS_BASED;
21979 /* The EABI says that an inline function may never be the key
21980 method. */
21982 static bool
21983 arm_cxx_key_method_may_be_inline (void)
21985 return !TARGET_AAPCS_BASED;
21988 static void
21989 arm_cxx_determine_class_data_visibility (tree decl)
21991 if (!TARGET_AAPCS_BASED
21992 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
21993 return;
21995 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
21996 is exported. However, on systems without dynamic vague linkage,
21997 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
21998 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
21999 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
22000 else
22001 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
22002 DECL_VISIBILITY_SPECIFIED (decl) = 1;
22005 static bool
22006 arm_cxx_class_data_always_comdat (void)
22008 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22009 vague linkage if the class has no key function. */
22010 return !TARGET_AAPCS_BASED;
22014 /* The EABI says __aeabi_atexit should be used to register static
22015 destructors. */
22017 static bool
22018 arm_cxx_use_aeabi_atexit (void)
22020 return TARGET_AAPCS_BASED;
22024 void
22025 arm_set_return_address (rtx source, rtx scratch)
22027 arm_stack_offsets *offsets;
22028 HOST_WIDE_INT delta;
22029 rtx addr;
22030 unsigned long saved_regs;
22032 offsets = arm_get_frame_offsets ();
22033 saved_regs = offsets->saved_regs_mask;
22035 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22036 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22037 else
22039 if (frame_pointer_needed)
22040 addr = plus_constant(hard_frame_pointer_rtx, -4);
22041 else
22043 /* LR will be the first saved register. */
22044 delta = offsets->outgoing_args - (offsets->frame + 4);
22047 if (delta >= 4096)
22049 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22050 GEN_INT (delta & ~4095)));
22051 addr = scratch;
22052 delta &= 4095;
22054 else
22055 addr = stack_pointer_rtx;
22057 addr = plus_constant (addr, delta);
22059 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22064 void
22065 thumb_set_return_address (rtx source, rtx scratch)
22067 arm_stack_offsets *offsets;
22068 HOST_WIDE_INT delta;
22069 HOST_WIDE_INT limit;
22070 int reg;
22071 rtx addr;
22072 unsigned long mask;
22074 emit_use (source);
22076 offsets = arm_get_frame_offsets ();
22077 mask = offsets->saved_regs_mask;
22078 if (mask & (1 << LR_REGNUM))
22080 limit = 1024;
22081 /* Find the saved regs. */
22082 if (frame_pointer_needed)
22084 delta = offsets->soft_frame - offsets->saved_args;
22085 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
22086 if (TARGET_THUMB1)
22087 limit = 128;
22089 else
22091 delta = offsets->outgoing_args - offsets->saved_args;
22092 reg = SP_REGNUM;
22094 /* Allow for the stack frame. */
22095 if (TARGET_THUMB1 && TARGET_BACKTRACE)
22096 delta -= 16;
22097 /* The link register is always the first saved register. */
22098 delta -= 4;
22100 /* Construct the address. */
22101 addr = gen_rtx_REG (SImode, reg);
22102 if (delta > limit)
22104 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
22105 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
22106 addr = scratch;
22108 else
22109 addr = plus_constant (addr, delta);
22111 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22113 else
22114 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22117 /* Implements target hook vector_mode_supported_p. */
22118 bool
22119 arm_vector_mode_supported_p (enum machine_mode mode)
22121 /* Neon also supports V2SImode, etc. listed in the clause below. */
22122 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
22123 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
22124 return true;
22126 if ((TARGET_NEON || TARGET_IWMMXT)
22127 && ((mode == V2SImode)
22128 || (mode == V4HImode)
22129 || (mode == V8QImode)))
22130 return true;
22132 return false;
22135 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22136 registers when autovectorizing for Neon, at least until multiple vector
22137 widths are supported properly by the middle-end. */
22139 static enum machine_mode
22140 arm_preferred_simd_mode (enum machine_mode mode)
22142 if (TARGET_NEON)
22143 switch (mode)
22145 case SFmode:
22146 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22147 case SImode:
22148 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22149 case HImode:
22150 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22151 case QImode:
22152 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22153 case DImode:
22154 if (TARGET_NEON_VECTORIZE_QUAD)
22155 return V2DImode;
22156 break;
22158 default:;
22161 if (TARGET_REALLY_IWMMXT)
22162 switch (mode)
22164 case SImode:
22165 return V2SImode;
22166 case HImode:
22167 return V4HImode;
22168 case QImode:
22169 return V8QImode;
22171 default:;
22174 return word_mode;
22177 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22179 We need to define this for LO_REGS on thumb. Otherwise we can end up
22180 using r0-r4 for function arguments, r7 for the stack frame and don't
22181 have enough left over to do doubleword arithmetic. */
22183 static bool
22184 arm_class_likely_spilled_p (reg_class_t rclass)
22186 if ((TARGET_THUMB && rclass == LO_REGS)
22187 || rclass == CC_REG)
22188 return true;
22190 return false;
22193 /* Implements target hook small_register_classes_for_mode_p. */
22194 bool
22195 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22197 return TARGET_THUMB1;
22200 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22201 ARM insns and therefore guarantee that the shift count is modulo 256.
22202 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22203 guarantee no particular behavior for out-of-range counts. */
22205 static unsigned HOST_WIDE_INT
22206 arm_shift_truncation_mask (enum machine_mode mode)
22208 return mode == SImode ? 255 : 0;
22212 /* Map internal gcc register numbers to DWARF2 register numbers. */
22214 unsigned int
22215 arm_dbx_register_number (unsigned int regno)
22217 if (regno < 16)
22218 return regno;
22220 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22221 compatibility. The EABI defines them as registers 96-103. */
22222 if (IS_FPA_REGNUM (regno))
22223 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22225 if (IS_VFP_REGNUM (regno))
22227 /* See comment in arm_dwarf_register_span. */
22228 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22229 return 64 + regno - FIRST_VFP_REGNUM;
22230 else
22231 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22234 if (IS_IWMMXT_GR_REGNUM (regno))
22235 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22237 if (IS_IWMMXT_REGNUM (regno))
22238 return 112 + regno - FIRST_IWMMXT_REGNUM;
22240 gcc_unreachable ();
22243 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22244 GCC models tham as 64 32-bit registers, so we need to describe this to
22245 the DWARF generation code. Other registers can use the default. */
22246 static rtx
22247 arm_dwarf_register_span (rtx rtl)
22249 unsigned regno;
22250 int nregs;
22251 int i;
22252 rtx p;
22254 regno = REGNO (rtl);
22255 if (!IS_VFP_REGNUM (regno))
22256 return NULL_RTX;
22258 /* XXX FIXME: The EABI defines two VFP register ranges:
22259 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22260 256-287: D0-D31
22261 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22262 corresponding D register. Until GDB supports this, we shall use the
22263 legacy encodings. We also use these encodings for D0-D15 for
22264 compatibility with older debuggers. */
22265 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22266 return NULL_RTX;
22268 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22269 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22270 regno = (regno - FIRST_VFP_REGNUM) / 2;
22271 for (i = 0; i < nregs; i++)
22272 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
22274 return p;
22277 #if ARM_UNWIND_INFO
22278 /* Emit unwind directives for a store-multiple instruction or stack pointer
22279 push during alignment.
22280 These should only ever be generated by the function prologue code, so
22281 expect them to have a particular form. */
22283 static void
22284 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22286 int i;
22287 HOST_WIDE_INT offset;
22288 HOST_WIDE_INT nregs;
22289 int reg_size;
22290 unsigned reg;
22291 unsigned lastreg;
22292 rtx e;
22294 e = XVECEXP (p, 0, 0);
22295 if (GET_CODE (e) != SET)
22296 abort ();
22298 /* First insn will adjust the stack pointer. */
22299 if (GET_CODE (e) != SET
22300 || GET_CODE (XEXP (e, 0)) != REG
22301 || REGNO (XEXP (e, 0)) != SP_REGNUM
22302 || GET_CODE (XEXP (e, 1)) != PLUS)
22303 abort ();
22305 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22306 nregs = XVECLEN (p, 0) - 1;
22308 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
22309 if (reg < 16)
22311 /* The function prologue may also push pc, but not annotate it as it is
22312 never restored. We turn this into a stack pointer adjustment. */
22313 if (nregs * 4 == offset - 4)
22315 fprintf (asm_out_file, "\t.pad #4\n");
22316 offset -= 4;
22318 reg_size = 4;
22319 fprintf (asm_out_file, "\t.save {");
22321 else if (IS_VFP_REGNUM (reg))
22323 reg_size = 8;
22324 fprintf (asm_out_file, "\t.vsave {");
22326 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22328 /* FPA registers are done differently. */
22329 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22330 return;
22332 else
22333 /* Unknown register type. */
22334 abort ();
22336 /* If the stack increment doesn't match the size of the saved registers,
22337 something has gone horribly wrong. */
22338 if (offset != nregs * reg_size)
22339 abort ();
22341 offset = 0;
22342 lastreg = 0;
22343 /* The remaining insns will describe the stores. */
22344 for (i = 1; i <= nregs; i++)
22346 /* Expect (set (mem <addr>) (reg)).
22347 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22348 e = XVECEXP (p, 0, i);
22349 if (GET_CODE (e) != SET
22350 || GET_CODE (XEXP (e, 0)) != MEM
22351 || GET_CODE (XEXP (e, 1)) != REG)
22352 abort ();
22354 reg = REGNO (XEXP (e, 1));
22355 if (reg < lastreg)
22356 abort ();
22358 if (i != 1)
22359 fprintf (asm_out_file, ", ");
22360 /* We can't use %r for vfp because we need to use the
22361 double precision register names. */
22362 if (IS_VFP_REGNUM (reg))
22363 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22364 else
22365 asm_fprintf (asm_out_file, "%r", reg);
22367 #ifdef ENABLE_CHECKING
22368 /* Check that the addresses are consecutive. */
22369 e = XEXP (XEXP (e, 0), 0);
22370 if (GET_CODE (e) == PLUS)
22372 offset += reg_size;
22373 if (GET_CODE (XEXP (e, 0)) != REG
22374 || REGNO (XEXP (e, 0)) != SP_REGNUM
22375 || GET_CODE (XEXP (e, 1)) != CONST_INT
22376 || offset != INTVAL (XEXP (e, 1)))
22377 abort ();
22379 else if (i != 1
22380 || GET_CODE (e) != REG
22381 || REGNO (e) != SP_REGNUM)
22382 abort ();
22383 #endif
22385 fprintf (asm_out_file, "}\n");
22388 /* Emit unwind directives for a SET. */
22390 static void
22391 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22393 rtx e0;
22394 rtx e1;
22395 unsigned reg;
22397 e0 = XEXP (p, 0);
22398 e1 = XEXP (p, 1);
22399 switch (GET_CODE (e0))
22401 case MEM:
22402 /* Pushing a single register. */
22403 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22404 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22405 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22406 abort ();
22408 asm_fprintf (asm_out_file, "\t.save ");
22409 if (IS_VFP_REGNUM (REGNO (e1)))
22410 asm_fprintf(asm_out_file, "{d%d}\n",
22411 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
22412 else
22413 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
22414 break;
22416 case REG:
22417 if (REGNO (e0) == SP_REGNUM)
22419 /* A stack increment. */
22420 if (GET_CODE (e1) != PLUS
22421 || GET_CODE (XEXP (e1, 0)) != REG
22422 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22423 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22424 abort ();
22426 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22427 -INTVAL (XEXP (e1, 1)));
22429 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22431 HOST_WIDE_INT offset;
22433 if (GET_CODE (e1) == PLUS)
22435 if (GET_CODE (XEXP (e1, 0)) != REG
22436 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22437 abort ();
22438 reg = REGNO (XEXP (e1, 0));
22439 offset = INTVAL (XEXP (e1, 1));
22440 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22441 HARD_FRAME_POINTER_REGNUM, reg,
22442 offset);
22444 else if (GET_CODE (e1) == REG)
22446 reg = REGNO (e1);
22447 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22448 HARD_FRAME_POINTER_REGNUM, reg);
22450 else
22451 abort ();
22453 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22455 /* Move from sp to reg. */
22456 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22458 else if (GET_CODE (e1) == PLUS
22459 && GET_CODE (XEXP (e1, 0)) == REG
22460 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22461 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22463 /* Set reg to offset from sp. */
22464 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
22465 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
22467 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22469 /* Stack pointer save before alignment. */
22470 reg = REGNO (e0);
22471 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22472 reg + 0x90, reg);
22474 else
22475 abort ();
22476 break;
22478 default:
22479 abort ();
22484 /* Emit unwind directives for the given insn. */
22486 static void
22487 arm_unwind_emit (FILE * asm_out_file, rtx insn)
22489 rtx pat;
22491 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22492 return;
22494 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22495 && (TREE_NOTHROW (current_function_decl)
22496 || crtl->all_throwers_are_sibcalls))
22497 return;
22499 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
22500 return;
22502 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
22503 if (pat)
22504 pat = XEXP (pat, 0);
22505 else
22506 pat = PATTERN (insn);
22508 switch (GET_CODE (pat))
22510 case SET:
22511 arm_unwind_emit_set (asm_out_file, pat);
22512 break;
22514 case SEQUENCE:
22515 /* Store multiple. */
22516 arm_unwind_emit_sequence (asm_out_file, pat);
22517 break;
22519 default:
22520 abort();
22525 /* Output a reference from a function exception table to the type_info
22526 object X. The EABI specifies that the symbol should be relocated by
22527 an R_ARM_TARGET2 relocation. */
22529 static bool
22530 arm_output_ttype (rtx x)
22532 fputs ("\t.word\t", asm_out_file);
22533 output_addr_const (asm_out_file, x);
22534 /* Use special relocations for symbol references. */
22535 if (GET_CODE (x) != CONST_INT)
22536 fputs ("(TARGET2)", asm_out_file);
22537 fputc ('\n', asm_out_file);
22539 return TRUE;
22542 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22544 static void
22545 arm_asm_emit_except_personality (rtx personality)
22547 fputs ("\t.personality\t", asm_out_file);
22548 output_addr_const (asm_out_file, personality);
22549 fputc ('\n', asm_out_file);
22552 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22554 static void
22555 arm_asm_init_sections (void)
22557 exception_section = get_unnamed_section (0, output_section_asm_op,
22558 "\t.handlerdata");
22560 #endif /* ARM_UNWIND_INFO */
22562 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22564 static enum unwind_info_type
22565 arm_except_unwind_info (struct gcc_options *opts)
22567 /* Honor the --enable-sjlj-exceptions configure switch. */
22568 #ifdef CONFIG_SJLJ_EXCEPTIONS
22569 if (CONFIG_SJLJ_EXCEPTIONS)
22570 return UI_SJLJ;
22571 #endif
22573 /* If not using ARM EABI unwind tables... */
22574 if (ARM_UNWIND_INFO)
22576 /* For simplicity elsewhere in this file, indicate that all unwind
22577 info is disabled if we're not emitting unwind tables. */
22578 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
22579 return UI_NONE;
22580 else
22581 return UI_TARGET;
22584 /* ... we use sjlj exceptions for backwards compatibility. */
22585 return UI_SJLJ;
22589 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22590 stack alignment. */
22592 static void
22593 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22595 rtx unspec = SET_SRC (pattern);
22596 gcc_assert (GET_CODE (unspec) == UNSPEC);
22598 switch (index)
22600 case UNSPEC_STACK_ALIGN:
22601 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22602 put anything on the stack, so hopefully it won't matter.
22603 CFA = SP will be correct after alignment. */
22604 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22605 SET_DEST (pattern));
22606 break;
22607 default:
22608 gcc_unreachable ();
22613 /* Output unwind directives for the start/end of a function. */
22615 void
22616 arm_output_fn_unwind (FILE * f, bool prologue)
22618 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22619 return;
22621 if (prologue)
22622 fputs ("\t.fnstart\n", f);
22623 else
22625 /* If this function will never be unwound, then mark it as such.
22626 The came condition is used in arm_unwind_emit to suppress
22627 the frame annotations. */
22628 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22629 && (TREE_NOTHROW (current_function_decl)
22630 || crtl->all_throwers_are_sibcalls))
22631 fputs("\t.cantunwind\n", f);
22633 fputs ("\t.fnend\n", f);
22637 static bool
22638 arm_emit_tls_decoration (FILE *fp, rtx x)
22640 enum tls_reloc reloc;
22641 rtx val;
22643 val = XVECEXP (x, 0, 0);
22644 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22646 output_addr_const (fp, val);
22648 switch (reloc)
22650 case TLS_GD32:
22651 fputs ("(tlsgd)", fp);
22652 break;
22653 case TLS_LDM32:
22654 fputs ("(tlsldm)", fp);
22655 break;
22656 case TLS_LDO32:
22657 fputs ("(tlsldo)", fp);
22658 break;
22659 case TLS_IE32:
22660 fputs ("(gottpoff)", fp);
22661 break;
22662 case TLS_LE32:
22663 fputs ("(tpoff)", fp);
22664 break;
22665 default:
22666 gcc_unreachable ();
22669 switch (reloc)
22671 case TLS_GD32:
22672 case TLS_LDM32:
22673 case TLS_IE32:
22674 fputs (" + (. - ", fp);
22675 output_addr_const (fp, XVECEXP (x, 0, 2));
22676 fputs (" - ", fp);
22677 output_addr_const (fp, XVECEXP (x, 0, 3));
22678 fputc (')', fp);
22679 break;
22680 default:
22681 break;
22684 return TRUE;
22687 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22689 static void
22690 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22692 gcc_assert (size == 4);
22693 fputs ("\t.word\t", file);
22694 output_addr_const (file, x);
22695 fputs ("(tlsldo)", file);
22698 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22700 static bool
22701 arm_output_addr_const_extra (FILE *fp, rtx x)
22703 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22704 return arm_emit_tls_decoration (fp, x);
22705 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22707 char label[256];
22708 int labelno = INTVAL (XVECEXP (x, 0, 0));
22710 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22711 assemble_name_raw (fp, label);
22713 return TRUE;
22715 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22717 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22718 if (GOT_PCREL)
22719 fputs ("+.", fp);
22720 fputs ("-(", fp);
22721 output_addr_const (fp, XVECEXP (x, 0, 0));
22722 fputc (')', fp);
22723 return TRUE;
22725 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22727 output_addr_const (fp, XVECEXP (x, 0, 0));
22728 if (GOT_PCREL)
22729 fputs ("+.", fp);
22730 fputs ("-(", fp);
22731 output_addr_const (fp, XVECEXP (x, 0, 1));
22732 fputc (')', fp);
22733 return TRUE;
22735 else if (GET_CODE (x) == CONST_VECTOR)
22736 return arm_emit_vector_const (fp, x);
22738 return FALSE;
22741 /* Output assembly for a shift instruction.
22742 SET_FLAGS determines how the instruction modifies the condition codes.
22743 0 - Do not set condition codes.
22744 1 - Set condition codes.
22745 2 - Use smallest instruction. */
22746 const char *
22747 arm_output_shift(rtx * operands, int set_flags)
22749 char pattern[100];
22750 static const char flag_chars[3] = {'?', '.', '!'};
22751 const char *shift;
22752 HOST_WIDE_INT val;
22753 char c;
22755 c = flag_chars[set_flags];
22756 if (TARGET_UNIFIED_ASM)
22758 shift = shift_op(operands[3], &val);
22759 if (shift)
22761 if (val != -1)
22762 operands[2] = GEN_INT(val);
22763 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
22765 else
22766 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
22768 else
22769 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
22770 output_asm_insn (pattern, operands);
22771 return "";
22774 /* Output a Thumb-1 casesi dispatch sequence. */
22775 const char *
22776 thumb1_output_casesi (rtx *operands)
22778 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
22780 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22782 switch (GET_MODE(diff_vec))
22784 case QImode:
22785 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22786 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
22787 case HImode:
22788 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22789 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
22790 case SImode:
22791 return "bl\t%___gnu_thumb1_case_si";
22792 default:
22793 gcc_unreachable ();
22797 /* Output a Thumb-2 casesi instruction. */
22798 const char *
22799 thumb2_output_casesi (rtx *operands)
22801 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
22803 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22805 output_asm_insn ("cmp\t%0, %1", operands);
22806 output_asm_insn ("bhi\t%l3", operands);
22807 switch (GET_MODE(diff_vec))
22809 case QImode:
22810 return "tbb\t[%|pc, %0]";
22811 case HImode:
22812 return "tbh\t[%|pc, %0, lsl #1]";
22813 case SImode:
22814 if (flag_pic)
22816 output_asm_insn ("adr\t%4, %l2", operands);
22817 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
22818 output_asm_insn ("add\t%4, %4, %5", operands);
22819 return "bx\t%4";
22821 else
22823 output_asm_insn ("adr\t%4, %l2", operands);
22824 return "ldr\t%|pc, [%4, %0, lsl #2]";
22826 default:
22827 gcc_unreachable ();
22831 /* Most ARM cores are single issue, but some newer ones can dual issue.
22832 The scheduler descriptions rely on this being correct. */
22833 static int
22834 arm_issue_rate (void)
22836 switch (arm_tune)
22838 case cortexr4:
22839 case cortexr4f:
22840 case cortexa5:
22841 case cortexa8:
22842 case cortexa9:
22843 return 2;
22845 default:
22846 return 1;
22850 /* A table and a function to perform ARM-specific name mangling for
22851 NEON vector types in order to conform to the AAPCS (see "Procedure
22852 Call Standard for the ARM Architecture", Appendix A). To qualify
22853 for emission with the mangled names defined in that document, a
22854 vector type must not only be of the correct mode but also be
22855 composed of NEON vector element types (e.g. __builtin_neon_qi). */
22856 typedef struct
22858 enum machine_mode mode;
22859 const char *element_type_name;
22860 const char *aapcs_name;
22861 } arm_mangle_map_entry;
22863 static arm_mangle_map_entry arm_mangle_map[] = {
22864 /* 64-bit containerized types. */
22865 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
22866 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
22867 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
22868 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
22869 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
22870 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
22871 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
22872 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
22873 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
22874 /* 128-bit containerized types. */
22875 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
22876 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
22877 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
22878 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
22879 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
22880 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
22881 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
22882 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
22883 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
22884 { VOIDmode, NULL, NULL }
22887 const char *
22888 arm_mangle_type (const_tree type)
22890 arm_mangle_map_entry *pos = arm_mangle_map;
22892 /* The ARM ABI documents (10th October 2008) say that "__va_list"
22893 has to be managled as if it is in the "std" namespace. */
22894 if (TARGET_AAPCS_BASED
22895 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
22897 static bool warned;
22898 if (!warned && warn_psabi && !in_system_header)
22900 warned = true;
22901 inform (input_location,
22902 "the mangling of %<va_list%> has changed in GCC 4.4");
22904 return "St9__va_list";
22907 /* Half-precision float. */
22908 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
22909 return "Dh";
22911 if (TREE_CODE (type) != VECTOR_TYPE)
22912 return NULL;
22914 /* Check the mode of the vector type, and the name of the vector
22915 element type, against the table. */
22916 while (pos->mode != VOIDmode)
22918 tree elt_type = TREE_TYPE (type);
22920 if (pos->mode == TYPE_MODE (type)
22921 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
22922 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
22923 pos->element_type_name))
22924 return pos->aapcs_name;
22926 pos++;
22929 /* Use the default mangling for unrecognized (possibly user-defined)
22930 vector types. */
22931 return NULL;
22934 /* Order of allocation of core registers for Thumb: this allocation is
22935 written over the corresponding initial entries of the array
22936 initialized with REG_ALLOC_ORDER. We allocate all low registers
22937 first. Saving and restoring a low register is usually cheaper than
22938 using a call-clobbered high register. */
22940 static const int thumb_core_reg_alloc_order[] =
22942 3, 2, 1, 0, 4, 5, 6, 7,
22943 14, 12, 8, 9, 10, 11, 13, 15
22946 /* Adjust register allocation order when compiling for Thumb. */
22948 void
22949 arm_order_regs_for_local_alloc (void)
22951 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
22952 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
22953 if (TARGET_THUMB)
22954 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
22955 sizeof (thumb_core_reg_alloc_order));
22958 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
22960 bool
22961 arm_frame_pointer_required (void)
22963 return (cfun->has_nonlocal_label
22964 || SUBTARGET_FRAME_POINTER_REQUIRED
22965 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
22968 /* Only thumb1 can't support conditional execution, so return true if
22969 the target is not thumb1. */
22970 static bool
22971 arm_have_conditional_execution (void)
22973 return !TARGET_THUMB1;
22976 /* Legitimize a memory reference for sync primitive implemented using
22977 ldrex / strex. We currently force the form of the reference to be
22978 indirect without offset. We do not yet support the indirect offset
22979 addressing supported by some ARM targets for these
22980 instructions. */
22981 static rtx
22982 arm_legitimize_sync_memory (rtx memory)
22984 rtx addr = force_reg (Pmode, XEXP (memory, 0));
22985 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
22987 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
22988 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
22989 return legitimate_memory;
22992 /* An instruction emitter. */
22993 typedef void (* emit_f) (int label, const char *, rtx *);
22995 /* An instruction emitter that emits via the conventional
22996 output_asm_insn. */
22997 static void
22998 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23000 output_asm_insn (pattern, operands);
23003 /* Count the number of emitted synchronization instructions. */
23004 static unsigned arm_insn_count;
23006 /* An emitter that counts emitted instructions but does not actually
23007 emit instruction into the the instruction stream. */
23008 static void
23009 arm_count (int label,
23010 const char *pattern ATTRIBUTE_UNUSED,
23011 rtx *operands ATTRIBUTE_UNUSED)
23013 if (! label)
23014 ++ arm_insn_count;
23017 /* Construct a pattern using conventional output formatting and feed
23018 it to output_asm_insn. Provides a mechanism to construct the
23019 output pattern on the fly. Note the hard limit on the pattern
23020 buffer size. */
23021 static void ATTRIBUTE_PRINTF_4
23022 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23023 const char *pattern, ...)
23025 va_list ap;
23026 char buffer[256];
23028 va_start (ap, pattern);
23029 vsprintf (buffer, pattern, ap);
23030 va_end (ap);
23031 emit (label, buffer, operands);
23034 /* Emit the memory barrier instruction, if any, provided by this
23035 target to a specified emitter. */
23036 static void
23037 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23039 if (TARGET_HAVE_DMB)
23041 /* Note we issue a system level barrier. We should consider
23042 issuing a inner shareabilty zone barrier here instead, ie.
23043 "DMB ISH". */
23044 emit (0, "dmb\tsy", operands);
23045 return;
23048 if (TARGET_HAVE_DMB_MCR)
23050 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23051 return;
23054 gcc_unreachable ();
23057 /* Emit the memory barrier instruction, if any, provided by this
23058 target. */
23059 const char *
23060 arm_output_memory_barrier (rtx *operands)
23062 arm_process_output_memory_barrier (arm_emit, operands);
23063 return "";
23066 /* Helper to figure out the instruction suffix required on ldrex/strex
23067 for operations on an object of the specified mode. */
23068 static const char *
23069 arm_ldrex_suffix (enum machine_mode mode)
23071 switch (mode)
23073 case QImode: return "b";
23074 case HImode: return "h";
23075 case SImode: return "";
23076 case DImode: return "d";
23077 default:
23078 gcc_unreachable ();
23080 return "";
23083 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23084 mode. */
23085 static void
23086 arm_output_ldrex (emit_f emit,
23087 enum machine_mode mode,
23088 rtx target,
23089 rtx memory)
23091 const char *suffix = arm_ldrex_suffix (mode);
23092 rtx operands[2];
23094 operands[0] = target;
23095 operands[1] = memory;
23096 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
23099 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23100 mode. */
23101 static void
23102 arm_output_strex (emit_f emit,
23103 enum machine_mode mode,
23104 const char *cc,
23105 rtx result,
23106 rtx value,
23107 rtx memory)
23109 const char *suffix = arm_ldrex_suffix (mode);
23110 rtx operands[3];
23112 operands[0] = result;
23113 operands[1] = value;
23114 operands[2] = memory;
23115 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23116 cc);
23119 /* Helper to emit a two operand instruction. */
23120 static void
23121 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23123 rtx operands[2];
23125 operands[0] = d;
23126 operands[1] = s;
23127 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23130 /* Helper to emit a three operand instruction. */
23131 static void
23132 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23134 rtx operands[3];
23136 operands[0] = d;
23137 operands[1] = a;
23138 operands[2] = b;
23139 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23142 /* Emit a load store exclusive synchronization loop.
23145 old_value = [mem]
23146 if old_value != required_value
23147 break;
23148 t1 = sync_op (old_value, new_value)
23149 [mem] = t1, t2 = [0|1]
23150 while ! t2
23152 Note:
23153 t1 == t2 is not permitted
23154 t1 == old_value is permitted
23156 required_value:
23158 RTX register or const_int representing the required old_value for
23159 the modify to continue, if NULL no comparsion is performed. */
23160 static void
23161 arm_output_sync_loop (emit_f emit,
23162 enum machine_mode mode,
23163 rtx old_value,
23164 rtx memory,
23165 rtx required_value,
23166 rtx new_value,
23167 rtx t1,
23168 rtx t2,
23169 enum attr_sync_op sync_op,
23170 int early_barrier_required)
23172 rtx operands[1];
23174 gcc_assert (t1 != t2);
23176 if (early_barrier_required)
23177 arm_process_output_memory_barrier (emit, NULL);
23179 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23181 arm_output_ldrex (emit, mode, old_value, memory);
23183 if (required_value)
23185 rtx operands[2];
23187 operands[0] = old_value;
23188 operands[1] = required_value;
23189 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23190 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23193 switch (sync_op)
23195 case SYNC_OP_ADD:
23196 arm_output_op3 (emit, "add", t1, old_value, new_value);
23197 break;
23199 case SYNC_OP_SUB:
23200 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23201 break;
23203 case SYNC_OP_IOR:
23204 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23205 break;
23207 case SYNC_OP_XOR:
23208 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23209 break;
23211 case SYNC_OP_AND:
23212 arm_output_op3 (emit,"and", t1, old_value, new_value);
23213 break;
23215 case SYNC_OP_NAND:
23216 arm_output_op3 (emit, "and", t1, old_value, new_value);
23217 arm_output_op2 (emit, "mvn", t1, t1);
23218 break;
23220 case SYNC_OP_NONE:
23221 t1 = new_value;
23222 break;
23225 arm_output_strex (emit, mode, "", t2, t1, memory);
23226 operands[0] = t2;
23227 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23228 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", LOCAL_LABEL_PREFIX);
23230 arm_process_output_memory_barrier (emit, NULL);
23231 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
23234 static rtx
23235 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23237 if (index > 0)
23238 default_value = operands[index - 1];
23240 return default_value;
23243 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23244 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23246 /* Extract the operands for a synchroniztion instruction from the
23247 instructions attributes and emit the instruction. */
23248 static void
23249 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23251 rtx result, memory, required_value, new_value, t1, t2;
23252 int early_barrier;
23253 enum machine_mode mode;
23254 enum attr_sync_op sync_op;
23256 result = FETCH_SYNC_OPERAND(result, 0);
23257 memory = FETCH_SYNC_OPERAND(memory, 0);
23258 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23259 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23260 t1 = FETCH_SYNC_OPERAND(t1, 0);
23261 t2 = FETCH_SYNC_OPERAND(t2, 0);
23262 early_barrier =
23263 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23264 sync_op = get_attr_sync_op (insn);
23265 mode = GET_MODE (memory);
23267 arm_output_sync_loop (emit, mode, result, memory, required_value,
23268 new_value, t1, t2, sync_op, early_barrier);
23271 /* Emit a synchronization instruction loop. */
23272 const char *
23273 arm_output_sync_insn (rtx insn, rtx *operands)
23275 arm_process_output_sync_insn (arm_emit, insn, operands);
23276 return "";
23279 /* Count the number of machine instruction that will be emitted for a
23280 synchronization instruction. Note that the emitter used does not
23281 emit instructions, it just counts instructions being carefull not
23282 to count labels. */
23283 unsigned int
23284 arm_sync_loop_insns (rtx insn, rtx *operands)
23286 arm_insn_count = 0;
23287 arm_process_output_sync_insn (arm_count, insn, operands);
23288 return arm_insn_count;
23291 /* Helper to call a target sync instruction generator, dealing with
23292 the variation in operands required by the different generators. */
23293 static rtx
23294 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23295 rtx memory, rtx required_value, rtx new_value)
23297 switch (generator->op)
23299 case arm_sync_generator_omn:
23300 gcc_assert (! required_value);
23301 return generator->u.omn (old_value, memory, new_value);
23303 case arm_sync_generator_omrn:
23304 gcc_assert (required_value);
23305 return generator->u.omrn (old_value, memory, required_value, new_value);
23308 return NULL;
23311 /* Expand a synchronization loop. The synchronization loop is expanded
23312 as an opaque block of instructions in order to ensure that we do
23313 not subsequently get extraneous memory accesses inserted within the
23314 critical region. The exclusive access property of ldrex/strex is
23315 only guaranteed in there are no intervening memory accesses. */
23316 void
23317 arm_expand_sync (enum machine_mode mode,
23318 struct arm_sync_generator *generator,
23319 rtx target, rtx memory, rtx required_value, rtx new_value)
23321 if (target == NULL)
23322 target = gen_reg_rtx (mode);
23324 memory = arm_legitimize_sync_memory (memory);
23325 if (mode != SImode)
23327 rtx load_temp = gen_reg_rtx (SImode);
23329 if (required_value)
23330 required_value = convert_modes (SImode, mode, required_value, true);
23332 new_value = convert_modes (SImode, mode, new_value, true);
23333 emit_insn (arm_call_generator (generator, load_temp, memory,
23334 required_value, new_value));
23335 emit_move_insn (target, gen_lowpart (mode, load_temp));
23337 else
23339 emit_insn (arm_call_generator (generator, target, memory, required_value,
23340 new_value));
23344 static bool
23345 arm_vector_alignment_reachable (const_tree type, bool is_packed)
23347 /* Vectors which aren't in packed structures will not be less aligned than
23348 the natural alignment of their element type, so this is safe. */
23349 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23350 return !is_packed;
23352 return default_builtin_vector_alignment_reachable (type, is_packed);
23355 static bool
23356 arm_builtin_support_vector_misalignment (enum machine_mode mode,
23357 const_tree type, int misalignment,
23358 bool is_packed)
23360 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23362 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
23364 if (is_packed)
23365 return align == 1;
23367 /* If the misalignment is unknown, we should be able to handle the access
23368 so long as it is not to a member of a packed data structure. */
23369 if (misalignment == -1)
23370 return true;
23372 /* Return true if the misalignment is a multiple of the natural alignment
23373 of the vector's element type. This is probably always going to be
23374 true in practice, since we've already established that this isn't a
23375 packed access. */
23376 return ((misalignment % align) == 0);
23379 return default_builtin_support_vector_misalignment (mode, type, misalignment,
23380 is_packed);
23383 static void
23384 arm_conditional_register_usage (void)
23386 int regno;
23388 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
23390 for (regno = FIRST_FPA_REGNUM;
23391 regno <= LAST_FPA_REGNUM; ++regno)
23392 fixed_regs[regno] = call_used_regs[regno] = 1;
23395 if (TARGET_THUMB1 && optimize_size)
23397 /* When optimizing for size on Thumb-1, it's better not
23398 to use the HI regs, because of the overhead of
23399 stacking them. */
23400 for (regno = FIRST_HI_REGNUM;
23401 regno <= LAST_HI_REGNUM; ++regno)
23402 fixed_regs[regno] = call_used_regs[regno] = 1;
23405 /* The link register can be clobbered by any branch insn,
23406 but we have no way to track that at present, so mark
23407 it as unavailable. */
23408 if (TARGET_THUMB1)
23409 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
23411 if (TARGET_32BIT && TARGET_HARD_FLOAT)
23413 if (TARGET_MAVERICK)
23415 for (regno = FIRST_FPA_REGNUM;
23416 regno <= LAST_FPA_REGNUM; ++ regno)
23417 fixed_regs[regno] = call_used_regs[regno] = 1;
23418 for (regno = FIRST_CIRRUS_FP_REGNUM;
23419 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
23421 fixed_regs[regno] = 0;
23422 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
23425 if (TARGET_VFP)
23427 /* VFPv3 registers are disabled when earlier VFP
23428 versions are selected due to the definition of
23429 LAST_VFP_REGNUM. */
23430 for (regno = FIRST_VFP_REGNUM;
23431 regno <= LAST_VFP_REGNUM; ++ regno)
23433 fixed_regs[regno] = 0;
23434 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
23435 || regno >= FIRST_VFP_REGNUM + 32;
23440 if (TARGET_REALLY_IWMMXT)
23442 regno = FIRST_IWMMXT_GR_REGNUM;
23443 /* The 2002/10/09 revision of the XScale ABI has wCG0
23444 and wCG1 as call-preserved registers. The 2002/11/21
23445 revision changed this so that all wCG registers are
23446 scratch registers. */
23447 for (regno = FIRST_IWMMXT_GR_REGNUM;
23448 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
23449 fixed_regs[regno] = 0;
23450 /* The XScale ABI has wR0 - wR9 as scratch registers,
23451 the rest as call-preserved registers. */
23452 for (regno = FIRST_IWMMXT_REGNUM;
23453 regno <= LAST_IWMMXT_REGNUM; ++ regno)
23455 fixed_regs[regno] = 0;
23456 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
23460 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
23462 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23463 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23465 else if (TARGET_APCS_STACK)
23467 fixed_regs[10] = 1;
23468 call_used_regs[10] = 1;
23470 /* -mcaller-super-interworking reserves r11 for calls to
23471 _interwork_r11_call_via_rN(). Making the register global
23472 is an easy way of ensuring that it remains valid for all
23473 calls. */
23474 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
23475 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
23477 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23478 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23479 if (TARGET_CALLER_INTERWORKING)
23480 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23482 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23485 static reg_class_t
23486 arm_preferred_rename_class (reg_class_t rclass)
23488 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
23489 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
23490 and code size can be reduced. */
23491 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
23492 return LO_REGS;
23493 else
23494 return NO_REGS;
23497 #include "gt-arm.h"