gcc/config/arm/arm.c (official-gcc.git, blob 533ad910c8f29b7b336db6b6d86aec4e16892c78; last change 2011-03-06 by Andrew Stubbs <ams@codesourcery.com>)
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
55 #include "df.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
61 /* Forward definitions of types. */
62 typedef struct minipool_node Mnode;
63 typedef struct minipool_fixup Mfix;
65 void (*arm_lang_output_object_attributes_hook)(void);
67 /* Forward function declarations. */
68 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
69 static int arm_compute_static_chain_stack_bytes (void);
70 static arm_stack_offsets *arm_get_frame_offsets (void);
71 static void arm_add_gc_roots (void);
72 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
73 HOST_WIDE_INT, rtx, rtx, int, int);
74 static unsigned bit_count (unsigned long);
75 static int arm_address_register_rtx_p (rtx, int);
76 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
77 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
78 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
79 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
80 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
81 inline static int thumb1_index_register_rtx_p (rtx, int);
82 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
83 static int thumb_far_jump_used_p (void);
84 static bool thumb_force_lr_save (void);
85 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
86 static rtx emit_sfm (int, int);
87 static unsigned arm_size_return_regs (void);
88 static bool arm_assemble_integer (rtx, unsigned int, int);
89 static void arm_print_operand (FILE *, rtx, int);
90 static void arm_print_operand_address (FILE *, rtx);
91 static bool arm_print_operand_punct_valid_p (unsigned char code);
92 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
93 static arm_cc get_arm_condition_code (rtx);
94 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
95 static rtx is_jump_table (rtx);
96 static const char *output_multi_immediate (rtx *, const char *, const char *,
97 int, HOST_WIDE_INT);
98 static const char *shift_op (rtx, HOST_WIDE_INT *);
99 static struct machine_function *arm_init_machine_status (void);
100 static void thumb_exit (FILE *, int);
101 static rtx is_jump_table (rtx);
102 static HOST_WIDE_INT get_jump_table_size (rtx);
103 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
104 static Mnode *add_minipool_forward_ref (Mfix *);
105 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
106 static Mnode *add_minipool_backward_ref (Mfix *);
107 static void assign_minipool_offsets (Mfix *);
108 static void arm_print_value (FILE *, rtx);
109 static void dump_minipool (rtx);
110 static int arm_barrier_cost (rtx);
111 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
112 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
113 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
114 rtx);
115 static void arm_reorg (void);
116 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
117 static unsigned long arm_compute_save_reg0_reg12_mask (void);
118 static unsigned long arm_compute_save_reg_mask (void);
119 static unsigned long arm_isr_value (tree);
120 static unsigned long arm_compute_func_type (void);
121 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
122 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
123 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
124 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
125 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
126 #endif
127 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
128 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
129 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
130 static int arm_comp_type_attributes (const_tree, const_tree);
131 static void arm_set_default_type_attributes (tree);
132 static int arm_adjust_cost (rtx, rtx, rtx, int);
133 static int count_insns_for_constant (HOST_WIDE_INT, int);
134 static int arm_get_strip_length (int);
135 static bool arm_function_ok_for_sibcall (tree, tree);
136 static enum machine_mode arm_promote_function_mode (const_tree,
137 enum machine_mode, int *,
138 const_tree, int);
139 static bool arm_return_in_memory (const_tree, const_tree);
140 static rtx arm_function_value (const_tree, const_tree, bool);
141 static rtx arm_libcall_value (enum machine_mode, const_rtx);
143 static void arm_internal_label (FILE *, const char *, unsigned long);
144 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
145 tree);
146 static bool arm_have_conditional_execution (void);
147 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
148 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
149 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
150 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
151 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
152 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
153 static bool arm_rtx_costs (rtx, int, int, int *, bool);
154 static int arm_address_cost (rtx, bool);
155 static bool arm_memory_load_p (rtx);
156 static bool arm_cirrus_insn_p (rtx);
157 static void cirrus_reorg (rtx);
158 static void arm_init_builtins (void);
159 static void arm_init_iwmmxt_builtins (void);
160 static rtx safe_vector_operand (rtx, enum machine_mode);
161 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
162 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
163 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
164 static void emit_constant_insn (rtx cond, rtx pattern);
165 static rtx emit_set_insn (rtx, rtx);
166 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
167 tree, bool);
168 static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
169 const_tree, bool);
170 static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
171 const_tree, bool);
172 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
173 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
174 const_tree);
175 static int aapcs_select_return_coproc (const_tree, const_tree);
177 #ifdef OBJECT_FORMAT_ELF
178 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
179 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
180 #endif
181 #ifndef ARM_PE
182 static void arm_encode_section_info (tree, rtx, int);
183 #endif
185 static void arm_file_end (void);
186 static void arm_file_start (void);
188 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
189 tree, int *, int);
190 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
191 enum machine_mode, const_tree, bool);
192 static bool arm_promote_prototypes (const_tree);
193 static bool arm_default_short_enums (void);
194 static bool arm_align_anon_bitfield (void);
195 static bool arm_return_in_msb (const_tree);
196 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
197 static bool arm_return_in_memory (const_tree, const_tree);
198 #if ARM_UNWIND_INFO
199 static void arm_unwind_emit (FILE *, rtx);
200 static bool arm_output_ttype (rtx);
201 static void arm_asm_emit_except_personality (rtx);
202 static void arm_asm_init_sections (void);
203 #endif
204 static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
205 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
206 static rtx arm_dwarf_register_span (rtx);
208 static tree arm_cxx_guard_type (void);
209 static bool arm_cxx_guard_mask_bit (void);
210 static tree arm_get_cookie_size (tree);
211 static bool arm_cookie_has_size (void);
212 static bool arm_cxx_cdtor_returns_this (void);
213 static bool arm_cxx_key_method_may_be_inline (void);
214 static void arm_cxx_determine_class_data_visibility (tree);
215 static bool arm_cxx_class_data_always_comdat (void);
216 static bool arm_cxx_use_aeabi_atexit (void);
217 static void arm_init_libfuncs (void);
218 static tree arm_build_builtin_va_list (void);
219 static void arm_expand_builtin_va_start (tree, rtx);
220 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
221 static void arm_option_override (void);
222 static bool arm_handle_option (struct gcc_options *, struct gcc_options *,
223 const struct cl_decoded_option *, location_t);
224 static void arm_target_help (void);
225 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
226 static bool arm_cannot_copy_insn_p (rtx);
227 static bool arm_tls_symbol_p (rtx x);
228 static int arm_issue_rate (void);
229 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
230 static bool arm_output_addr_const_extra (FILE *, rtx);
231 static bool arm_allocate_stack_slots_for_args (void);
232 static const char *arm_invalid_parameter_type (const_tree t);
233 static const char *arm_invalid_return_type (const_tree t);
234 static tree arm_promoted_type (const_tree t);
235 static tree arm_convert_to_type (tree type, tree expr);
236 static bool arm_scalar_mode_supported_p (enum machine_mode);
237 static bool arm_frame_pointer_required (void);
238 static bool arm_can_eliminate (const int, const int);
239 static void arm_asm_trampoline_template (FILE *);
240 static void arm_trampoline_init (rtx, tree, rtx);
241 static rtx arm_trampoline_adjust_address (rtx);
242 static rtx arm_pic_static_addr (rtx orig, rtx reg);
243 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
244 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
245 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
246 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
247 static bool arm_class_likely_spilled_p (reg_class_t);
248 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
249 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
250 const_tree type,
251 int misalignment,
252 bool is_packed);
253 static void arm_conditional_register_usage (void);
254 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
255 static unsigned int arm_autovectorize_vector_sizes (void);
258 /* Table of machine attributes. */
259 static const struct attribute_spec arm_attribute_table[] =
261 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
262 affects_type_identity } */
263 /* Function calls made to this symbol must be done indirectly, because
264 it may lie outside of the 26 bit addressing range of a normal function
265 call. */
266 { "long_call", 0, 0, false, true, true, NULL, false },
267 /* Whereas these functions are always known to reside within the 26 bit
268 addressing range. */
269 { "short_call", 0, 0, false, true, true, NULL, false },
270 /* Specify the procedure call conventions for a function. */
271 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
272 false },
273 /* Interrupt Service Routines have special prologue and epilogue requirements. */
274 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
275 false },
276 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
277 false },
278 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
279 false },
280 #ifdef ARM_PE
281 /* ARM/PE has three new attributes:
282 interfacearm - ?
283 dllexport - for exporting a function/variable that will live in a dll
284 dllimport - for importing a function/variable from a dll
286 Microsoft allows multiple declspecs in one __declspec, separating
287 them with spaces. We do NOT support this. Instead, use __declspec
288 multiple times.  */
290 { "dllimport", 0, 0, true, false, false, NULL, false },
291 { "dllexport", 0, 0, true, false, false, NULL, false },
292 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
293 false },
294 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
295 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
296 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
297 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
298 false },
299 #endif
300 { NULL, 0, 0, false, false, false, NULL, false }
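/* For illustration only (hypothetical declarations, not from this file),
   user code would reach the entries above with, e.g.:

     void uart_rx (void) __attribute__ ((isr ("IRQ")));
     int far_helper (int) __attribute__ ((long_call));

   "isr"/"interrupt" accept one optional string argument naming the
   exception kind, while "long_call" and "short_call" take no arguments,
   matching the min_len/max_len columns above.  */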
303 /* Set default optimization options. */
304 static const struct default_options arm_option_optimization_table[] =
306 /* Enable section anchors by default at -O1 or higher. */
307 { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
308 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
309 { OPT_LEVELS_NONE, 0, NULL, 0 }
312 /* Initialize the GCC target structure. */
313 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
314 #undef TARGET_MERGE_DECL_ATTRIBUTES
315 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
316 #endif
318 #undef TARGET_LEGITIMIZE_ADDRESS
319 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
321 #undef TARGET_ATTRIBUTE_TABLE
322 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
324 #undef TARGET_ASM_FILE_START
325 #define TARGET_ASM_FILE_START arm_file_start
326 #undef TARGET_ASM_FILE_END
327 #define TARGET_ASM_FILE_END arm_file_end
329 #undef TARGET_ASM_ALIGNED_SI_OP
330 #define TARGET_ASM_ALIGNED_SI_OP NULL
331 #undef TARGET_ASM_INTEGER
332 #define TARGET_ASM_INTEGER arm_assemble_integer
334 #undef TARGET_PRINT_OPERAND
335 #define TARGET_PRINT_OPERAND arm_print_operand
336 #undef TARGET_PRINT_OPERAND_ADDRESS
337 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
338 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
339 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
341 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
342 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
344 #undef TARGET_ASM_FUNCTION_PROLOGUE
345 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
347 #undef TARGET_ASM_FUNCTION_EPILOGUE
348 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
350 #undef TARGET_DEFAULT_TARGET_FLAGS
351 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
352 #undef TARGET_HANDLE_OPTION
353 #define TARGET_HANDLE_OPTION arm_handle_option
354 #undef TARGET_HELP
355 #define TARGET_HELP arm_target_help
356 #undef TARGET_OPTION_OVERRIDE
357 #define TARGET_OPTION_OVERRIDE arm_option_override
358 #undef TARGET_OPTION_OPTIMIZATION_TABLE
359 #define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table
361 #undef TARGET_COMP_TYPE_ATTRIBUTES
362 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
364 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
365 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
367 #undef TARGET_SCHED_ADJUST_COST
368 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
370 #undef TARGET_ENCODE_SECTION_INFO
371 #ifdef ARM_PE
372 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
373 #else
374 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
375 #endif
377 #undef TARGET_STRIP_NAME_ENCODING
378 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
380 #undef TARGET_ASM_INTERNAL_LABEL
381 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
383 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
384 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
386 #undef TARGET_FUNCTION_VALUE
387 #define TARGET_FUNCTION_VALUE arm_function_value
389 #undef TARGET_LIBCALL_VALUE
390 #define TARGET_LIBCALL_VALUE arm_libcall_value
392 #undef TARGET_ASM_OUTPUT_MI_THUNK
393 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
394 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
395 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
397 #undef TARGET_RTX_COSTS
398 #define TARGET_RTX_COSTS arm_rtx_costs
399 #undef TARGET_ADDRESS_COST
400 #define TARGET_ADDRESS_COST arm_address_cost
402 #undef TARGET_SHIFT_TRUNCATION_MASK
403 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
404 #undef TARGET_VECTOR_MODE_SUPPORTED_P
405 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
406 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
407 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
408 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
409 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
410 arm_autovectorize_vector_sizes
412 #undef TARGET_MACHINE_DEPENDENT_REORG
413 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
415 #undef TARGET_INIT_BUILTINS
416 #define TARGET_INIT_BUILTINS arm_init_builtins
417 #undef TARGET_EXPAND_BUILTIN
418 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
420 #undef TARGET_INIT_LIBFUNCS
421 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
423 #undef TARGET_PROMOTE_FUNCTION_MODE
424 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
425 #undef TARGET_PROMOTE_PROTOTYPES
426 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
427 #undef TARGET_PASS_BY_REFERENCE
428 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
429 #undef TARGET_ARG_PARTIAL_BYTES
430 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
431 #undef TARGET_FUNCTION_ARG
432 #define TARGET_FUNCTION_ARG arm_function_arg
433 #undef TARGET_FUNCTION_ARG_ADVANCE
434 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
435 #undef TARGET_FUNCTION_ARG_BOUNDARY
436 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
438 #undef TARGET_SETUP_INCOMING_VARARGS
439 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
441 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
442 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
444 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
445 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
446 #undef TARGET_TRAMPOLINE_INIT
447 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
448 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
449 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
451 #undef TARGET_DEFAULT_SHORT_ENUMS
452 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
454 #undef TARGET_ALIGN_ANON_BITFIELD
455 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
457 #undef TARGET_NARROW_VOLATILE_BITFIELD
458 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
460 #undef TARGET_CXX_GUARD_TYPE
461 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
463 #undef TARGET_CXX_GUARD_MASK_BIT
464 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
466 #undef TARGET_CXX_GET_COOKIE_SIZE
467 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
469 #undef TARGET_CXX_COOKIE_HAS_SIZE
470 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
472 #undef TARGET_CXX_CDTOR_RETURNS_THIS
473 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
475 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
476 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
478 #undef TARGET_CXX_USE_AEABI_ATEXIT
479 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
481 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
482 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
483 arm_cxx_determine_class_data_visibility
485 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
486 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
488 #undef TARGET_RETURN_IN_MSB
489 #define TARGET_RETURN_IN_MSB arm_return_in_msb
491 #undef TARGET_RETURN_IN_MEMORY
492 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
494 #undef TARGET_MUST_PASS_IN_STACK
495 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
497 #if ARM_UNWIND_INFO
498 #undef TARGET_ASM_UNWIND_EMIT
499 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
501 /* EABI unwinding tables use a different format for the typeinfo tables. */
502 #undef TARGET_ASM_TTYPE
503 #define TARGET_ASM_TTYPE arm_output_ttype
505 #undef TARGET_ARM_EABI_UNWINDER
506 #define TARGET_ARM_EABI_UNWINDER true
508 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
509 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
511 #undef TARGET_ASM_INIT_SECTIONS
512 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
513 #endif /* ARM_UNWIND_INFO */
515 #undef TARGET_EXCEPT_UNWIND_INFO
516 #define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
518 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
519 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
521 #undef TARGET_DWARF_REGISTER_SPAN
522 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
524 #undef TARGET_CANNOT_COPY_INSN_P
525 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
527 #ifdef HAVE_AS_TLS
528 #undef TARGET_HAVE_TLS
529 #define TARGET_HAVE_TLS true
530 #endif
532 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
533 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
535 #undef TARGET_CANNOT_FORCE_CONST_MEM
536 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
538 #undef TARGET_MAX_ANCHOR_OFFSET
539 #define TARGET_MAX_ANCHOR_OFFSET 4095
541 /* The minimum is set such that the total size of the block
542 for a particular anchor is -4088 + 1 + 4095 bytes, which is
543 divisible by eight, ensuring natural spacing of anchors. */
544 #undef TARGET_MIN_ANCHOR_OFFSET
545 #define TARGET_MIN_ANCHOR_OFFSET -4088
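/* Spelled out: anchors can then reach offsets in [-4088, +4095], a block of
   4088 + 1 + 4095 = 8184 addressable bytes, and 8184 = 8 * 1023, hence the
   divisibility by eight noted above.  */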
547 #undef TARGET_SCHED_ISSUE_RATE
548 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
550 #undef TARGET_MANGLE_TYPE
551 #define TARGET_MANGLE_TYPE arm_mangle_type
553 #undef TARGET_BUILD_BUILTIN_VA_LIST
554 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
555 #undef TARGET_EXPAND_BUILTIN_VA_START
556 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
557 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
558 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
560 #ifdef HAVE_AS_TLS
561 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
562 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
563 #endif
565 #undef TARGET_LEGITIMATE_ADDRESS_P
566 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
568 #undef TARGET_INVALID_PARAMETER_TYPE
569 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
571 #undef TARGET_INVALID_RETURN_TYPE
572 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
574 #undef TARGET_PROMOTED_TYPE
575 #define TARGET_PROMOTED_TYPE arm_promoted_type
577 #undef TARGET_CONVERT_TO_TYPE
578 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
580 #undef TARGET_SCALAR_MODE_SUPPORTED_P
581 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
583 #undef TARGET_FRAME_POINTER_REQUIRED
584 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
586 #undef TARGET_CAN_ELIMINATE
587 #define TARGET_CAN_ELIMINATE arm_can_eliminate
589 #undef TARGET_CONDITIONAL_REGISTER_USAGE
590 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
592 #undef TARGET_CLASS_LIKELY_SPILLED_P
593 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
595 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
596 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
597 arm_vector_alignment_reachable
599 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
600 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
601 arm_builtin_support_vector_misalignment
603 #undef TARGET_PREFERRED_RENAME_CLASS
604 #define TARGET_PREFERRED_RENAME_CLASS \
605 arm_preferred_rename_class
607 struct gcc_target targetm = TARGET_INITIALIZER;
609 /* Obstack for minipool constant handling. */
610 static struct obstack minipool_obstack;
611 static char * minipool_startobj;
613 /* The maximum number of insns skipped which
614 will be conditionalised if possible. */
615 static int max_insns_skipped = 5;
617 extern FILE * asm_out_file;
619 /* True if we are currently building a constant table. */
620 int making_const_table;
622 /* The processor for which instructions should be scheduled. */
623 enum processor_type arm_tune = arm_none;
625 /* The current tuning set. */
626 const struct tune_params *current_tune;
628 /* Which floating point hardware to schedule for. */
629 int arm_fpu_attr;
632 /* Which floating point hardware to use.  */
632 const struct arm_fpu_desc *arm_fpu_desc;
634 /* Whether to use floating point hardware. */
635 enum float_abi_type arm_float_abi;
637 /* Which __fp16 format to use. */
638 enum arm_fp16_format_type arm_fp16_format;
640 /* Which ABI to use. */
641 enum arm_abi_type arm_abi;
643 /* Which thread pointer model to use. */
644 enum arm_tp_type target_thread_pointer = TP_AUTO;
646 /* Used to parse -mstructure_size_boundary command line option. */
647 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
649 /* Used for Thumb call_via trampolines. */
650 rtx thumb_call_via_label[14];
651 static int thumb_call_reg_needed;
653 /* Bit values used to identify processor capabilities. */
654 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
655 #define FL_ARCH3M (1 << 1) /* Extended multiply */
656 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
657 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
658 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
659 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
660 #define FL_THUMB (1 << 6) /* Thumb aware */
661 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
662 #define FL_STRONG (1 << 8) /* StrongARM */
663 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
664 #define FL_XSCALE (1 << 10) /* XScale */
665 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
666 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
667 media instructions. */
668 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
669 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
670 Note: ARM6 & 7 derivatives only. */
671 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
672 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
673 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
674 profile. */
675 #define FL_DIV (1 << 18) /* Hardware divide. */
676 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
677 #define FL_NEON (1 << 20) /* Neon instructions. */
678 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
679 architecture. */
680 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
682 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
684 /* Flags that only affect tuning, not the available instructions.  */
685 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
686 | FL_CO_PROC)
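/* FL_TUNE lets capability comparisons ignore tuning-only differences;
   for example, arm_option_override masks with ~FL_TUNE before warning
   that -mcpu and -march conflict.  */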
688 #define FL_FOR_ARCH2 FL_NOTM
689 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
690 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
691 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
692 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
693 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
694 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
695 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
696 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
697 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
698 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
699 #define FL_FOR_ARCH6J FL_FOR_ARCH6
700 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
701 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
702 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
703 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
704 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
705 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
706 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
707 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
708 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
709 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
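/* These definitions build on one another (occasionally removing a bit, as
   for the M-profile entries).  Expanded as an example, FL_FOR_ARCH7A is
   FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5 | FL_ARCH5E
   | FL_THUMB | FL_ARCH6 | FL_THUMB2 | FL_ARCH7 | FL_ARCH6K.  */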
711 /* The bits in this mask specify which
712 instructions we are allowed to generate. */
713 static unsigned long insn_flags = 0;
715 /* The bits in this mask specify which instruction scheduling options should
716 be used. */
717 static unsigned long tune_flags = 0;
719 /* The following are used in the arm.md file as equivalents to bits
720 in the above two flag variables. */
722 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
723 int arm_arch3m = 0;
725 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
726 int arm_arch4 = 0;
728 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
729 int arm_arch4t = 0;
731 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
732 int arm_arch5 = 0;
734 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
735 int arm_arch5e = 0;
737 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
738 int arm_arch6 = 0;
740 /* Nonzero if this chip supports the ARM 6K extensions. */
741 int arm_arch6k = 0;
743 /* Nonzero if this chip supports the ARM 7 extensions. */
744 int arm_arch7 = 0;
746 /* Nonzero if instructions not present in the 'M' profile can be used. */
747 int arm_arch_notm = 0;
749 /* Nonzero if instructions present in ARMv7E-M can be used. */
750 int arm_arch7em = 0;
752 /* Nonzero if this chip can benefit from load scheduling. */
753 int arm_ld_sched = 0;
755 /* Nonzero if this chip is a StrongARM. */
756 int arm_tune_strongarm = 0;
758 /* Nonzero if this chip is a Cirrus variant. */
759 int arm_arch_cirrus = 0;
761 /* Nonzero if this chip supports Intel Wireless MMX technology. */
762 int arm_arch_iwmmxt = 0;
764 /* Nonzero if this chip is an XScale. */
765 int arm_arch_xscale = 0;
767 /* Nonzero if tuning for XScale */
768 int arm_tune_xscale = 0;
770 /* Nonzero if we want to tune for stores that access the write-buffer.
771 This typically means an ARM6 or ARM7 with MMU or MPU. */
772 int arm_tune_wbuf = 0;
774 /* Nonzero if tuning for Cortex-A9. */
775 int arm_tune_cortex_a9 = 0;
777 /* Nonzero if generating Thumb instructions. */
778 int thumb_code = 0;
780 /* Nonzero if generating Thumb-1 instructions. */
781 int thumb1_code = 0;
783 /* Nonzero if we should define __THUMB_INTERWORK__ in the
784 preprocessor.
785 XXX This is a bit of a hack, it's intended to help work around
786 problems in GLD which doesn't understand that armv5t code is
787 interworking clean. */
788 int arm_cpp_interwork = 0;
790 /* Nonzero if chip supports Thumb 2. */
791 int arm_arch_thumb2;
793 /* Nonzero if chip supports integer division instruction. */
794 int arm_arch_hwdiv;
796 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
797 we must report the mode of the memory reference from
798 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
799 enum machine_mode output_memory_reference_mode;
801 /* The register number to be used for the PIC offset register. */
802 unsigned arm_pic_register = INVALID_REGNUM;
804 /* Set to 1 after arm_reorg has started.  Reset to 0 at the start of
805 the next function.  */
806 static int after_arm_reorg = 0;
808 enum arm_pcs arm_pcs_default;
810 /* For an explanation of these variables, see final_prescan_insn below. */
811 int arm_ccfsm_state;
812 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
813 enum arm_cond_code arm_current_cc;
815 rtx arm_target_insn;
816 int arm_target_label;
817 /* The number of conditionally executed insns, including the current insn. */
818 int arm_condexec_count = 0;
819 /* A bitmask specifying the patterns for the IT block.
820 Zero means do not output an IT block before this insn. */
821 int arm_condexec_mask = 0;
822 /* The number of bits used in arm_condexec_mask. */
823 int arm_condexec_masklen = 0;
825 /* The condition codes of the ARM, and the inverse function. */
826 static const char * const arm_condition_codes[] =
828 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
829 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
832 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
833 int arm_regs_in_sequence[] =
835 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
838 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
839 #define streq(string1, string2) (strcmp (string1, string2) == 0)
841 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
842 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
843 | (1 << PIC_OFFSET_TABLE_REGNUM)))
845 /* Initialization code. */
847 struct processors
849 const char *const name;
850 enum processor_type core;
851 const char *arch;
852 const unsigned long flags;
853 const struct tune_params *const tune;
857 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
858 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
859 prefetch_slots, \
860 l1_size, \
861 l1_line_size
863 const struct tune_params arm_slowmul_tune =
865 arm_slowmul_rtx_costs,
866 NULL,
868 ARM_PREFETCH_NOT_BENEFICIAL
871 const struct tune_params arm_fastmul_tune =
873 arm_fastmul_rtx_costs,
874 NULL,
876 ARM_PREFETCH_NOT_BENEFICIAL
879 const struct tune_params arm_xscale_tune =
881 arm_xscale_rtx_costs,
882 xscale_sched_adjust_cost,
884 ARM_PREFETCH_NOT_BENEFICIAL
887 const struct tune_params arm_9e_tune =
889 arm_9e_rtx_costs,
890 NULL,
892 ARM_PREFETCH_NOT_BENEFICIAL
895 const struct tune_params arm_cortex_a9_tune =
897 arm_9e_rtx_costs,
898 cortex_a9_sched_adjust_cost,
900 ARM_PREFETCH_BENEFICIAL(4,32,32)
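/* Read against ARM_PREFETCH_BENEFICIAL's parameter names: 4 prefetch
   slots, an L1 cache size of 32 and an L1 line size of 32 for Cortex-A9
   tuning (the units are fixed where these values feed GCC's cache
   parameters, outside this excerpt).  */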
903 const struct tune_params arm_fa726te_tune =
905 arm_9e_rtx_costs,
906 fa726te_sched_adjust_cost,
908 ARM_PREFETCH_NOT_BENEFICIAL
912 /* Not all of these give usefully different compilation alternatives,
913 but there is no simple way of generalizing them. */
914 static const struct processors all_cores[] =
916 /* ARM Cores */
917 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
918 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
919 #include "arm-cores.def"
920 #undef ARM_CORE
921 {NULL, arm_none, NULL, 0, NULL}
924 static const struct processors all_architectures[] =
926 /* ARM Architectures */
927 /* We don't specify tuning costs here as it will be figured out
928 from the core. */
930 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
931 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
932 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
933 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
934 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
935 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
936 implementations that support it, so we will leave it out for now. */
937 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
938 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
939 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
940 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
941 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
942 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
943 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
944 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
945 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
946 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
947 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
948 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
949 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
950 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
951 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
952 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
953 {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
954 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
955 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
956 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
957 {NULL, arm_none, NULL, 0 , NULL}
961 /* These are populated as commandline arguments are processed, or NULL
962 if not specified. */
963 static const struct processors *arm_selected_arch;
964 static const struct processors *arm_selected_cpu;
965 static const struct processors *arm_selected_tune;
967 /* The name of the preprocessor macro to define for this architecture. */
969 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
971 /* Available values for -mfpu=. */
973 static const struct arm_fpu_desc all_fpus[] =
975 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
976 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
977 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
978 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
979 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
980 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
981 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
982 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
983 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
984 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
985 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
986 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
987 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
988 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
989 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
990 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
991 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
992 /* Compatibility aliases. */
993 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
997 struct float_abi
999 const char * name;
1000 enum float_abi_type abi_type;
1004 /* Available values for -mfloat-abi=. */
1006 static const struct float_abi all_float_abis[] =
1008 {"soft", ARM_FLOAT_ABI_SOFT},
1009 {"softfp", ARM_FLOAT_ABI_SOFTFP},
1010 {"hard", ARM_FLOAT_ABI_HARD}
1014 struct fp16_format
1016 const char *name;
1017 enum arm_fp16_format_type fp16_format_type;
1021 /* Available values for -mfp16-format=. */
1023 static const struct fp16_format all_fp16_formats[] =
1025 {"none", ARM_FP16_FORMAT_NONE},
1026 {"ieee", ARM_FP16_FORMAT_IEEE},
1027 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
1031 struct abi_name
1033 const char *name;
1034 enum arm_abi_type abi_type;
1038 /* Available values for -mabi=. */
1040 static const struct abi_name arm_all_abis[] =
1042 {"apcs-gnu", ARM_ABI_APCS},
1043 {"atpcs", ARM_ABI_ATPCS},
1044 {"aapcs", ARM_ABI_AAPCS},
1045 {"iwmmxt", ARM_ABI_IWMMXT},
1046 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
1049 /* Supported TLS relocations. */
1051 enum tls_reloc {
1052 TLS_GD32,
1053 TLS_LDM32,
1054 TLS_LDO32,
1055 TLS_IE32,
1056 TLS_LE32
1059 /* The maximum number of insns to be used when loading a constant. */
1060 inline static int
1061 arm_constant_limit (bool size_p)
1063 return size_p ? 1 : current_tune->constant_limit;
1066 /* Emit an insn that's a simple single-set. Both the operands must be known
1067 to be valid. */
1068 inline static rtx
1069 emit_set_insn (rtx x, rtx y)
1071 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1074 /* Return the number of bits set in VALUE. */
1075 static unsigned
1076 bit_count (unsigned long value)
1078 unsigned long count = 0;
1080 while (value)
1082 count++;
1083 value &= value - 1; /* Clear the least-significant set bit. */
1086 return count;
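/* Kernighan's trick: value &= value - 1 clears exactly one set bit per
   iteration, so e.g. bit_count (0x29) (binary 101001) loops three times
   and returns 3.  */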
1089 /* Set up library functions unique to ARM. */
1091 static void
1092 arm_init_libfuncs (void)
1094 /* There are no special library functions unless we are using the
1095 ARM BPABI. */
1096 if (!TARGET_BPABI)
1097 return;
1099 /* The functions below are described in Section 4 of the "Run-Time
1100 ABI for the ARM architecture", Version 1.0. */
1102 /* Double-precision floating-point arithmetic. Table 2. */
1103 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1104 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1105 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1106 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1107 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1109 /* Double-precision comparisons. Table 3. */
1110 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1111 set_optab_libfunc (ne_optab, DFmode, NULL);
1112 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1113 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1114 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1115 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1116 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1118 /* Single-precision floating-point arithmetic. Table 4. */
1119 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1120 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1121 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1122 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1123 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1125 /* Single-precision comparisons. Table 5. */
1126 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1127 set_optab_libfunc (ne_optab, SFmode, NULL);
1128 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1129 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1130 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1131 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1132 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1134 /* Floating-point to integer conversions. Table 6. */
1135 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1136 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1137 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1138 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1139 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1140 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1141 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1142 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1144 /* Conversions between floating types. Table 7. */
1145 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1146 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1148 /* Integer to floating-point conversions. Table 8. */
1149 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1150 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1151 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1152 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1153 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1154 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1155 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1156 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1158 /* Long long. Table 9. */
1159 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1160 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1161 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1162 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1163 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1164 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1165 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1166 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1168 /* Integer (32/32->32) division. \S 4.3.1. */
1169 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1170 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1172 /* The divmod functions are designed so that they can be used for
1173 plain division, even though they return both the quotient and the
1174 remainder. The quotient is returned in the usual location (i.e.,
1175 r0 for SImode, {r0, r1} for DImode), just as would be expected
1176 for an ordinary division routine. Because the AAPCS calling
1177 conventions specify that all of { r0, r1, r2, r3 } are
1178 call-clobbered registers, there is no need to tell the compiler
1179 explicitly that those registers are clobbered by these
1180 routines. */
1181 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1182 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
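/* Concretely, __aeabi_idivmod returns the quotient in r0 and the remainder
   in r1, and __aeabi_ldivmod returns the quotient in {r0, r1} and the
   remainder in {r2, r3}, so the same entry point also serves as the plain
   DImode division routine registered just above.  */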
1184 /* For SImode division the ABI provides div-without-mod routines,
1185 which are faster. */
1186 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1187 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1189 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1190 divmod libcalls instead. */
1191 set_optab_libfunc (smod_optab, DImode, NULL);
1192 set_optab_libfunc (umod_optab, DImode, NULL);
1193 set_optab_libfunc (smod_optab, SImode, NULL);
1194 set_optab_libfunc (umod_optab, SImode, NULL);
1196 /* Half-precision float operations. The compiler handles all operations
1197 with NULL libfuncs by converting to SFmode.  */
1198 switch (arm_fp16_format)
1200 case ARM_FP16_FORMAT_IEEE:
1201 case ARM_FP16_FORMAT_ALTERNATIVE:
1203 /* Conversions. */
1204 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1205 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1206 ? "__gnu_f2h_ieee"
1207 : "__gnu_f2h_alternative"));
1208 set_conv_libfunc (sext_optab, SFmode, HFmode,
1209 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1210 ? "__gnu_h2f_ieee"
1211 : "__gnu_h2f_alternative"));
1213 /* Arithmetic. */
1214 set_optab_libfunc (add_optab, HFmode, NULL);
1215 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1216 set_optab_libfunc (smul_optab, HFmode, NULL);
1217 set_optab_libfunc (neg_optab, HFmode, NULL);
1218 set_optab_libfunc (sub_optab, HFmode, NULL);
1220 /* Comparisons. */
1221 set_optab_libfunc (eq_optab, HFmode, NULL);
1222 set_optab_libfunc (ne_optab, HFmode, NULL);
1223 set_optab_libfunc (lt_optab, HFmode, NULL);
1224 set_optab_libfunc (le_optab, HFmode, NULL);
1225 set_optab_libfunc (ge_optab, HFmode, NULL);
1226 set_optab_libfunc (gt_optab, HFmode, NULL);
1227 set_optab_libfunc (unord_optab, HFmode, NULL);
1228 break;
1230 default:
1231 break;
1234 if (TARGET_AAPCS_BASED)
1235 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1238 /* On AAPCS systems, this is the "struct __va_list". */
1239 static GTY(()) tree va_list_type;
1241 /* Return the type to use as __builtin_va_list. */
1242 static tree
1243 arm_build_builtin_va_list (void)
1245 tree va_list_name;
1246 tree ap_field;
1248 if (!TARGET_AAPCS_BASED)
1249 return std_build_builtin_va_list ();
1251 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1252 defined as:
1254 struct __va_list
1255 {
1256 void *__ap;
1257 };
1259 The C Library ABI further reinforces this definition in \S
1260 4.1.
1262 We must follow this definition exactly. The structure tag
1263 name is visible in C++ mangled names, and thus forms a part
1264 of the ABI. The field name may be used by people who
1265 #include <stdarg.h>. */
1266 /* Create the type. */
1267 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1268 /* Give it the required name. */
1269 va_list_name = build_decl (BUILTINS_LOCATION,
1270 TYPE_DECL,
1271 get_identifier ("__va_list"),
1272 va_list_type);
1273 DECL_ARTIFICIAL (va_list_name) = 1;
1274 TYPE_NAME (va_list_type) = va_list_name;
1275 TYPE_STUB_DECL (va_list_type) = va_list_name;
1276 /* Create the __ap field. */
1277 ap_field = build_decl (BUILTINS_LOCATION,
1278 FIELD_DECL,
1279 get_identifier ("__ap"),
1280 ptr_type_node);
1281 DECL_ARTIFICIAL (ap_field) = 1;
1282 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1283 TYPE_FIELDS (va_list_type) = ap_field;
1284 /* Compute its layout. */
1285 layout_type (va_list_type);
1287 return va_list_type;
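/* One visible consequence of the tag name forming part of the ABI: on
   AAPCS targets a C++ va_list parameter is mangled as the class type
   std::__va_list rather than as a pointer, so renaming the tag or the
   __ap field here would silently break link compatibility.  */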
1290 /* Return an expression of type "void *" pointing to the next
1291 available argument in a variable-argument list. VALIST is the
1292 user-level va_list object, of type __builtin_va_list. */
1293 static tree
1294 arm_extract_valist_ptr (tree valist)
1296 if (TREE_TYPE (valist) == error_mark_node)
1297 return error_mark_node;
1299 /* On an AAPCS target, the pointer is stored within "struct
1300 va_list". */
1301 if (TARGET_AAPCS_BASED)
1303 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1304 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1305 valist, ap_field, NULL_TREE);
1308 return valist;
1311 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1312 static void
1313 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1315 valist = arm_extract_valist_ptr (valist);
1316 std_expand_builtin_va_start (valist, nextarg);
1319 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1320 static tree
1321 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1322 gimple_seq *post_p)
1324 valist = arm_extract_valist_ptr (valist);
1325 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1328 /* Lookup NAME in SEL. */
1330 static const struct processors *
1331 arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
1333 if (!(name && *name))
1334 return NULL;
1336 for (; sel->name != NULL; sel++)
1338 if (streq (name, sel->name))
1339 return sel;
1342 error ("bad value (%s) for %s switch", name, desc);
1343 return NULL;
1346 /* Implement TARGET_HANDLE_OPTION. */
1348 static bool
1349 arm_handle_option (struct gcc_options *opts, struct gcc_options *opts_set,
1350 const struct cl_decoded_option *decoded,
1351 location_t loc ATTRIBUTE_UNUSED)
1353 size_t code = decoded->opt_index;
1354 const char *arg = decoded->arg;
1356 gcc_assert (opts == &global_options);
1357 gcc_assert (opts_set == &global_options_set);
1359 switch (code)
1361 case OPT_march_:
1362 arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
1363 return true;
1365 case OPT_mcpu_:
1366 arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
1367 return true;
1369 case OPT_mtune_:
1370 arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
1371 return true;
1373 default:
1374 return true;
1378 static void
1379 arm_target_help (void)
1381 int i;
1382 static int columns = 0;
1383 int remaining;
1385 /* If we have not done so already, obtain the desired maximum width of
1386 the output. Note - this is a duplication of the code at the start of
1387 gcc/opts.c:print_specific_help() - the two copies should probably be
1388 replaced by a single function. */
1389 if (columns == 0)
1391 const char *p;
1393 p = getenv ("COLUMNS");
1394 if (p != NULL)
1396 int value = atoi (p);
1398 if (value > 0)
1399 columns = value;
1402 if (columns == 0)
1403 /* Use a reasonable default. */
1404 columns = 80;
1407 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1409 /* The - 2 is because we know that the last entry in the array is NULL. */
1410 i = ARRAY_SIZE (all_cores) - 2;
1411 gcc_assert (i > 0);
1412 printf (" %s", all_cores[i].name);
1413 remaining = columns - (strlen (all_cores[i].name) + 4);
1414 gcc_assert (remaining >= 0);
1416 while (i--)
1418 int len = strlen (all_cores[i].name);
1420 if (remaining > len + 2)
1422 printf (", %s", all_cores[i].name);
1423 remaining -= len + 2;
1425 else
1427 if (remaining > 0)
1428 printf (",");
1429 printf ("\n %s", all_cores[i].name);
1430 remaining = columns - (len + 4);
1434 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1436 i = ARRAY_SIZE (all_architectures) - 2;
1437 gcc_assert (i > 0);
1439 printf (" %s", all_architectures[i].name);
1440 remaining = columns - (strlen (all_architectures[i].name) + 4);
1441 gcc_assert (remaining >= 0);
1443 while (i--)
1445 int len = strlen (all_architectures[i].name);
1447 if (remaining > len + 2)
1449 printf (", %s", all_architectures[i].name);
1450 remaining -= len + 2;
1452 else
1454 if (remaining > 0)
1455 printf (",");
1456 printf ("\n %s", all_architectures[i].name);
1457 remaining = columns - (len + 4);
1460 printf ("\n");
1464 /* Fix up any incompatible options that the user has specified. */
1465 static void
1466 arm_option_override (void)
1468 unsigned i;
1470 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1471 SUBTARGET_OVERRIDE_OPTIONS;
1472 #endif
1474 if (arm_selected_arch)
1476 if (arm_selected_cpu)
1478 /* Check for conflict between mcpu and march. */
1479 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1481 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1482 arm_selected_cpu->name, arm_selected_arch->name);
1483 /* -march wins for code generation.
1484 -mcpu wins for default tuning. */
1485 if (!arm_selected_tune)
1486 arm_selected_tune = arm_selected_cpu;
1488 arm_selected_cpu = arm_selected_arch;
1490 else
1491 /* -mcpu wins. */
1492 arm_selected_arch = NULL;
1494 else
1495 /* Pick a CPU based on the architecture. */
1496 arm_selected_cpu = arm_selected_arch;
1499 /* If the user did not specify a processor, choose one for them. */
1500 if (!arm_selected_cpu)
1502 const struct processors * sel;
1503 unsigned int sought;
1505 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1506 if (!arm_selected_cpu->name)
1508 #ifdef SUBTARGET_CPU_DEFAULT
1509 /* Use the subtarget default CPU if none was specified by
1510 configure. */
1511 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1512 #endif
1513 /* Default to ARM6. */
1514 if (!arm_selected_cpu->name)
1515 arm_selected_cpu = &all_cores[arm6];
1518 sel = arm_selected_cpu;
1519 insn_flags = sel->flags;
1521 /* Now check to see if the user has specified any command-line
1522 switches that require certain abilities from the CPU.  */
1523 sought = 0;
1525 if (TARGET_INTERWORK || TARGET_THUMB)
1527 sought |= (FL_THUMB | FL_MODE32);
1529 /* There are no ARM processors that support both APCS-26 and
1530 interworking. Therefore we force FL_MODE26 to be removed
1531 from insn_flags here (if it was set), so that the search
1532 below will always be able to find a compatible processor. */
1533 insn_flags &= ~FL_MODE26;
1536 if (sought != 0 && ((sought & insn_flags) != sought))
1538 /* Try to locate a CPU type that supports all of the abilities
1539 of the default CPU, plus the extra abilities requested by
1540 the user. */
1541 for (sel = all_cores; sel->name != NULL; sel++)
1542 if ((sel->flags & sought) == (sought | insn_flags))
1543 break;
1545 if (sel->name == NULL)
1547 unsigned current_bit_count = 0;
1548 const struct processors * best_fit = NULL;
1550 /* Ideally we would like to issue an error message here
1551 saying that it was not possible to find a CPU compatible
1552 with the default CPU, but which also supports the command
1553 line options specified by the programmer, and so they
1554 ought to use the -mcpu=<name> command line option to
1555 override the default CPU type.
1557 If we cannot find a cpu that has both the
1558 characteristics of the default cpu and the given
1559 command line options we scan the array again looking
1560 for a best match. */
1561 for (sel = all_cores; sel->name != NULL; sel++)
1562 if ((sel->flags & sought) == sought)
1564 unsigned count;
1566 count = bit_count (sel->flags & insn_flags);
1568 if (count >= current_bit_count)
1570 best_fit = sel;
1571 current_bit_count = count;
1575 gcc_assert (best_fit);
1576 sel = best_fit;
1579 arm_selected_cpu = sel;
1583 gcc_assert (arm_selected_cpu);
1584 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1585 if (!arm_selected_tune)
1586 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1588 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1589 insn_flags = arm_selected_cpu->flags;
1591 arm_tune = arm_selected_tune->core;
1592 tune_flags = arm_selected_tune->flags;
1593 current_tune = arm_selected_tune->tune;
1595 if (target_fp16_format_name)
1597 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1599 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1601 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1602 break;
1605 if (i == ARRAY_SIZE (all_fp16_formats))
1606 error ("invalid __fp16 format option: -mfp16-format=%s",
1607 target_fp16_format_name);
1609 else
1610 arm_fp16_format = ARM_FP16_FORMAT_NONE;
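/* For example, -mfp16-format=ieee selects IEEE half precision and
   -mfp16-format=alternative selects the ARM alternative format; an
   unrecognised name is rejected by the error above.  */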
1612 if (target_abi_name)
1614 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1616 if (streq (arm_all_abis[i].name, target_abi_name))
1618 arm_abi = arm_all_abis[i].abi_type;
1619 break;
1622 if (i == ARRAY_SIZE (arm_all_abis))
1623 error ("invalid ABI option: -mabi=%s", target_abi_name);
1625 else
1626 arm_abi = ARM_DEFAULT_ABI;
1628 /* Make sure that the processor choice does not conflict with any of the
1629 other command line choices. */
1630 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1631 error ("target CPU does not support ARM mode");
1633 /* BPABI targets use linker tricks to allow interworking on cores
1634 without thumb support. */
1635 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1637 warning (0, "target CPU does not support interworking");
1638 target_flags &= ~MASK_INTERWORK;
1641 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1643 warning (0, "target CPU does not support THUMB instructions");
1644 target_flags &= ~MASK_THUMB;
1647 if (TARGET_APCS_FRAME && TARGET_THUMB)
1649 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1650 target_flags &= ~MASK_APCS_FRAME;
1653 /* Callee super interworking implies thumb interworking. Adding
1654 this to the flags here simplifies the logic elsewhere. */
1655 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1656 target_flags |= MASK_INTERWORK;
1658 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1659 from here where no function is being compiled currently. */
1660 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1661 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1663 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1664 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1666 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1668 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1669 target_flags |= MASK_APCS_FRAME;
1672 if (TARGET_POKE_FUNCTION_NAME)
1673 target_flags |= MASK_APCS_FRAME;
1675 if (TARGET_APCS_REENT && flag_pic)
1676 error ("-fpic and -mapcs-reent are incompatible");
1678 if (TARGET_APCS_REENT)
1679 warning (0, "APCS reentrant code not supported. Ignored");
1681 /* If this target is normally configured to use APCS frames, warn if they
1682 are turned off and debugging is turned on. */
1683 if (TARGET_ARM
1684 && write_symbols != NO_DEBUG
1685 && !TARGET_APCS_FRAME
1686 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1687 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1689 if (TARGET_APCS_FLOAT)
1690 warning (0, "passing floating point arguments in fp regs not yet supported");
1692 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1693 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1694 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1695 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1696 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1697 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1698 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1699 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1700 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1701 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1702 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1703 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1704 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1705 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1707 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1708 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1709 thumb_code = TARGET_ARM == 0;
1710 thumb1_code = TARGET_THUMB1 != 0;
1711 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1712 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1713 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1714 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1715 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1717 /* If we are not using the default (ARM mode) section anchor offset
1718 ranges, then set the correct ranges now. */
1719 if (TARGET_THUMB1)
1721 /* Thumb-1 LDR instructions cannot have negative offsets.
1722 Permissible positive offset ranges are 5-bit (for byte loads),
1723 6-bit (for halfword loads), or 7-bit (for word loads).
1724 Empirical results suggest a 7-bit anchor range gives the best
1725 overall code size. */
1726 targetm.min_anchor_offset = 0;
1727 targetm.max_anchor_offset = 127;
1729 else if (TARGET_THUMB2)
1731 /* The minimum is set such that the total size of the block
1732 for a particular anchor is 248 + 1 + 4095 bytes, which is
1733 divisible by eight, ensuring natural spacing of anchors. */
1734 targetm.min_anchor_offset = -248;
1735 targetm.max_anchor_offset = 4095;
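/* As a quick check on that arithmetic: 248 + 1 + 4095 = 4344 = 8 * 543,
   so the block covered by one anchor is indeed a multiple of eight bytes.  */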
1738 /* V5 code we generate is completely interworking capable, so we turn off
1739 TARGET_INTERWORK here to avoid many tests later on. */
1741 /* XXX However, we must pass the right pre-processor defines to CPP
1742 or GLD can get confused. This is a hack. */
1743 if (TARGET_INTERWORK)
1744 arm_cpp_interwork = 1;
1746 if (arm_arch5)
1747 target_flags &= ~MASK_INTERWORK;
1749 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1750 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1752 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1753 error ("iwmmxt abi requires an iwmmxt capable cpu");
1755 if (target_fpu_name == NULL && target_fpe_name != NULL)
1757 if (streq (target_fpe_name, "2"))
1758 target_fpu_name = "fpe2";
1759 else if (streq (target_fpe_name, "3"))
1760 target_fpu_name = "fpe3";
1761 else
1762 error ("invalid floating point emulation option: -mfpe=%s",
1763 target_fpe_name);
1766 if (target_fpu_name == NULL)
1768 #ifdef FPUTYPE_DEFAULT
1769 target_fpu_name = FPUTYPE_DEFAULT;
1770 #else
1771 if (arm_arch_cirrus)
1772 target_fpu_name = "maverick";
1773 else
1774 target_fpu_name = "fpe2";
1775 #endif
1778 arm_fpu_desc = NULL;
1779 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1781 if (streq (all_fpus[i].name, target_fpu_name))
1783 arm_fpu_desc = &all_fpus[i];
1784 break;
1788 if (!arm_fpu_desc)
1790 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1791 return;
1794 switch (arm_fpu_desc->model)
1796 case ARM_FP_MODEL_FPA:
1797 if (arm_fpu_desc->rev == 2)
1798 arm_fpu_attr = FPU_FPE2;
1799 else if (arm_fpu_desc->rev == 3)
1800 arm_fpu_attr = FPU_FPE3;
1801 else
1802 arm_fpu_attr = FPU_FPA;
1803 break;
1805 case ARM_FP_MODEL_MAVERICK:
1806 arm_fpu_attr = FPU_MAVERICK;
1807 break;
1809 case ARM_FP_MODEL_VFP:
1810 arm_fpu_attr = FPU_VFP;
1811 break;
1813 default:
1814 gcc_unreachable();
1817 if (target_float_abi_name != NULL)
1819 /* The user specified a FP ABI. */
1820 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1822 if (streq (all_float_abis[i].name, target_float_abi_name))
1824 arm_float_abi = all_float_abis[i].abi_type;
1825 break;
1828 if (i == ARRAY_SIZE (all_float_abis))
1829 error ("invalid floating point abi: -mfloat-abi=%s",
1830 target_float_abi_name);
1832 else
1833 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1835 if (TARGET_AAPCS_BASED
1836 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1837 error ("FPA is unsupported in the AAPCS");
1839 if (TARGET_AAPCS_BASED)
1841 if (TARGET_CALLER_INTERWORKING)
1842 error ("AAPCS does not support -mcaller-super-interworking");
1843 else
1844 if (TARGET_CALLEE_INTERWORKING)
1845 error ("AAPCS does not support -mcallee-super-interworking");
1848 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1849 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1850 will ever exist. GCC makes no attempt to support this combination. */
1851 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1852 sorry ("iWMMXt and hardware floating point");
1854 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1855 if (TARGET_THUMB2 && TARGET_IWMMXT)
1856 sorry ("Thumb-2 iWMMXt");
1858 /* __fp16 support currently assumes the core has ldrh. */
1859 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1860 sorry ("__fp16 and no ldrh");
1862 /* If soft-float is specified then don't use FPU. */
1863 if (TARGET_SOFT_FLOAT)
1864 arm_fpu_attr = FPU_NONE;
1866 if (TARGET_AAPCS_BASED)
1868 if (arm_abi == ARM_ABI_IWMMXT)
1869 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1870 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1871 && TARGET_HARD_FLOAT
1872 && TARGET_VFP)
1873 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1874 else
1875 arm_pcs_default = ARM_PCS_AAPCS;
1877 else
1879 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1880 sorry ("-mfloat-abi=hard and VFP");
1882 if (arm_abi == ARM_ABI_APCS)
1883 arm_pcs_default = ARM_PCS_APCS;
1884 else
1885 arm_pcs_default = ARM_PCS_ATPCS;
1888 /* For arm2/3 there is no need to do any scheduling if there is only
1889 a floating point emulator, or we are doing software floating-point. */
1890 if ((TARGET_SOFT_FLOAT
1891 || (TARGET_FPA && arm_fpu_desc->rev))
1892 && (tune_flags & FL_MODE32) == 0)
1893 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1895 if (target_thread_switch)
1897 if (strcmp (target_thread_switch, "soft") == 0)
1898 target_thread_pointer = TP_SOFT;
1899 else if (strcmp (target_thread_switch, "auto") == 0)
1900 target_thread_pointer = TP_AUTO;
1901 else if (strcmp (target_thread_switch, "cp15") == 0)
1902 target_thread_pointer = TP_CP15;
1903 else
1904 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1907 /* Use the cp15 method if it is available. */
1908 if (target_thread_pointer == TP_AUTO)
1910 if (arm_arch6k && !TARGET_THUMB1)
1911 target_thread_pointer = TP_CP15;
1912 else
1913 target_thread_pointer = TP_SOFT;
1916 if (TARGET_HARD_TP && TARGET_THUMB1)
1917 error ("can not use -mtp=cp15 with 16-bit Thumb");
1919 /* Override the default structure alignment for AAPCS ABI. */
1920 if (TARGET_AAPCS_BASED)
1921 arm_structure_size_boundary = 8;
1923 if (structure_size_string != NULL)
1925 int size = strtol (structure_size_string, NULL, 0);
1927 if (size == 8 || size == 32
1928 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1929 arm_structure_size_boundary = size;
1930 else
1931 warning (0, "structure size boundary can only be set to %s",
1932 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1935 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1937 error ("RTP PIC is incompatible with Thumb");
1938 flag_pic = 0;
1941 /* If stack checking is disabled, we can use r10 as the PIC register,
1942 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1943 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1945 if (TARGET_VXWORKS_RTP)
1946 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1947 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1950 if (flag_pic && TARGET_VXWORKS_RTP)
1951 arm_pic_register = 9;
1953 if (arm_pic_register_string != NULL)
1955 int pic_register = decode_reg_name (arm_pic_register_string);
1957 if (!flag_pic)
1958 warning (0, "-mpic-register= is useless without -fpic");
1960 /* Prevent the user from choosing an obviously stupid PIC register. */
1961 else if (pic_register < 0 || call_used_regs[pic_register]
1962 || pic_register == HARD_FRAME_POINTER_REGNUM
1963 || pic_register == STACK_POINTER_REGNUM
1964 || pic_register >= PC_REGNUM
1965 || (TARGET_VXWORKS_RTP
1966 && (unsigned int) pic_register != arm_pic_register))
1967 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1968 else
1969 arm_pic_register = pic_register;
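/* E.g. -fpic -mpic-register=r10 is accepted because r10 is call-saved,
   whereas -mpic-register=ip or -mpic-register=sp is rejected by the
   checks above.  */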
1972 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1973 if (fix_cm3_ldrd == 2)
1975 if (arm_selected_cpu->core == cortexm3)
1976 fix_cm3_ldrd = 1;
1977 else
1978 fix_cm3_ldrd = 0;
1981 if (TARGET_THUMB1 && flag_schedule_insns)
1983 /* Don't warn since it's on by default in -O2. */
1984 flag_schedule_insns = 0;
1987 if (optimize_size)
1989 /* If optimizing for size, bump the number of instructions that we
1990 are prepared to conditionally execute (even on a StrongARM). */
1991 max_insns_skipped = 6;
1993 else
1995 /* StrongARM has early execution of branches, so a sequence
1996 that is worth skipping is shorter. */
1997 if (arm_tune_strongarm)
1998 max_insns_skipped = 3;
2001 /* Hot/Cold partitioning is not currently supported, since we can't
2002 handle literal pool placement in that case. */
2003 if (flag_reorder_blocks_and_partition)
2005 inform (input_location,
2006 "-freorder-blocks-and-partition not supported on this architecture");
2007 flag_reorder_blocks_and_partition = 0;
2008 flag_reorder_blocks = 1;
2011 if (flag_pic)
2012 /* Hoisting PIC address calculations more aggressively provides a small,
2013 but measurable, size reduction for PIC code. Therefore, we decrease
2014 the bar for unrestricted expression hoisting to the cost of PIC address
2015 calculation, which is 2 instructions. */
2016 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2017 global_options.x_param_values,
2018 global_options_set.x_param_values);
2020 /* ARM EABI defaults to strict volatile bitfields. */
2021 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
2022 flag_strict_volatile_bitfields = 1;
2024 /* Enable software prefetching at -O3 for CPUs that have prefetch, when we have
2025 deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2026 if (flag_prefetch_loop_arrays < 0
2027 && HAVE_prefetch
2028 && optimize >= 3
2029 && current_tune->num_prefetch_slots > 0)
2030 flag_prefetch_loop_arrays = 1;
2032 /* Set up parameters to be used in the prefetching algorithm. Do not override the
2033 defaults unless we are tuning for a core we have researched values for. */
2034 if (current_tune->num_prefetch_slots > 0)
2035 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2036 current_tune->num_prefetch_slots,
2037 global_options.x_param_values,
2038 global_options_set.x_param_values);
2039 if (current_tune->l1_cache_line_size >= 0)
2040 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2041 current_tune->l1_cache_line_size,
2042 global_options.x_param_values,
2043 global_options_set.x_param_values);
2044 if (current_tune->l1_cache_size >= 0)
2045 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2046 current_tune->l1_cache_size,
2047 global_options.x_param_values,
2048 global_options_set.x_param_values);
2050 /* Register global variables with the garbage collector. */
2051 arm_add_gc_roots ();
2054 static void
2055 arm_add_gc_roots (void)
2057 gcc_obstack_init(&minipool_obstack);
2058 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2061 /* A table of known ARM exception types.
2062 For use with the interrupt function attribute. */
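/* A handler is typically declared along the lines of
   void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
   where fiq_handler is just an example name; the string argument is
   matched against this table.  */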
2064 typedef struct
2066 const char *const arg;
2067 const unsigned long return_value;
2069 isr_attribute_arg;
2071 static const isr_attribute_arg isr_attribute_args [] =
2073 { "IRQ", ARM_FT_ISR },
2074 { "irq", ARM_FT_ISR },
2075 { "FIQ", ARM_FT_FIQ },
2076 { "fiq", ARM_FT_FIQ },
2077 { "ABORT", ARM_FT_ISR },
2078 { "abort", ARM_FT_ISR },
2079 { "ABORT", ARM_FT_ISR },
2080 { "abort", ARM_FT_ISR },
2081 { "UNDEF", ARM_FT_EXCEPTION },
2082 { "undef", ARM_FT_EXCEPTION },
2083 { "SWI", ARM_FT_EXCEPTION },
2084 { "swi", ARM_FT_EXCEPTION },
2085 { NULL, ARM_FT_NORMAL }
2088 /* Returns the (interrupt) function type of the current
2089 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2091 static unsigned long
2092 arm_isr_value (tree argument)
2094 const isr_attribute_arg * ptr;
2095 const char * arg;
2097 if (!arm_arch_notm)
2098 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2100 /* No argument - default to IRQ. */
2101 if (argument == NULL_TREE)
2102 return ARM_FT_ISR;
2104 /* Get the value of the argument. */
2105 if (TREE_VALUE (argument) == NULL_TREE
2106 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2107 return ARM_FT_UNKNOWN;
2109 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2111 /* Check it against the list of known arguments. */
2112 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2113 if (streq (arg, ptr->arg))
2114 return ptr->return_value;
2116 /* An unrecognized interrupt type. */
2117 return ARM_FT_UNKNOWN;
2120 /* Computes the type of the current function. */
2122 static unsigned long
2123 arm_compute_func_type (void)
2125 unsigned long type = ARM_FT_UNKNOWN;
2126 tree a;
2127 tree attr;
2129 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2131 /* Decide if the current function is volatile. Such functions
2132 never return, and many memory cycles can be saved by not storing
2133 register values that will never be needed again. This optimization
2134 was added to speed up context switching in a kernel application. */
2135 if (optimize > 0
2136 && (TREE_NOTHROW (current_function_decl)
2137 || !(flag_unwind_tables
2138 || (flag_exceptions
2139 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2140 && TREE_THIS_VOLATILE (current_function_decl))
2141 type |= ARM_FT_VOLATILE;
2143 if (cfun->static_chain_decl != NULL)
2144 type |= ARM_FT_NESTED;
2146 attr = DECL_ATTRIBUTES (current_function_decl);
2148 a = lookup_attribute ("naked", attr);
2149 if (a != NULL_TREE)
2150 type |= ARM_FT_NAKED;
2152 a = lookup_attribute ("isr", attr);
2153 if (a == NULL_TREE)
2154 a = lookup_attribute ("interrupt", attr);
2156 if (a == NULL_TREE)
2157 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2158 else
2159 type |= arm_isr_value (TREE_VALUE (a));
2161 return type;
2164 /* Returns the type of the current function. */
2166 unsigned long
2167 arm_current_func_type (void)
2169 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2170 cfun->machine->func_type = arm_compute_func_type ();
2172 return cfun->machine->func_type;
2175 bool
2176 arm_allocate_stack_slots_for_args (void)
2178 /* Naked functions should not allocate stack slots for arguments. */
2179 return !IS_NAKED (arm_current_func_type ());
2183 /* Output assembler code for a block containing the constant parts
2184 of a trampoline, leaving space for the variable parts.
2186 On the ARM, (if r8 is the static chain regnum, and remembering that
2187 referencing pc adds an offset of 8) the trampoline looks like:
2188 ldr r8, [pc, #0]
2189 ldr pc, [pc]
2190 .word static chain value
2191 .word function's address
2192 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2194 static void
2195 arm_asm_trampoline_template (FILE *f)
2197 if (TARGET_ARM)
2199 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2200 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2202 else if (TARGET_THUMB2)
2204 /* The Thumb-2 trampoline is similar to the ARM implementation.
2205 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2206 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2207 STATIC_CHAIN_REGNUM, PC_REGNUM);
2208 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2210 else
2212 ASM_OUTPUT_ALIGN (f, 2);
2213 fprintf (f, "\t.code\t16\n");
2214 fprintf (f, ".Ltrampoline_start:\n");
2215 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2216 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2217 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2218 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2219 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2220 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2222 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2223 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2226 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2228 static void
2229 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2231 rtx fnaddr, mem, a_tramp;
2233 emit_block_move (m_tramp, assemble_trampoline_template (),
2234 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2236 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2237 emit_move_insn (mem, chain_value);
2239 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2240 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2241 emit_move_insn (mem, fnaddr);
2243 a_tramp = XEXP (m_tramp, 0);
2244 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2245 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2246 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2249 /* Thumb trampolines should be entered in thumb mode, so set
2250 the bottom bit of the address. */
2252 static rtx
2253 arm_trampoline_adjust_address (rtx addr)
2255 if (TARGET_THUMB)
2256 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2257 NULL, 0, OPTAB_LIB_WIDEN);
2258 return addr;
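/* E.g. a trampoline placed at 0x8000 is entered through address 0x8001,
   so the bx/blx that reaches it switches the core into Thumb state.  */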
2261 /* Return 1 if it is possible to return using a single instruction.
2262 If SIBLING is non-null, this is a test for a return before a sibling
2263 call. SIBLING is the call insn, so we can examine its register usage. */
2266 use_return_insn (int iscond, rtx sibling)
2268 int regno;
2269 unsigned int func_type;
2270 unsigned long saved_int_regs;
2271 unsigned HOST_WIDE_INT stack_adjust;
2272 arm_stack_offsets *offsets;
2274 /* Never use a return instruction before reload has run. */
2275 if (!reload_completed)
2276 return 0;
2278 func_type = arm_current_func_type ();
2280 /* Naked, volatile and stack alignment functions need special
2281 consideration. */
2282 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2283 return 0;
2285 /* So do interrupt functions that use the frame pointer and Thumb
2286 interrupt functions. */
2287 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2288 return 0;
2290 offsets = arm_get_frame_offsets ();
2291 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2293 /* As do variadic functions. */
2294 if (crtl->args.pretend_args_size
2295 || cfun->machine->uses_anonymous_args
2296 /* Or if the function calls __builtin_eh_return () */
2297 || crtl->calls_eh_return
2298 /* Or if the function calls alloca */
2299 || cfun->calls_alloca
2300 /* Or if there is a stack adjustment. However, if the stack pointer
2301 is saved on the stack, we can use a pre-incrementing stack load. */
2302 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2303 && stack_adjust == 4)))
2304 return 0;
2306 saved_int_regs = offsets->saved_regs_mask;
2308 /* Unfortunately, the insn
2310 ldmib sp, {..., sp, ...}
2312 triggers a bug on most SA-110 based devices, such that the stack
2313 pointer won't be correctly restored if the instruction takes a
2314 page fault. We work around this problem by popping r3 along with
2315 the other registers, since that is never slower than executing
2316 another instruction.
2318 We test for !arm_arch5 here, because code for any architecture
2319 less than this could potentially be run on one of the buggy
2320 chips. */
2321 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2323 /* Validate that r3 is a call-clobbered register (always true in
2324 the default abi) ... */
2325 if (!call_used_regs[3])
2326 return 0;
2328 /* ... that it isn't being used for a return value ... */
2329 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2330 return 0;
2332 /* ... or for a tail-call argument ... */
2333 if (sibling)
2335 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2337 if (find_regno_fusage (sibling, USE, 3))
2338 return 0;
2341 /* ... and that there are no call-saved registers in r0-r2
2342 (always true in the default ABI). */
2343 if (saved_int_regs & 0x7)
2344 return 0;
2347 /* Can't be done if interworking with Thumb, and any registers have been
2348 stacked. */
2349 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2350 return 0;
2352 /* On StrongARM, conditional returns are expensive if they aren't
2353 taken and multiple registers have been stacked. */
2354 if (iscond && arm_tune_strongarm)
2356 /* Conditional return when just the LR is stored is a simple
2357 conditional-load instruction, that's not expensive. */
2358 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2359 return 0;
2361 if (flag_pic
2362 && arm_pic_register != INVALID_REGNUM
2363 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2364 return 0;
2367 /* If there are saved registers but the LR isn't saved, then we need
2368 two instructions for the return. */
2369 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2370 return 0;
2372 /* Can't be done if any of the FPA regs are pushed,
2373 since this also requires an insn. */
2374 if (TARGET_HARD_FLOAT && TARGET_FPA)
2375 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2376 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2377 return 0;
2379 /* Likewise VFP regs. */
2380 if (TARGET_HARD_FLOAT && TARGET_VFP)
2381 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2382 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2383 return 0;
2385 if (TARGET_REALLY_IWMMXT)
2386 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2387 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2388 return 0;
2390 return 1;
2393 /* Return TRUE if int I is a valid immediate ARM constant. */
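/* In ARM mode a valid immediate is an 8-bit value rotated right by an
   even amount, so, for example, 0x000000ff, 0x00000ff0 and 0xff000000
   are valid while 0x00000101 and 0x0000ffff are not.  Thumb-2 also
   accepts the replicated patterns checked below, e.g. 0x00ff00ff and
   0x12121212.  */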
2396 const_ok_for_arm (HOST_WIDE_INT i)
2398 int lowbit;
2400 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2401 be all zero, or all one. */
2402 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2403 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2404 != ((~(unsigned HOST_WIDE_INT) 0)
2405 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2406 return FALSE;
2408 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2410 /* Fast return for 0 and small values. We must do this for zero, since
2411 the code below can't handle that one case. */
2412 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2413 return TRUE;
2415 /* Get the number of trailing zeros. */
2416 lowbit = ffs((int) i) - 1;
2418 /* Only even shifts are allowed in ARM mode so round down to the
2419 nearest even number. */
2420 if (TARGET_ARM)
2421 lowbit &= ~1;
2423 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2424 return TRUE;
2426 if (TARGET_ARM)
2428 /* Allow rotated constants in ARM mode. */
2429 if (lowbit <= 4
2430 && ((i & ~0xc000003f) == 0
2431 || (i & ~0xf000000f) == 0
2432 || (i & ~0xfc000003) == 0))
2433 return TRUE;
2435 else
2437 HOST_WIDE_INT v;
2439 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2440 v = i & 0xff;
2441 v |= v << 16;
2442 if (i == v || i == (v | (v << 8)))
2443 return TRUE;
2445 /* Allow repeated pattern 0xXY00XY00. */
2446 v = i & 0xff00;
2447 v |= v << 16;
2448 if (i == v)
2449 return TRUE;
2452 return FALSE;
2455 /* Return true if I is a valid constant for the operation CODE. */
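/* For example, (plus reg -1) is OK because 1 is a valid immediate (the
   add can be emitted as a sub), and (and reg 0xfffffffe) is OK because
   ~0xfffffffe == 1 can be used by a bic.  */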
2456 static int
2457 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2459 if (const_ok_for_arm (i))
2460 return 1;
2462 switch (code)
2464 case PLUS:
2465 case COMPARE:
2466 case EQ:
2467 case NE:
2468 case GT:
2469 case LE:
2470 case LT:
2471 case GE:
2472 case GEU:
2473 case LTU:
2474 case GTU:
2475 case LEU:
2476 case UNORDERED:
2477 case ORDERED:
2478 case UNEQ:
2479 case UNGE:
2480 case UNLT:
2481 case UNGT:
2482 case UNLE:
2483 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2485 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2486 case XOR:
2487 return 0;
2489 case IOR:
2490 if (TARGET_THUMB2)
2491 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2492 return 0;
2494 case AND:
2495 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2497 default:
2498 gcc_unreachable ();
2502 /* Emit a sequence of insns to handle a large constant.
2503 CODE is the code of the operation required, it can be any of SET, PLUS,
2504 IOR, AND, XOR, MINUS;
2505 MODE is the mode in which the operation is being performed;
2506 VAL is the integer to operate on;
2507 SOURCE is the other operand (a register, or a null-pointer for SET);
2508 SUBTARGETS means it is safe to create scratch registers if that will
2509 either produce a simpler sequence, or we will want to cse the values.
2510 Return value is the number of insns emitted. */
2512 /* ??? Tweak this for thumb2. */
2514 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2515 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2517 rtx cond;
2519 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2520 cond = COND_EXEC_TEST (PATTERN (insn));
2521 else
2522 cond = NULL_RTX;
2524 if (subtargets || code == SET
2525 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2526 && REGNO (target) != REGNO (source)))
2528 /* After arm_reorg has been called, we can't fix up expensive
2529 constants by pushing them into memory so we must synthesize
2530 them in-line, regardless of the cost. This is only likely to
2531 be more costly on chips that have load delay slots and we are
2532 compiling without running the scheduler (so no splitting
2533 occurred before the final instruction emission).
2535 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2537 if (!after_arm_reorg
2538 && !cond
2539 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2540 1, 0)
2541 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2542 + (code != SET))))
2544 if (code == SET)
2546 /* Currently SET is the only monadic value for CODE; all
2547 the rest are dyadic. */
2548 if (TARGET_USE_MOVT)
2549 arm_emit_movpair (target, GEN_INT (val));
2550 else
2551 emit_set_insn (target, GEN_INT (val));
2553 return 1;
2555 else
2557 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2559 if (TARGET_USE_MOVT)
2560 arm_emit_movpair (temp, GEN_INT (val));
2561 else
2562 emit_set_insn (temp, GEN_INT (val));
2564 /* For MINUS, the value is subtracted from, since we never
2565 have subtraction of a constant. */
2566 if (code == MINUS)
2567 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2568 else
2569 emit_set_insn (target,
2570 gen_rtx_fmt_ee (code, mode, source, temp));
2571 return 2;
2576 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2580 /* Return the number of instructions required to synthesize the given
2581 constant, if we start emitting them from bit-position I. */
2582 static int
2583 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2585 HOST_WIDE_INT temp1;
2586 int step_size = TARGET_ARM ? 2 : 1;
2587 int num_insns = 0;
2589 gcc_assert (TARGET_ARM || i == 0);
2593 int end;
2595 if (i <= 0)
2596 i += 32;
2597 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2599 end = i - 8;
2600 if (end < 0)
2601 end += 32;
2602 temp1 = remainder & ((0x0ff << end)
2603 | ((i < end) ? (0xff >> (32 - end)) : 0));
2604 remainder &= ~temp1;
2605 num_insns++;
2606 i -= 8 - step_size;
2608 i -= step_size;
2609 } while (remainder);
2610 return num_insns;
2613 static int
2614 find_best_start (unsigned HOST_WIDE_INT remainder)
2616 int best_consecutive_zeros = 0;
2617 int i;
2618 int best_start = 0;
2620 /* If we aren't targeting ARM, the best place to start is always at
2621 the bottom. */
2622 if (! TARGET_ARM)
2623 return 0;
2625 for (i = 0; i < 32; i += 2)
2627 int consecutive_zeros = 0;
2629 if (!(remainder & (3 << i)))
2631 while ((i < 32) && !(remainder & (3 << i)))
2633 consecutive_zeros += 2;
2634 i += 2;
2636 if (consecutive_zeros > best_consecutive_zeros)
2638 best_consecutive_zeros = consecutive_zeros;
2639 best_start = i - consecutive_zeros;
2641 i -= 2;
2645 /* So long as it won't require any more insns to do so, it's
2646 desirable to emit a small constant (in bits 0...9) in the last
2647 insn. This way there is more chance that it can be combined with
2648 a later addressing insn to form a pre-indexed load or store
2649 operation. Consider:
2651 *((volatile int *)0xe0000100) = 1;
2652 *((volatile int *)0xe0000110) = 2;
2654 We want this to wind up as:
2656 mov rA, #0xe0000000
2657 mov rB, #1
2658 str rB, [rA, #0x100]
2659 mov rB, #2
2660 str rB, [rA, #0x110]
2662 rather than having to synthesize both large constants from scratch.
2664 Therefore, we calculate how many insns would be required to emit
2665 the constant starting from `best_start', and also starting from
2666 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2667 yield a shorter sequence, we may as well use zero. */
2668 if (best_start != 0
2669 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2670 && (count_insns_for_constant (remainder, 0) <=
2671 count_insns_for_constant (remainder, best_start)))
2672 best_start = 0;
2674 return best_start;
2677 /* Emit an instruction with the indicated PATTERN. If COND is
2678 non-NULL, conditionalize the execution of the instruction on COND
2679 being true. */
2681 static void
2682 emit_constant_insn (rtx cond, rtx pattern)
2684 if (cond)
2685 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2686 emit_insn (pattern);
2689 /* As above, but extra parameter GENERATE which, if clear, suppresses
2690 RTL generation. */
2691 /* ??? This needs more work for thumb2. */
2693 static int
2694 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2695 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2696 int generate)
2698 int can_invert = 0;
2699 int can_negate = 0;
2700 int final_invert = 0;
2701 int can_negate_initial = 0;
2702 int i;
2703 int num_bits_set = 0;
2704 int set_sign_bit_copies = 0;
2705 int clear_sign_bit_copies = 0;
2706 int clear_zero_bit_copies = 0;
2707 int set_zero_bit_copies = 0;
2708 int insns = 0;
2709 unsigned HOST_WIDE_INT temp1, temp2;
2710 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2711 int step_size = TARGET_ARM ? 2 : 1;
2713 /* Find out which operations are safe for a given CODE. Also do a quick
2714 check for degenerate cases; these can occur when DImode operations
2715 are split. */
2716 switch (code)
2718 case SET:
2719 can_invert = 1;
2720 can_negate = 1;
2721 break;
2723 case PLUS:
2724 can_negate = 1;
2725 can_negate_initial = 1;
2726 break;
2728 case IOR:
2729 if (remainder == 0xffffffff)
2731 if (generate)
2732 emit_constant_insn (cond,
2733 gen_rtx_SET (VOIDmode, target,
2734 GEN_INT (ARM_SIGN_EXTEND (val))));
2735 return 1;
2738 if (remainder == 0)
2740 if (reload_completed && rtx_equal_p (target, source))
2741 return 0;
2743 if (generate)
2744 emit_constant_insn (cond,
2745 gen_rtx_SET (VOIDmode, target, source));
2746 return 1;
2749 if (TARGET_THUMB2)
2750 can_invert = 1;
2751 break;
2753 case AND:
2754 if (remainder == 0)
2756 if (generate)
2757 emit_constant_insn (cond,
2758 gen_rtx_SET (VOIDmode, target, const0_rtx));
2759 return 1;
2761 if (remainder == 0xffffffff)
2763 if (reload_completed && rtx_equal_p (target, source))
2764 return 0;
2765 if (generate)
2766 emit_constant_insn (cond,
2767 gen_rtx_SET (VOIDmode, target, source));
2768 return 1;
2770 can_invert = 1;
2771 break;
2773 case XOR:
2774 if (remainder == 0)
2776 if (reload_completed && rtx_equal_p (target, source))
2777 return 0;
2778 if (generate)
2779 emit_constant_insn (cond,
2780 gen_rtx_SET (VOIDmode, target, source));
2781 return 1;
2784 if (remainder == 0xffffffff)
2786 if (generate)
2787 emit_constant_insn (cond,
2788 gen_rtx_SET (VOIDmode, target,
2789 gen_rtx_NOT (mode, source)));
2790 return 1;
2792 break;
2794 case MINUS:
2795 /* We treat MINUS as (val - source), since (source - val) is always
2796 passed as (source + (-val)). */
2797 if (remainder == 0)
2799 if (generate)
2800 emit_constant_insn (cond,
2801 gen_rtx_SET (VOIDmode, target,
2802 gen_rtx_NEG (mode, source)));
2803 return 1;
2805 if (const_ok_for_arm (val))
2807 if (generate)
2808 emit_constant_insn (cond,
2809 gen_rtx_SET (VOIDmode, target,
2810 gen_rtx_MINUS (mode, GEN_INT (val),
2811 source)));
2812 return 1;
2814 can_negate = 1;
2816 break;
2818 default:
2819 gcc_unreachable ();
2822 /* If we can do it in one insn get out quickly. */
2823 if (const_ok_for_arm (val)
2824 || (can_negate_initial && const_ok_for_arm (-val))
2825 || (can_invert && const_ok_for_arm (~val)))
2827 if (generate)
2828 emit_constant_insn (cond,
2829 gen_rtx_SET (VOIDmode, target,
2830 (source
2831 ? gen_rtx_fmt_ee (code, mode, source,
2832 GEN_INT (val))
2833 : GEN_INT (val))));
2834 return 1;
2837 /* Calculate a few attributes that may be useful for specific
2838 optimizations. */
2839 /* Count number of leading zeros. */
2840 for (i = 31; i >= 0; i--)
2842 if ((remainder & (1 << i)) == 0)
2843 clear_sign_bit_copies++;
2844 else
2845 break;
2848 /* Count number of leading 1's. */
2849 for (i = 31; i >= 0; i--)
2851 if ((remainder & (1 << i)) != 0)
2852 set_sign_bit_copies++;
2853 else
2854 break;
2857 /* Count number of trailing zeros. */
2858 for (i = 0; i <= 31; i++)
2860 if ((remainder & (1 << i)) == 0)
2861 clear_zero_bit_copies++;
2862 else
2863 break;
2866 /* Count number of trailing 1's. */
2867 for (i = 0; i <= 31; i++)
2869 if ((remainder & (1 << i)) != 0)
2870 set_zero_bit_copies++;
2871 else
2872 break;
2875 switch (code)
2877 case SET:
2878 /* See if we can use movw. */
2879 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2881 if (generate)
2882 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2883 GEN_INT (val)));
2884 return 1;
2887 /* See if we can do this by sign_extending a constant that is known
2888 to be negative. This is a good way of doing it, since the shift
2889 may well merge into a subsequent insn. */
2890 if (set_sign_bit_copies > 1)
2892 if (const_ok_for_arm
2893 (temp1 = ARM_SIGN_EXTEND (remainder
2894 << (set_sign_bit_copies - 1))))
2896 if (generate)
2898 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2899 emit_constant_insn (cond,
2900 gen_rtx_SET (VOIDmode, new_src,
2901 GEN_INT (temp1)));
2902 emit_constant_insn (cond,
2903 gen_ashrsi3 (target, new_src,
2904 GEN_INT (set_sign_bit_copies - 1)));
2906 return 2;
2908 /* For an inverted constant, we will need to set the low bits,
2909 these will be shifted out of harm's way. */
2910 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2911 if (const_ok_for_arm (~temp1))
2913 if (generate)
2915 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2916 emit_constant_insn (cond,
2917 gen_rtx_SET (VOIDmode, new_src,
2918 GEN_INT (temp1)));
2919 emit_constant_insn (cond,
2920 gen_ashrsi3 (target, new_src,
2921 GEN_INT (set_sign_bit_copies - 1)));
2923 return 2;
2927 /* See if we can calculate the value as the difference between two
2928 valid immediates. */
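/* E.g. 0x000ffff0 is not itself a valid immediate, but it equals
   0x100000 - 0x10, both of which are, so it can be built as
   mov rT, #0x100000
   sub rD, rT, #0x10
   (rT and rD stand for the temporary and destination registers).  */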
2929 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2931 int topshift = clear_sign_bit_copies & ~1;
2933 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2934 & (0xff000000 >> topshift));
2936 /* If temp1 is zero, then that means the 9 most significant
2937 bits of remainder were 1 and we've caused it to overflow.
2938 When topshift is 0 we don't need to do anything since we
2939 can borrow from 'bit 32'. */
2940 if (temp1 == 0 && topshift != 0)
2941 temp1 = 0x80000000 >> (topshift - 1);
2943 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2945 if (const_ok_for_arm (temp2))
2947 if (generate)
2949 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2950 emit_constant_insn (cond,
2951 gen_rtx_SET (VOIDmode, new_src,
2952 GEN_INT (temp1)));
2953 emit_constant_insn (cond,
2954 gen_addsi3 (target, new_src,
2955 GEN_INT (-temp2)));
2958 return 2;
2962 /* See if we can generate this by setting the bottom (or the top)
2963 16 bits, and then shifting these into the other half of the
2964 word. We only look for the simplest cases, to do more would cost
2965 too much. Be careful, however, not to generate this when the
2966 alternative would take fewer insns. */
2967 if (val & 0xffff0000)
2969 temp1 = remainder & 0xffff0000;
2970 temp2 = remainder & 0x0000ffff;
2972 /* Overlaps outside this range are best done using other methods. */
2973 for (i = 9; i < 24; i++)
2975 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2976 && !const_ok_for_arm (temp2))
2978 rtx new_src = (subtargets
2979 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2980 : target);
2981 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2982 source, subtargets, generate);
2983 source = new_src;
2984 if (generate)
2985 emit_constant_insn
2986 (cond,
2987 gen_rtx_SET
2988 (VOIDmode, target,
2989 gen_rtx_IOR (mode,
2990 gen_rtx_ASHIFT (mode, source,
2991 GEN_INT (i)),
2992 source)));
2993 return insns + 1;
2997 /* Don't duplicate cases already considered. */
2998 for (i = 17; i < 24; i++)
3000 if (((temp1 | (temp1 >> i)) == remainder)
3001 && !const_ok_for_arm (temp1))
3003 rtx new_src = (subtargets
3004 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3005 : target);
3006 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3007 source, subtargets, generate);
3008 source = new_src;
3009 if (generate)
3010 emit_constant_insn
3011 (cond,
3012 gen_rtx_SET (VOIDmode, target,
3013 gen_rtx_IOR
3014 (mode,
3015 gen_rtx_LSHIFTRT (mode, source,
3016 GEN_INT (i)),
3017 source)));
3018 return insns + 1;
3022 break;
3024 case IOR:
3025 case XOR:
3026 /* If we have IOR or XOR, and the constant can be loaded in a
3027 single instruction, and we can find a temporary to put it in,
3028 then this can be done in two instructions instead of 3-4. */
3029 if (subtargets
3030 /* TARGET can't be NULL if SUBTARGETS is 0 */
3031 || (reload_completed && !reg_mentioned_p (target, source)))
3033 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3035 if (generate)
3037 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3039 emit_constant_insn (cond,
3040 gen_rtx_SET (VOIDmode, sub,
3041 GEN_INT (val)));
3042 emit_constant_insn (cond,
3043 gen_rtx_SET (VOIDmode, target,
3044 gen_rtx_fmt_ee (code, mode,
3045 source, sub)));
3047 return 2;
3051 if (code == XOR)
3052 break;
3054 /* Convert.
3055 x = y | constant (which is composed of set_sign_bit_copies leading 1s
3056 and the remainder 0s, e.g. 0xfff00000)
3057 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3059 This can be done in 2 instructions by using shifts with mov or mvn.
3060 e.g. for
3061 x = x | 0xfff00000;
3062 we generate:
3063 mvn r0, r0, asl #12
3064 mvn r0, r0, lsr #12 */
3065 if (set_sign_bit_copies > 8
3066 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3068 if (generate)
3070 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3071 rtx shift = GEN_INT (set_sign_bit_copies);
3073 emit_constant_insn
3074 (cond,
3075 gen_rtx_SET (VOIDmode, sub,
3076 gen_rtx_NOT (mode,
3077 gen_rtx_ASHIFT (mode,
3078 source,
3079 shift))));
3080 emit_constant_insn
3081 (cond,
3082 gen_rtx_SET (VOIDmode, target,
3083 gen_rtx_NOT (mode,
3084 gen_rtx_LSHIFTRT (mode, sub,
3085 shift))));
3087 return 2;
3090 /* Convert
3091 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3093 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3095 E.g. for r0 = r0 | 0xfff
3096 mvn r0, r0, lsr #12
3097 mvn r0, r0, asl #12
3100 if (set_zero_bit_copies > 8
3101 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3103 if (generate)
3105 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3106 rtx shift = GEN_INT (set_zero_bit_copies);
3108 emit_constant_insn
3109 (cond,
3110 gen_rtx_SET (VOIDmode, sub,
3111 gen_rtx_NOT (mode,
3112 gen_rtx_LSHIFTRT (mode,
3113 source,
3114 shift))));
3115 emit_constant_insn
3116 (cond,
3117 gen_rtx_SET (VOIDmode, target,
3118 gen_rtx_NOT (mode,
3119 gen_rtx_ASHIFT (mode, sub,
3120 shift))));
3122 return 2;
3125 /* This will never be reached for Thumb2 because orn is a valid
3126 instruction. This is for Thumb1 and the ARM 32 bit cases.
3128 x = y | constant (such that ~constant is a valid constant)
3129 Transform this to
3130 x = ~(~y & ~constant).
3132 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3134 if (generate)
3136 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3137 emit_constant_insn (cond,
3138 gen_rtx_SET (VOIDmode, sub,
3139 gen_rtx_NOT (mode, source)));
3140 source = sub;
3141 if (subtargets)
3142 sub = gen_reg_rtx (mode);
3143 emit_constant_insn (cond,
3144 gen_rtx_SET (VOIDmode, sub,
3145 gen_rtx_AND (mode, source,
3146 GEN_INT (temp1))));
3147 emit_constant_insn (cond,
3148 gen_rtx_SET (VOIDmode, target,
3149 gen_rtx_NOT (mode, sub)));
3151 return 3;
3153 break;
3155 case AND:
3156 /* See if two shifts will do 2 or more insns' worth of work. */
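/* E.g. for x & 0x0000ffff the mask has 16 leading zeros, so rather than
   synthesizing the constant we can emit
   mov rD, rS, asl #16
   mov rD, rD, lsr #16  */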
3157 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3159 HOST_WIDE_INT shift_mask = ((0xffffffff
3160 << (32 - clear_sign_bit_copies))
3161 & 0xffffffff);
3163 if ((remainder | shift_mask) != 0xffffffff)
3165 if (generate)
3167 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3168 insns = arm_gen_constant (AND, mode, cond,
3169 remainder | shift_mask,
3170 new_src, source, subtargets, 1);
3171 source = new_src;
3173 else
3175 rtx targ = subtargets ? NULL_RTX : target;
3176 insns = arm_gen_constant (AND, mode, cond,
3177 remainder | shift_mask,
3178 targ, source, subtargets, 0);
3182 if (generate)
3184 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3185 rtx shift = GEN_INT (clear_sign_bit_copies);
3187 emit_insn (gen_ashlsi3 (new_src, source, shift));
3188 emit_insn (gen_lshrsi3 (target, new_src, shift));
3191 return insns + 2;
3194 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3196 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3198 if ((remainder | shift_mask) != 0xffffffff)
3200 if (generate)
3202 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3204 insns = arm_gen_constant (AND, mode, cond,
3205 remainder | shift_mask,
3206 new_src, source, subtargets, 1);
3207 source = new_src;
3209 else
3211 rtx targ = subtargets ? NULL_RTX : target;
3213 insns = arm_gen_constant (AND, mode, cond,
3214 remainder | shift_mask,
3215 targ, source, subtargets, 0);
3219 if (generate)
3221 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3222 rtx shift = GEN_INT (clear_zero_bit_copies);
3224 emit_insn (gen_lshrsi3 (new_src, source, shift));
3225 emit_insn (gen_ashlsi3 (target, new_src, shift));
3228 return insns + 2;
3231 break;
3233 default:
3234 break;
3237 for (i = 0; i < 32; i++)
3238 if (remainder & (1 << i))
3239 num_bits_set++;
3241 if ((code == AND)
3242 || (code != IOR && can_invert && num_bits_set > 16))
3243 remainder ^= 0xffffffff;
3244 else if (code == PLUS && num_bits_set > 16)
3245 remainder = (-remainder) & 0xffffffff;
3247 /* For XOR, if more than half the bits are set and there's a sequence
3248 of more than 8 consecutive ones in the pattern then we can XOR by the
3249 inverted constant and then invert the final result; this may save an
3250 instruction and might also lead to the final mvn being merged with
3251 some other operation. */
3252 else if (code == XOR && num_bits_set > 16
3253 && (count_insns_for_constant (remainder ^ 0xffffffff,
3254 find_best_start
3255 (remainder ^ 0xffffffff))
3256 < count_insns_for_constant (remainder,
3257 find_best_start (remainder))))
3259 remainder ^= 0xffffffff;
3260 final_invert = 1;
3262 else
3264 can_invert = 0;
3265 can_negate = 0;
3268 /* Now try and find a way of doing the job in either two or three
3269 instructions.
3270 We start by looking for the largest block of zeros that is aligned on
3271 a 2-bit boundary; we then fill up the temps, wrapping around to the
3272 top of the word when we drop off the bottom.
3273 In the worst case this code should produce no more than four insns.
3274 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3275 best place to start. */
3277 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3278 the same. */
3280 /* Now start emitting the insns. */
3281 i = find_best_start (remainder);
3284 int end;
3286 if (i <= 0)
3287 i += 32;
3288 if (remainder & (3 << (i - 2)))
3290 end = i - 8;
3291 if (end < 0)
3292 end += 32;
3293 temp1 = remainder & ((0x0ff << end)
3294 | ((i < end) ? (0xff >> (32 - end)) : 0));
3295 remainder &= ~temp1;
3297 if (generate)
3299 rtx new_src, temp1_rtx;
3301 if (code == SET || code == MINUS)
3303 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3304 if (can_invert && code != MINUS)
3305 temp1 = ~temp1;
3307 else
3309 if ((final_invert || remainder) && subtargets)
3310 new_src = gen_reg_rtx (mode);
3311 else
3312 new_src = target;
3313 if (can_invert)
3314 temp1 = ~temp1;
3315 else if (can_negate)
3316 temp1 = -temp1;
3319 temp1 = trunc_int_for_mode (temp1, mode);
3320 temp1_rtx = GEN_INT (temp1);
3322 if (code == SET)
3324 else if (code == MINUS)
3325 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3326 else
3327 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3329 emit_constant_insn (cond,
3330 gen_rtx_SET (VOIDmode, new_src,
3331 temp1_rtx));
3332 source = new_src;
3335 if (code == SET)
3337 can_invert = 0;
3338 code = PLUS;
3340 else if (code == MINUS)
3341 code = PLUS;
3343 insns++;
3344 i -= 8 - step_size;
3346 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary
3347 shifts. */
3348 i -= step_size;
3350 while (remainder);
3353 if (final_invert)
3355 if (generate)
3356 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3357 gen_rtx_NOT (mode, source)));
3358 insns++;
3361 return insns;
3364 /* Canonicalize a comparison so that we are more likely to recognize it.
3365 This can be done for a few constant compares, where we can make the
3366 immediate value easier to load. */
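/* For instance, (gt reg 0x3ffffff) cannot be tested with a single cmp or
   cmn, but rewriting it as (ge reg 0x4000000) can, since 0x4000000 is a
   valid immediate.  */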
3368 enum rtx_code
3369 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3371 enum machine_mode mode;
3372 unsigned HOST_WIDE_INT i, maxval;
3374 mode = GET_MODE (*op0);
3375 if (mode == VOIDmode)
3376 mode = GET_MODE (*op1);
3378 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3380 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3381 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3382 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3383 for GTU/LEU in Thumb mode. */
3384 if (mode == DImode)
3386 rtx tem;
3388 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3389 available. */
3390 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3391 return code;
3393 if (code == GT || code == LE
3394 || (!TARGET_ARM && (code == GTU || code == LEU)))
3396 /* Missing comparison. First try to use an available
3397 comparison. */
3398 if (GET_CODE (*op1) == CONST_INT)
3400 i = INTVAL (*op1);
3401 switch (code)
3403 case GT:
3404 case LE:
3405 if (i != maxval
3406 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3408 *op1 = GEN_INT (i + 1);
3409 return code == GT ? GE : LT;
3411 break;
3412 case GTU:
3413 case LEU:
3414 if (i != ~((unsigned HOST_WIDE_INT) 0)
3415 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3417 *op1 = GEN_INT (i + 1);
3418 return code == GTU ? GEU : LTU;
3420 break;
3421 default:
3422 gcc_unreachable ();
3426 /* If that did not work, reverse the condition. */
3427 tem = *op0;
3428 *op0 = *op1;
3429 *op1 = tem;
3430 return swap_condition (code);
3433 return code;
3436 /* Comparisons smaller than DImode. Only adjust comparisons against
3437 an out-of-range constant. */
3438 if (GET_CODE (*op1) != CONST_INT
3439 || const_ok_for_arm (INTVAL (*op1))
3440 || const_ok_for_arm (- INTVAL (*op1)))
3441 return code;
3443 i = INTVAL (*op1);
3445 switch (code)
3447 case EQ:
3448 case NE:
3449 return code;
3451 case GT:
3452 case LE:
3453 if (i != maxval
3454 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3456 *op1 = GEN_INT (i + 1);
3457 return code == GT ? GE : LT;
3459 break;
3461 case GE:
3462 case LT:
3463 if (i != ~maxval
3464 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3466 *op1 = GEN_INT (i - 1);
3467 return code == GE ? GT : LE;
3469 break;
3471 case GTU:
3472 case LEU:
3473 if (i != ~((unsigned HOST_WIDE_INT) 0)
3474 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3476 *op1 = GEN_INT (i + 1);
3477 return code == GTU ? GEU : LTU;
3479 break;
3481 case GEU:
3482 case LTU:
3483 if (i != 0
3484 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3486 *op1 = GEN_INT (i - 1);
3487 return code == GEU ? GTU : LEU;
3489 break;
3491 default:
3492 gcc_unreachable ();
3495 return code;
3499 /* Define how to find the value returned by a function. */
3501 static rtx
3502 arm_function_value(const_tree type, const_tree func,
3503 bool outgoing ATTRIBUTE_UNUSED)
3505 enum machine_mode mode;
3506 int unsignedp ATTRIBUTE_UNUSED;
3507 rtx r ATTRIBUTE_UNUSED;
3509 mode = TYPE_MODE (type);
3511 if (TARGET_AAPCS_BASED)
3512 return aapcs_allocate_return_reg (mode, type, func);
3514 /* Promote integer types. */
3515 if (INTEGRAL_TYPE_P (type))
3516 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3518 /* Promotes small structs returned in a register to full-word size
3519 for big-endian AAPCS. */
3520 if (arm_return_in_msb (type))
3522 HOST_WIDE_INT size = int_size_in_bytes (type);
3523 if (size % UNITS_PER_WORD != 0)
3525 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3526 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3530 return LIBCALL_VALUE (mode);
3533 static int
3534 libcall_eq (const void *p1, const void *p2)
3536 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3539 static hashval_t
3540 libcall_hash (const void *p1)
3542 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3545 static void
3546 add_libcall (htab_t htab, rtx libcall)
3548 *htab_find_slot (htab, libcall, INSERT) = libcall;
3551 static bool
3552 arm_libcall_uses_aapcs_base (const_rtx libcall)
3554 static bool init_done = false;
3555 static htab_t libcall_htab;
3557 if (!init_done)
3559 init_done = true;
3561 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3562 NULL);
3563 add_libcall (libcall_htab,
3564 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3565 add_libcall (libcall_htab,
3566 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3567 add_libcall (libcall_htab,
3568 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3569 add_libcall (libcall_htab,
3570 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3572 add_libcall (libcall_htab,
3573 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3574 add_libcall (libcall_htab,
3575 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3576 add_libcall (libcall_htab,
3577 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3578 add_libcall (libcall_htab,
3579 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3581 add_libcall (libcall_htab,
3582 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3583 add_libcall (libcall_htab,
3584 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3585 add_libcall (libcall_htab,
3586 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3587 add_libcall (libcall_htab,
3588 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3589 add_libcall (libcall_htab,
3590 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3591 add_libcall (libcall_htab,
3592 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3595 return libcall && htab_find (libcall_htab, libcall) != NULL;
3599 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3601 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3602 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3604 /* The following libcalls return their result in integer registers,
3605 even though they return a floating point value. */
3606 if (arm_libcall_uses_aapcs_base (libcall))
3607 return gen_rtx_REG (mode, ARG_REGISTER(1));
3611 return LIBCALL_VALUE (mode);
3614 /* Determine the amount of memory needed to store the possible return
3615 registers of an untyped call. */
3617 arm_apply_result_size (void)
3619 int size = 16;
3621 if (TARGET_32BIT)
3623 if (TARGET_HARD_FLOAT_ABI)
3625 if (TARGET_VFP)
3626 size += 32;
3627 if (TARGET_FPA)
3628 size += 12;
3629 if (TARGET_MAVERICK)
3630 size += 8;
3632 if (TARGET_IWMMXT_ABI)
3633 size += 8;
3636 return size;
3639 /* Decide whether TYPE should be returned in memory (true)
3640 or in a register (false). FNTYPE is the type of the function making
3641 the call. */
3642 static bool
3643 arm_return_in_memory (const_tree type, const_tree fntype)
3645 HOST_WIDE_INT size;
3647 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3649 if (TARGET_AAPCS_BASED)
3651 /* Simple, non-aggregate types (i.e. not including vectors and
3652 complex) are always returned in a register (or registers).
3653 We don't care about which register here, so we can short-cut
3654 some of the detail. */
3655 if (!AGGREGATE_TYPE_P (type)
3656 && TREE_CODE (type) != VECTOR_TYPE
3657 && TREE_CODE (type) != COMPLEX_TYPE)
3658 return false;
3660 /* Any return value that is no larger than one word can be
3661 returned in r0. */
3662 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3663 return false;
3665 /* Check any available co-processors to see if they accept the
3666 type as a register candidate (VFP, for example, can return
3667 some aggregates in consecutive registers). These aren't
3668 available if the call is variadic. */
3669 if (aapcs_select_return_coproc (type, fntype) >= 0)
3670 return false;
3672 /* Vector values should be returned using ARM registers, not
3673 memory (unless they're over 16 bytes, which will break since
3674 we only have four call-clobbered registers to play with). */
3675 if (TREE_CODE (type) == VECTOR_TYPE)
3676 return (size < 0 || size > (4 * UNITS_PER_WORD));
3678 /* The rest go in memory. */
3679 return true;
3682 if (TREE_CODE (type) == VECTOR_TYPE)
3683 return (size < 0 || size > (4 * UNITS_PER_WORD));
3685 if (!AGGREGATE_TYPE_P (type) &&
3686 (TREE_CODE (type) != VECTOR_TYPE))
3687 /* All simple types are returned in registers. */
3688 return false;
3690 if (arm_abi != ARM_ABI_APCS)
3692 /* ATPCS and later return aggregate types in memory only if they are
3693 larger than a word (or are variable size). */
3694 return (size < 0 || size > UNITS_PER_WORD);
3697 /* For the arm-wince targets we choose to be compatible with Microsoft's
3698 ARM and Thumb compilers, which always return aggregates in memory. */
3699 #ifndef ARM_WINCE
3700 /* All structures/unions bigger than one word are returned in memory.
3701 Also catch the case where int_size_in_bytes returns -1. In this case
3702 the aggregate is either huge or of variable size, and in either case
3703 we will want to return it via memory and not in a register. */
3704 if (size < 0 || size > UNITS_PER_WORD)
3705 return true;
3707 if (TREE_CODE (type) == RECORD_TYPE)
3709 tree field;
3711 /* For a struct the APCS says that we only return in a register
3712 if the type is 'integer like' and every addressable element
3713 has an offset of zero. For practical purposes this means
3714 that the structure can have at most one non bit-field element
3715 and that this element must be the first one in the structure. */
3717 /* Find the first field, ignoring non FIELD_DECL things which will
3718 have been created by C++. */
3719 for (field = TYPE_FIELDS (type);
3720 field && TREE_CODE (field) != FIELD_DECL;
3721 field = DECL_CHAIN (field))
3722 continue;
3724 if (field == NULL)
3725 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3727 /* Check that the first field is valid for returning in a register. */
3729 /* ... Floats are not allowed */
3730 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3731 return true;
3733 /* ... Aggregates that are not themselves valid for returning in
3734 a register are not allowed. */
3735 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3736 return true;
3738 /* Now check the remaining fields, if any. Only bitfields are allowed,
3739 since they are not addressable. */
3740 for (field = DECL_CHAIN (field);
3741 field;
3742 field = DECL_CHAIN (field))
3744 if (TREE_CODE (field) != FIELD_DECL)
3745 continue;
3747 if (!DECL_BIT_FIELD_TYPE (field))
3748 return true;
3751 return false;
3754 if (TREE_CODE (type) == UNION_TYPE)
3756 tree field;
3758 /* Unions can be returned in registers if every element is
3759 integral, or can be returned in an integer register. */
3760 for (field = TYPE_FIELDS (type);
3761 field;
3762 field = DECL_CHAIN (field))
3764 if (TREE_CODE (field) != FIELD_DECL)
3765 continue;
3767 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3768 return true;
3770 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3771 return true;
3774 return false;
3776 #endif /* not ARM_WINCE */
3778 /* Return all other types in memory. */
3779 return true;
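/* A few illustrative outcomes of the rules above, assuming 32-bit words
   and a non-variadic callee (hypothetical types, not taken from the
   sources):

     struct s1 { char c; };           size 1, fits in r0 on every ABI
     struct s2 { int a, b; };         size 8: returned in memory under
                                      AAPCS, ATPCS and APCS alike
     struct s3 { float x, y; };       under the AAPCS VFP variant this is
                                      a co-processor return candidate and
                                      stays out of memory
     8-byte vector_size type          returned in core registers (at most
                                      four words are allowed)

   The exact result still depends on the selected ABI and floating-point
   options.  */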
3782 /* Indicate whether or not words of a double are in big-endian order. */
3785 arm_float_words_big_endian (void)
3787 if (TARGET_MAVERICK)
3788 return 0;
3790 /* For FPA, float words are always big-endian. For VFP, floats words
3791 follow the memory system mode. */
3793 if (TARGET_FPA)
3795 return 1;
3798 if (TARGET_VFP)
3799 return (TARGET_BIG_END ? 1 : 0);
3801 return 1;
3804 const struct pcs_attribute_arg
3806 const char *arg;
3807 enum arm_pcs value;
3808 } pcs_attribute_args[] =
3810 {"aapcs", ARM_PCS_AAPCS},
3811 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3812 #if 0
3813 /* We could recognize these, but changes would be needed elsewhere
3814 * to implement them. */
3815 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3816 {"atpcs", ARM_PCS_ATPCS},
3817 {"apcs", ARM_PCS_APCS},
3818 #endif
3819 {NULL, ARM_PCS_UNKNOWN}
3822 static enum arm_pcs
3823 arm_pcs_from_attribute (tree attr)
3825 const struct pcs_attribute_arg *ptr;
3826 const char *arg;
3828 /* Get the value of the argument. */
3829 if (TREE_VALUE (attr) == NULL_TREE
3830 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3831 return ARM_PCS_UNKNOWN;
3833 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3835 /* Check it against the list of known arguments. */
3836 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3837 if (streq (arg, ptr->arg))
3838 return ptr->value;
3840 /* An unrecognized PCS variant name. */
3841 return ARM_PCS_UNKNOWN;
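/* For example, a declaration such as

     double f (double) __attribute__ ((pcs ("aapcs-vfp")));

   maps to ARM_PCS_AAPCS_VFP here, whereas an unrecognized string, say
   pcs ("fastcall"), yields ARM_PCS_UNKNOWN and is then warned about by
   arm_handle_pcs_attribute.  */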
3844 /* Get the PCS variant to use for this call. TYPE is the function's type
3845 specification, DECL is the specific declaration. DECL may be null if
3846 the call could be indirect or if this is a library call. */
3847 static enum arm_pcs
3848 arm_get_pcs_model (const_tree type, const_tree decl)
3850 bool user_convention = false;
3851 enum arm_pcs user_pcs = arm_pcs_default;
3852 tree attr;
3854 gcc_assert (type);
3856 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3857 if (attr)
3859 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3860 user_convention = true;
3863 if (TARGET_AAPCS_BASED)
3865 /* Detect varargs functions. These always use the base rules
3866 (no argument is ever a candidate for a co-processor
3867 register). */
3868 bool base_rules = stdarg_p (type);
3870 if (user_convention)
3872 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3873 sorry ("non-AAPCS derived PCS variant");
3874 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3875 error ("variadic functions must use the base AAPCS variant");
3878 if (base_rules)
3879 return ARM_PCS_AAPCS;
3880 else if (user_convention)
3881 return user_pcs;
3882 else if (decl && flag_unit_at_a_time)
3884 /* Local functions never leak outside this compilation unit,
3885 so we are free to use whatever conventions are
3886 appropriate. */
3887 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3888 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3889 if (i && i->local)
3890 return ARM_PCS_AAPCS_LOCAL;
3893 else if (user_convention && user_pcs != arm_pcs_default)
3894 sorry ("PCS variant");
3896 /* For everything else we use the target's default. */
3897 return arm_pcs_default;
3901 static void
3902 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3903 const_tree fntype ATTRIBUTE_UNUSED,
3904 rtx libcall ATTRIBUTE_UNUSED,
3905 const_tree fndecl ATTRIBUTE_UNUSED)
3907 /* Record the unallocated VFP registers. */
3908 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3909 pcum->aapcs_vfp_reg_alloc = 0;
3912 /* Walk down the type tree of TYPE counting consecutive base elements.
3913 If *MODEP is VOIDmode, then set it to the first valid floating point
3914 type. If a non-floating point type is found, or if a floating point
3915 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3916 otherwise return the count in the sub-tree. */
3917 static int
3918 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3920 enum machine_mode mode;
3921 HOST_WIDE_INT size;
3923 switch (TREE_CODE (type))
3925 case REAL_TYPE:
3926 mode = TYPE_MODE (type);
3927 if (mode != DFmode && mode != SFmode)
3928 return -1;
3930 if (*modep == VOIDmode)
3931 *modep = mode;
3933 if (*modep == mode)
3934 return 1;
3936 break;
3938 case COMPLEX_TYPE:
3939 mode = TYPE_MODE (TREE_TYPE (type));
3940 if (mode != DFmode && mode != SFmode)
3941 return -1;
3943 if (*modep == VOIDmode)
3944 *modep = mode;
3946 if (*modep == mode)
3947 return 2;
3949 break;
3951 case VECTOR_TYPE:
3952 /* Use V2SImode and V4SImode as representatives of all 64-bit
3953 and 128-bit vector types, whether or not those modes are
3954 supported with the present options. */
3955 size = int_size_in_bytes (type);
3956 switch (size)
3958 case 8:
3959 mode = V2SImode;
3960 break;
3961 case 16:
3962 mode = V4SImode;
3963 break;
3964 default:
3965 return -1;
3968 if (*modep == VOIDmode)
3969 *modep = mode;
3971 /* Vector modes are considered to be opaque: two vectors are
3972 equivalent for the purposes of being homogeneous aggregates
3973 if they are the same size. */
3974 if (*modep == mode)
3975 return 1;
3977 break;
3979 case ARRAY_TYPE:
3981 int count;
3982 tree index = TYPE_DOMAIN (type);
3984 /* Can't handle incomplete types. */
3985 if (!COMPLETE_TYPE_P(type))
3986 return -1;
3988 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3989 if (count == -1
3990 || !index
3991 || !TYPE_MAX_VALUE (index)
3992 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3993 || !TYPE_MIN_VALUE (index)
3994 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3995 || count < 0)
3996 return -1;
3998 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3999 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4001 /* There must be no padding. */
4002 if (!host_integerp (TYPE_SIZE (type), 1)
4003 || (tree_low_cst (TYPE_SIZE (type), 1)
4004 != count * GET_MODE_BITSIZE (*modep)))
4005 return -1;
4007 return count;
4010 case RECORD_TYPE:
4012 int count = 0;
4013 int sub_count;
4014 tree field;
4016 /* Can't handle incomplete types. */
4017 if (!COMPLETE_TYPE_P(type))
4018 return -1;
4020 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4022 if (TREE_CODE (field) != FIELD_DECL)
4023 continue;
4025 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4026 if (sub_count < 0)
4027 return -1;
4028 count += sub_count;
4031 /* There must be no padding. */
4032 if (!host_integerp (TYPE_SIZE (type), 1)
4033 || (tree_low_cst (TYPE_SIZE (type), 1)
4034 != count * GET_MODE_BITSIZE (*modep)))
4035 return -1;
4037 return count;
4040 case UNION_TYPE:
4041 case QUAL_UNION_TYPE:
4043 /* These aren't very interesting except in a degenerate case. */
4044 int count = 0;
4045 int sub_count;
4046 tree field;
4048 /* Can't handle incomplete types. */
4049 if (!COMPLETE_TYPE_P(type))
4050 return -1;
4052 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4054 if (TREE_CODE (field) != FIELD_DECL)
4055 continue;
4057 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4058 if (sub_count < 0)
4059 return -1;
4060 count = count > sub_count ? count : sub_count;
4063 /* There must be no padding. */
4064 if (!host_integerp (TYPE_SIZE (type), 1)
4065 || (tree_low_cst (TYPE_SIZE (type), 1)
4066 != count * GET_MODE_BITSIZE (*modep)))
4067 return -1;
4069 return count;
4072 default:
4073 break;
4076 return -1;
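/* Illustrative results of the walk above, assuming *MODEP starts out as
   VOIDmode (hypothetical types):

     struct { float x, y, z; }        ->  3, *MODEP == SFmode
     struct { double d[2]; }          ->  2, *MODEP == DFmode
     _Complex double                  ->  2, *MODEP == DFmode
     struct { float f; double d; }    -> -1  (mixed element modes)
     struct { float f; int i; }       -> -1  (non-floating-point member)

   A count between 1 and 4 with a single base mode is what later makes a
   type a candidate for the VFP argument and return rules.  */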
4079 /* Return true if PCS_VARIANT should use VFP registers. */
4080 static bool
4081 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4083 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4085 static bool seen_thumb1_vfp = false;
4087 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4089 sorry ("Thumb-1 hard-float VFP ABI");
4090 /* sorry() is not immediately fatal, so only display this once. */
4091 seen_thumb1_vfp = true;
4094 return true;
4097 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4098 return false;
4100 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4101 (TARGET_VFP_DOUBLE || !is_double));
4104 static bool
4105 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4106 enum machine_mode mode, const_tree type,
4107 enum machine_mode *base_mode, int *count)
4109 enum machine_mode new_mode = VOIDmode;
4111 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4112 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4113 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4115 *count = 1;
4116 new_mode = mode;
4118 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4120 *count = 2;
4121 new_mode = (mode == DCmode ? DFmode : SFmode);
4123 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4125 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4127 if (ag_count > 0 && ag_count <= 4)
4128 *count = ag_count;
4129 else
4130 return false;
4132 else
4133 return false;
4136 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4137 return false;
4139 *base_mode = new_mode;
4140 return true;
4143 static bool
4144 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4145 enum machine_mode mode, const_tree type)
4147 int count ATTRIBUTE_UNUSED;
4148 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4150 if (!use_vfp_abi (pcs_variant, false))
4151 return false;
4152 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4153 &ag_mode, &count);
4156 static bool
4157 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4158 const_tree type)
4160 if (!use_vfp_abi (pcum->pcs_variant, false))
4161 return false;
4163 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4164 &pcum->aapcs_vfp_rmode,
4165 &pcum->aapcs_vfp_rcount);
4168 static bool
4169 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4170 const_tree type ATTRIBUTE_UNUSED)
4172 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4173 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4174 int regno;
4176 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4177 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4179 pcum->aapcs_vfp_reg_alloc = mask << regno;
4180 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4182 int i;
4183 int rcount = pcum->aapcs_vfp_rcount;
4184 int rshift = shift;
4185 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4186 rtx par;
4187 if (!TARGET_NEON)
4189 /* Avoid using unsupported vector modes. */
4190 if (rmode == V2SImode)
4191 rmode = DImode;
4192 else if (rmode == V4SImode)
4194 rmode = DImode;
4195 rcount *= 2;
4196 rshift /= 2;
4199 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4200 for (i = 0; i < rcount; i++)
4202 rtx tmp = gen_rtx_REG (rmode,
4203 FIRST_VFP_REGNUM + regno + i * rshift);
4204 tmp = gen_rtx_EXPR_LIST
4205 (VOIDmode, tmp,
4206 GEN_INT (i * GET_MODE_SIZE (rmode)));
4207 XVECEXP (par, 0, i) = tmp;
4210 pcum->aapcs_reg = par;
4212 else
4213 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4214 return true;
4216 return false;
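/* A sketch of how the free-register mask behaves for the hard-float
   variant, assuming s0-s15 all start out free (hypothetical argument
   list):

     1st arg: double               shift 2, mask 0x3  -> s0-s1 (d0)
     2nd arg: float                shift 1, mask 0x1  -> s2
     3rd arg: struct of 2 doubles  shift 2, mask 0xf  -> s4-s7 (d2-d3)

   When no suitably aligned block of free registers is left the function
   returns false and the argument goes to the stack, which is recorded
   as rule C2.cp in aapcs_layout_arg.  */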
4219 static rtx
4220 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4221 enum machine_mode mode,
4222 const_tree type ATTRIBUTE_UNUSED)
4224 if (!use_vfp_abi (pcs_variant, false))
4225 return NULL;
4227 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4229 int count;
4230 enum machine_mode ag_mode;
4231 int i;
4232 rtx par;
4233 int shift;
4235 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4236 &ag_mode, &count);
4238 if (!TARGET_NEON)
4240 if (ag_mode == V2SImode)
4241 ag_mode = DImode;
4242 else if (ag_mode == V4SImode)
4244 ag_mode = DImode;
4245 count *= 2;
4248 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4249 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4250 for (i = 0; i < count; i++)
4252 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4253 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4254 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4255 XVECEXP (par, 0, i) = tmp;
4258 return par;
4261 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4264 static void
4265 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4266 enum machine_mode mode ATTRIBUTE_UNUSED,
4267 const_tree type ATTRIBUTE_UNUSED)
4269 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4270 pcum->aapcs_vfp_reg_alloc = 0;
4271 return;
4274 #define AAPCS_CP(X) \
4276 aapcs_ ## X ## _cum_init, \
4277 aapcs_ ## X ## _is_call_candidate, \
4278 aapcs_ ## X ## _allocate, \
4279 aapcs_ ## X ## _is_return_candidate, \
4280 aapcs_ ## X ## _allocate_return_reg, \
4281 aapcs_ ## X ## _advance \
4284 /* Table of co-processors that can be used to pass arguments in
4285 registers. Ideally no argument should be a candidate for more than
4286 one co-processor table entry, but the table is processed in order
4287 and stops after the first match. If that entry then fails to put
4288 the argument into a co-processor register, the argument will go on
4289 the stack. */
4290 static struct
4292 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4293 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4295 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4296 BLKmode) is a candidate for this co-processor's registers; this
4297 function should ignore any position-dependent state in
4298 CUMULATIVE_ARGS and only use call-type dependent information. */
4299 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4301 /* Return true if the argument does get a co-processor register; it
4302 should set aapcs_reg to an RTX of the register allocated as is
4303 required for a return from FUNCTION_ARG. */
4304 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4306 /* Return true if a result of mode MODE (or type TYPE if MODE is
4307 BLKmode) can be returned in this co-processor's registers. */
4308 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4310 /* Allocate and return an RTX to hold the return value of a
4311 call; this routine must not fail and will only be called if
4312 is_return_candidate returned true with the same parameters. */
4313 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4315 /* Finish processing this argument and prepare to start processing
4316 the next one. */
4317 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4318 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4320 AAPCS_CP(vfp)
4323 #undef AAPCS_CP
4325 static int
4326 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4327 const_tree type)
4329 int i;
4331 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4332 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4333 return i;
4335 return -1;
4338 static int
4339 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4341 /* We aren't passed a decl, so we can't check that a call is local.
4342 However, it isn't clear that that would be a win anyway, since it
4343 might limit some tail-calling opportunities. */
4344 enum arm_pcs pcs_variant;
4346 if (fntype)
4348 const_tree fndecl = NULL_TREE;
4350 if (TREE_CODE (fntype) == FUNCTION_DECL)
4352 fndecl = fntype;
4353 fntype = TREE_TYPE (fntype);
4356 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4358 else
4359 pcs_variant = arm_pcs_default;
4361 if (pcs_variant != ARM_PCS_AAPCS)
4363 int i;
4365 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4366 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4367 TYPE_MODE (type),
4368 type))
4369 return i;
4371 return -1;
4374 static rtx
4375 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4376 const_tree fntype)
4378 /* We aren't passed a decl, so we can't check that a call is local.
4379 However, it isn't clear that that would be a win anyway, since it
4380 might limit some tail-calling opportunities. */
4381 enum arm_pcs pcs_variant;
4382 int unsignedp ATTRIBUTE_UNUSED;
4384 if (fntype)
4386 const_tree fndecl = NULL_TREE;
4388 if (TREE_CODE (fntype) == FUNCTION_DECL)
4390 fndecl = fntype;
4391 fntype = TREE_TYPE (fntype);
4394 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4396 else
4397 pcs_variant = arm_pcs_default;
4399 /* Promote integer types. */
4400 if (type && INTEGRAL_TYPE_P (type))
4401 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4403 if (pcs_variant != ARM_PCS_AAPCS)
4405 int i;
4407 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4408 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4409 type))
4410 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4411 mode, type);
4414 /* Promotes small structs returned in a register to full-word size
4415 for big-endian AAPCS. */
4416 if (type && arm_return_in_msb (type))
4418 HOST_WIDE_INT size = int_size_in_bytes (type);
4419 if (size % UNITS_PER_WORD != 0)
4421 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4422 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4426 return gen_rtx_REG (mode, R0_REGNUM);
4430 aapcs_libcall_value (enum machine_mode mode)
4432 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4435 /* Lay out a function argument using the AAPCS rules. The rule
4436 numbers referred to here are those in the AAPCS. */
4437 static void
4438 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4439 const_tree type, bool named)
4441 int nregs, nregs2;
4442 int ncrn;
4444 /* We only need to do this once per argument. */
4445 if (pcum->aapcs_arg_processed)
4446 return;
4448 pcum->aapcs_arg_processed = true;
4450 /* Special case: if named is false then we are handling an incoming
4451 anonymous argument which is on the stack. */
4452 if (!named)
4453 return;
4455 /* Is this a potential co-processor register candidate? */
4456 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4458 int slot = aapcs_select_call_coproc (pcum, mode, type);
4459 pcum->aapcs_cprc_slot = slot;
4461 /* We don't have to apply any of the rules from part B of the
4462 preparation phase, these are handled elsewhere in the
4463 compiler. */
4465 if (slot >= 0)
4467 /* A Co-processor register candidate goes either in its own
4468 class of registers or on the stack. */
4469 if (!pcum->aapcs_cprc_failed[slot])
4471 /* C1.cp - Try to allocate the argument to co-processor
4472 registers. */
4473 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4474 return;
4476 /* C2.cp - Put the argument on the stack and note that we
4477 can't assign any more candidates in this slot. We also
4478 need to note that we have allocated stack space, so that
4479 we won't later try to split a non-cprc candidate between
4480 core registers and the stack. */
4481 pcum->aapcs_cprc_failed[slot] = true;
4482 pcum->can_split = false;
4485 /* We didn't get a register, so this argument goes on the
4486 stack. */
4487 gcc_assert (pcum->can_split == false);
4488 return;
4492 /* C3 - For double-word aligned arguments, round the NCRN up to the
4493 next even number. */
4494 ncrn = pcum->aapcs_ncrn;
4495 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4496 ncrn++;
4498 nregs = ARM_NUM_REGS2(mode, type);
4500 /* Sigh, this test should really assert that nregs > 0, but a GCC
4501 extension allows empty structs and then gives them empty size; it
4502 then allows such a structure to be passed by value. For some of
4503 the code below we have to pretend that such an argument has
4504 non-zero size so that we 'locate' it correctly either in
4505 registers or on the stack. */
4506 gcc_assert (nregs >= 0);
4508 nregs2 = nregs ? nregs : 1;
4510 /* C4 - Argument fits entirely in core registers. */
4511 if (ncrn + nregs2 <= NUM_ARG_REGS)
4513 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4514 pcum->aapcs_next_ncrn = ncrn + nregs;
4515 return;
4518 /* C5 - Some core registers left and there are no arguments already
4519 on the stack: split this argument between the remaining core
4520 registers and the stack. */
4521 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4523 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4524 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4525 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4526 return;
4529 /* C6 - NCRN is set to 4. */
4530 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4532 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4533 return;
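/* Worked example of the core-register rules above, assuming AAPCS with
   four argument registers and doubleword alignment for long long:

     void f (int a, long long b, int c);

     a: ncrn 0, one register       -> r0           (C4)
     b: ncrn 1, C3 rounds up to 2  -> r2-r3        (C4)
     c: ncrn 4, nothing left       -> stack        (C6 then C7/C8)

   If instead an 8-byte argument arrived with only r3 free and nothing
   yet on the stack, rule C5 would split it between r3 and the stack and
   record the in-register portion in aapcs_partial.  */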
4536 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4537 for a call to a function whose data type is FNTYPE.
4538 For a library call, FNTYPE is NULL. */
4539 void
4540 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4541 rtx libname,
4542 tree fndecl ATTRIBUTE_UNUSED)
4544 /* Long call handling. */
4545 if (fntype)
4546 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4547 else
4548 pcum->pcs_variant = arm_pcs_default;
4550 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4552 if (arm_libcall_uses_aapcs_base (libname))
4553 pcum->pcs_variant = ARM_PCS_AAPCS;
4555 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4556 pcum->aapcs_reg = NULL_RTX;
4557 pcum->aapcs_partial = 0;
4558 pcum->aapcs_arg_processed = false;
4559 pcum->aapcs_cprc_slot = -1;
4560 pcum->can_split = true;
4562 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4564 int i;
4566 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4568 pcum->aapcs_cprc_failed[i] = false;
4569 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4572 return;
4575 /* Legacy ABIs */
4577 /* On the ARM, the offset starts at 0. */
4578 pcum->nregs = 0;
4579 pcum->iwmmxt_nregs = 0;
4580 pcum->can_split = true;
4582 /* Varargs vectors are treated the same as long long.
4583 named_count avoids having to change the way arm handles 'named' */
4584 pcum->named_count = 0;
4585 pcum->nargs = 0;
4587 if (TARGET_REALLY_IWMMXT && fntype)
4589 tree fn_arg;
4591 for (fn_arg = TYPE_ARG_TYPES (fntype);
4592 fn_arg;
4593 fn_arg = TREE_CHAIN (fn_arg))
4594 pcum->named_count += 1;
4596 if (! pcum->named_count)
4597 pcum->named_count = INT_MAX;
4602 /* Return true if mode/type need doubleword alignment. */
4603 static bool
4604 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4606 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4607 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
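/* Under the AAPCS, for instance, DImode and DFmode arguments (64-bit
   alignment) satisfy this test, as does an aggregate whose declared
   alignment exceeds PARM_BOUNDARY, while int and float do not.  */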
4611 /* Determine where to put an argument to a function.
4612 Value is zero to push the argument on the stack,
4613 or a hard register in which to store the argument.
4615 MODE is the argument's machine mode.
4616 TYPE is the data type of the argument (as a tree).
4617 This is null for libcalls where that information may
4618 not be available.
4619 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4620 the preceding args and about the function being called.
4621 NAMED is nonzero if this argument is a named parameter
4622 (otherwise it is an extra parameter matching an ellipsis).
4624 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4625 other arguments are passed on the stack. If (NAMED == 0) (which happens
4626 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4627 defined), say it is passed on the stack (function_prologue will
4628 indeed arrange for it to be passed on the stack if necessary). */
4630 static rtx
4631 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4632 const_tree type, bool named)
4634 int nregs;
4636 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4637 a call insn (op3 of a call_value insn). */
4638 if (mode == VOIDmode)
4639 return const0_rtx;
4641 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4643 aapcs_layout_arg (pcum, mode, type, named);
4644 return pcum->aapcs_reg;
4647 /* Varargs vectors are treated the same as long long.
4648 named_count avoids having to change the way arm handles 'named' */
4649 if (TARGET_IWMMXT_ABI
4650 && arm_vector_mode_supported_p (mode)
4651 && pcum->named_count > pcum->nargs + 1)
4653 if (pcum->iwmmxt_nregs <= 9)
4654 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4655 else
4657 pcum->can_split = false;
4658 return NULL_RTX;
4662 /* Put doubleword aligned quantities in even register pairs. */
4663 if (pcum->nregs & 1
4664 && ARM_DOUBLEWORD_ALIGN
4665 && arm_needs_doubleword_align (mode, type))
4666 pcum->nregs++;
4668 /* Only allow splitting an arg between regs and memory if all preceding
4669 args were allocated to regs. For args passed by reference we only count
4670 the reference pointer. */
4671 if (pcum->can_split)
4672 nregs = 1;
4673 else
4674 nregs = ARM_NUM_REGS2 (mode, type);
4676 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4677 return NULL_RTX;
4679 return gen_rtx_REG (mode, pcum->nregs);
4682 static unsigned int
4683 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4685 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4686 ? DOUBLEWORD_ALIGNMENT
4687 : PARM_BOUNDARY);
4690 static int
4691 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4692 tree type, bool named)
4694 int nregs = pcum->nregs;
4696 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4698 aapcs_layout_arg (pcum, mode, type, named);
4699 return pcum->aapcs_partial;
4702 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4703 return 0;
4705 if (NUM_ARG_REGS > nregs
4706 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4707 && pcum->can_split)
4708 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4710 return 0;
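/* Example for the legacy (non-AAPCS) path, where long long is not forced
   onto an even register pair: in f (int, int, int, long long) the last
   argument starts with only r3 free, so 4 bytes travel in r3 and the
   rest on the stack, and this function reports 4 partial bytes.  On the
   AAPCS path the same information comes from aapcs_partial.  */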
4713 /* Update the data in PCUM to advance over an argument
4714 of mode MODE and data type TYPE.
4715 (TYPE is null for libcalls where that information may not be available.) */
4717 static void
4718 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4719 const_tree type, bool named)
4721 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4723 aapcs_layout_arg (pcum, mode, type, named);
4725 if (pcum->aapcs_cprc_slot >= 0)
4727 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4728 type);
4729 pcum->aapcs_cprc_slot = -1;
4732 /* Generic stuff. */
4733 pcum->aapcs_arg_processed = false;
4734 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4735 pcum->aapcs_reg = NULL_RTX;
4736 pcum->aapcs_partial = 0;
4738 else
4740 pcum->nargs += 1;
4741 if (arm_vector_mode_supported_p (mode)
4742 && pcum->named_count > pcum->nargs
4743 && TARGET_IWMMXT_ABI)
4744 pcum->iwmmxt_nregs += 1;
4745 else
4746 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4750 /* Variable sized types are passed by reference. This is a GCC
4751 extension to the ARM ABI. */
4753 static bool
4754 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4755 enum machine_mode mode ATTRIBUTE_UNUSED,
4756 const_tree type, bool named ATTRIBUTE_UNUSED)
4758 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
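/* Concretely, any argument whose TYPE_SIZE is not an INTEGER_CST, such
   as a GNU C variable-length array type passed by value, is replaced by
   a pointer to the object rather than being copied into registers or
   stack slots here.  */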
4761 /* Encode the current state of the #pragma [no_]long_calls. */
4762 typedef enum
4764 OFF, /* No #pragma [no_]long_calls is in effect. */
4765 LONG, /* #pragma long_calls is in effect. */
4766 SHORT /* #pragma no_long_calls is in effect. */
4767 } arm_pragma_enum;
4769 static arm_pragma_enum arm_pragma_long_calls = OFF;
4771 void
4772 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4774 arm_pragma_long_calls = LONG;
4777 void
4778 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4780 arm_pragma_long_calls = SHORT;
4783 void
4784 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4786 arm_pragma_long_calls = OFF;
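/* Typical use of these pragmas in a translation unit:

     #pragma long_calls
     extern void far_away (void);    calls use a full 32-bit address
     #pragma no_long_calls
     extern void nearby (void);      plain BL
     #pragma long_calls_off          back to the command-line default

   The state recorded here is applied to new function types by
   arm_set_default_type_attributes and finally consulted by
   arm_is_long_call_p.  */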
4789 /* Handle an attribute requiring a FUNCTION_DECL;
4790 arguments as in struct attribute_spec.handler. */
4791 static tree
4792 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4793 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4795 if (TREE_CODE (*node) != FUNCTION_DECL)
4797 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4798 name);
4799 *no_add_attrs = true;
4802 return NULL_TREE;
4805 /* Handle an "interrupt" or "isr" attribute;
4806 arguments as in struct attribute_spec.handler. */
4807 static tree
4808 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4809 bool *no_add_attrs)
4811 if (DECL_P (*node))
4813 if (TREE_CODE (*node) != FUNCTION_DECL)
4815 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4816 name);
4817 *no_add_attrs = true;
4819 /* FIXME: the argument if any is checked for type attributes;
4820 should it be checked for decl ones? */
4822 else
4824 if (TREE_CODE (*node) == FUNCTION_TYPE
4825 || TREE_CODE (*node) == METHOD_TYPE)
4827 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4829 warning (OPT_Wattributes, "%qE attribute ignored",
4830 name);
4831 *no_add_attrs = true;
4834 else if (TREE_CODE (*node) == POINTER_TYPE
4835 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4836 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4837 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4839 *node = build_variant_type_copy (*node);
4840 TREE_TYPE (*node) = build_type_attribute_variant
4841 (TREE_TYPE (*node),
4842 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4843 *no_add_attrs = true;
4845 else
4847 /* Possibly pass this attribute on from the type to a decl. */
4848 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4849 | (int) ATTR_FLAG_FUNCTION_NEXT
4850 | (int) ATTR_FLAG_ARRAY_NEXT))
4852 *no_add_attrs = true;
4853 return tree_cons (name, args, NULL_TREE);
4855 else
4857 warning (OPT_Wattributes, "%qE attribute ignored",
4858 name);
4863 return NULL_TREE;
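/* The attribute validated here is typically written as, for example,

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fiq_handler (void) __attribute__ ((isr ("FIQ")));

   arm_isr_value recognizes the optional argument strings "IRQ", "FIQ",
   "SWI", "ABORT" and "UNDEF" (and their lower-case forms); anything
   else results in the "attribute ignored" warning above.  */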
4866 /* Handle a "pcs" attribute; arguments as in struct
4867 attribute_spec.handler. */
4868 static tree
4869 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4870 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4872 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4874 warning (OPT_Wattributes, "%qE attribute ignored", name);
4875 *no_add_attrs = true;
4877 return NULL_TREE;
4880 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4881 /* Handle the "notshared" attribute. This attribute is another way of
4882 requesting hidden visibility. ARM's compiler supports
4883 "__declspec(notshared)"; we support the same thing via an
4884 attribute. */
4886 static tree
4887 arm_handle_notshared_attribute (tree *node,
4888 tree name ATTRIBUTE_UNUSED,
4889 tree args ATTRIBUTE_UNUSED,
4890 int flags ATTRIBUTE_UNUSED,
4891 bool *no_add_attrs)
4893 tree decl = TYPE_NAME (*node);
4895 if (decl)
4897 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4898 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4899 *no_add_attrs = false;
4901 return NULL_TREE;
4903 #endif
4905 /* Return 0 if the attributes for two types are incompatible, 1 if they
4906 are compatible, and 2 if they are nearly compatible (which causes a
4907 warning to be generated). */
4908 static int
4909 arm_comp_type_attributes (const_tree type1, const_tree type2)
4911 int l1, l2, s1, s2;
4913 /* Check for mismatch of non-default calling convention. */
4914 if (TREE_CODE (type1) != FUNCTION_TYPE)
4915 return 1;
4917 /* Check for mismatched call attributes. */
4918 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4919 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4920 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4921 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4923 /* Only bother to check if an attribute is defined. */
4924 if (l1 | l2 | s1 | s2)
4926 /* If one type has an attribute, the other must have the same attribute. */
4927 if ((l1 != l2) || (s1 != s2))
4928 return 0;
4930 /* Disallow mixed attributes. */
4931 if ((l1 & s2) || (l2 & s1))
4932 return 0;
4935 /* Check for mismatched ISR attribute. */
4936 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4937 if (! l1)
4938 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4939 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4940 if (! l2)
4941 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4942 if (l1 != l2)
4943 return 0;
4945 return 1;
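/* For instance, a function type carrying long_call is reported as
   incompatible (0) with one carrying short_call, or with one carrying
   neither, so assigning between such function pointers draws the usual
   incompatible-pointer diagnostics; two types with no call-type or ISR
   attributes at all compare as fully compatible (1).  */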
4948 /* Assigns default attributes to newly defined type. This is used to
4949 set short_call/long_call attributes for function types of
4950 functions defined inside corresponding #pragma scopes. */
4951 static void
4952 arm_set_default_type_attributes (tree type)
4954 /* Add __attribute__ ((long_call)) to all functions, when
4955 inside #pragma long_calls or __attribute__ ((short_call)),
4956 when inside #pragma no_long_calls. */
4957 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4959 tree type_attr_list, attr_name;
4960 type_attr_list = TYPE_ATTRIBUTES (type);
4962 if (arm_pragma_long_calls == LONG)
4963 attr_name = get_identifier ("long_call");
4964 else if (arm_pragma_long_calls == SHORT)
4965 attr_name = get_identifier ("short_call");
4966 else
4967 return;
4969 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4970 TYPE_ATTRIBUTES (type) = type_attr_list;
4974 /* Return true if DECL is known to be linked into section SECTION. */
4976 static bool
4977 arm_function_in_section_p (tree decl, section *section)
4979 /* We can only be certain about functions defined in the same
4980 compilation unit. */
4981 if (!TREE_STATIC (decl))
4982 return false;
4984 /* Make sure that SYMBOL always binds to the definition in this
4985 compilation unit. */
4986 if (!targetm.binds_local_p (decl))
4987 return false;
4989 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4990 if (!DECL_SECTION_NAME (decl))
4992 /* Make sure that we will not create a unique section for DECL. */
4993 if (flag_function_sections || DECL_ONE_ONLY (decl))
4994 return false;
4997 return function_section (decl) == section;
5000 /* Return nonzero if a 32-bit "long_call" should be generated for
5001 a call from the current function to DECL. We generate a long_call
5002 if the function:
5004 a. has an __attribute__((long call))
5005 or b. is within the scope of a #pragma long_calls
5006 or c. the -mlong-calls command line switch has been specified
5008 However we do not generate a long call if the function:
5010 d. has an __attribute__ ((short_call))
5011 or e. is inside the scope of a #pragma no_long_calls
5012 or f. is defined in the same section as the current function. */
5014 bool
5015 arm_is_long_call_p (tree decl)
5017 tree attrs;
5019 if (!decl)
5020 return TARGET_LONG_CALLS;
5022 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5023 if (lookup_attribute ("short_call", attrs))
5024 return false;
5026 /* For "f", be conservative, and only cater for cases in which the
5027 whole of the current function is placed in the same section. */
5028 if (!flag_reorder_blocks_and_partition
5029 && TREE_CODE (decl) == FUNCTION_DECL
5030 && arm_function_in_section_p (decl, current_function_section ()))
5031 return false;
5033 if (lookup_attribute ("long_call", attrs))
5034 return true;
5036 return TARGET_LONG_CALLS;
5039 /* Return nonzero if it is ok to make a tail-call to DECL. */
5040 static bool
5041 arm_function_ok_for_sibcall (tree decl, tree exp)
5043 unsigned long func_type;
5045 if (cfun->machine->sibcall_blocked)
5046 return false;
5048 /* Never tailcall something for which we have no decl, or if we
5049 are generating code for Thumb-1. */
5050 if (decl == NULL || TARGET_THUMB1)
5051 return false;
5053 /* The PIC register is live on entry to VxWorks PLT entries, so we
5054 must make the call before restoring the PIC register. */
5055 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5056 return false;
5058 /* Cannot tail-call to long calls, since these are out of range of
5059 a branch instruction. */
5060 if (arm_is_long_call_p (decl))
5061 return false;
5063 /* If we are interworking and the function is not declared static
5064 then we can't tail-call it unless we know that it exists in this
5065 compilation unit (since it might be a Thumb routine). */
5066 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5067 return false;
5069 func_type = arm_current_func_type ();
5070 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5071 if (IS_INTERRUPT (func_type))
5072 return false;
5074 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5076 /* Check that the return value locations are the same. For
5077 example that we aren't returning a value from the sibling in
5078 a VFP register but then need to transfer it to a core
5079 register. */
5080 rtx a, b;
5082 a = arm_function_value (TREE_TYPE (exp), decl, false);
5083 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5084 cfun->decl, false);
5085 if (!rtx_equal_p (a, b))
5086 return false;
5089 /* Never tailcall if function may be called with a misaligned SP. */
5090 if (IS_STACKALIGN (func_type))
5091 return false;
5093 /* Everything else is ok. */
5094 return true;
5098 /* Addressing mode support functions. */
5100 /* Return nonzero if X is a legitimate immediate operand when compiling
5101 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5103 legitimate_pic_operand_p (rtx x)
5105 if (GET_CODE (x) == SYMBOL_REF
5106 || (GET_CODE (x) == CONST
5107 && GET_CODE (XEXP (x, 0)) == PLUS
5108 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5109 return 0;
5111 return 1;
5114 /* Record that the current function needs a PIC register. Initialize
5115 cfun->machine->pic_reg if we have not already done so. */
5117 static void
5118 require_pic_register (void)
5120 /* A lot of the logic here is made obscure by the fact that this
5121 routine gets called as part of the rtx cost estimation process.
5122 We don't want those calls to affect any assumptions about the real
5123 function; and further, we can't call entry_of_function() until we
5124 start the real expansion process. */
5125 if (!crtl->uses_pic_offset_table)
5127 gcc_assert (can_create_pseudo_p ());
5128 if (arm_pic_register != INVALID_REGNUM)
5130 if (!cfun->machine->pic_reg)
5131 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5133 /* Play games to avoid marking the function as needing pic
5134 if we are being called as part of the cost-estimation
5135 process. */
5136 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5137 crtl->uses_pic_offset_table = 1;
5139 else
5141 rtx seq, insn;
5143 if (!cfun->machine->pic_reg)
5144 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5146 /* Play games to avoid marking the function as needing pic
5147 if we are being called as part of the cost-estimation
5148 process. */
5149 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5151 crtl->uses_pic_offset_table = 1;
5152 start_sequence ();
5154 arm_load_pic_register (0UL);
5156 seq = get_insns ();
5157 end_sequence ();
5159 for (insn = seq; insn; insn = NEXT_INSN (insn))
5160 if (INSN_P (insn))
5161 INSN_LOCATOR (insn) = prologue_locator;
5163 /* We can be called during expansion of PHI nodes, where
5164 we can't yet emit instructions directly in the final
5165 insn stream. Queue the insns on the entry edge, they will
5166 be committed after everything else is expanded. */
5167 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5174 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5176 if (GET_CODE (orig) == SYMBOL_REF
5177 || GET_CODE (orig) == LABEL_REF)
5179 rtx insn;
5181 if (reg == 0)
5183 gcc_assert (can_create_pseudo_p ());
5184 reg = gen_reg_rtx (Pmode);
5187 /* VxWorks does not impose a fixed gap between segments; the run-time
5188 gap can be different from the object-file gap. We therefore can't
5189 use GOTOFF unless we are absolutely sure that the symbol is in the
5190 same segment as the GOT. Unfortunately, the flexibility of linker
5191 scripts means that we can't be sure of that in general, so assume
5192 that GOTOFF is never valid on VxWorks. */
5193 if ((GET_CODE (orig) == LABEL_REF
5194 || (GET_CODE (orig) == SYMBOL_REF &&
5195 SYMBOL_REF_LOCAL_P (orig)))
5196 && NEED_GOT_RELOC
5197 && !TARGET_VXWORKS_RTP)
5198 insn = arm_pic_static_addr (orig, reg);
5199 else
5201 rtx pat;
5202 rtx mem;
5204 /* If this function doesn't have a pic register, create one now. */
5205 require_pic_register ();
5207 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5209 /* Make the MEM as close to a constant as possible. */
5210 mem = SET_SRC (pat);
5211 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5212 MEM_READONLY_P (mem) = 1;
5213 MEM_NOTRAP_P (mem) = 1;
5215 insn = emit_insn (pat);
5218 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5219 by loop. */
5220 set_unique_reg_note (insn, REG_EQUAL, orig);
5222 return reg;
5224 else if (GET_CODE (orig) == CONST)
5226 rtx base, offset;
5228 if (GET_CODE (XEXP (orig, 0)) == PLUS
5229 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5230 return orig;
5232 /* Handle the case where we have: const (UNSPEC_TLS). */
5233 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5234 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5235 return orig;
5237 /* Handle the case where we have:
5238 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5239 CONST_INT. */
5240 if (GET_CODE (XEXP (orig, 0)) == PLUS
5241 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5242 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5244 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5245 return orig;
5248 if (reg == 0)
5250 gcc_assert (can_create_pseudo_p ());
5251 reg = gen_reg_rtx (Pmode);
5254 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5256 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5257 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5258 base == reg ? 0 : reg);
5260 if (GET_CODE (offset) == CONST_INT)
5262 /* The base register doesn't really matter, we only want to
5263 test the index for the appropriate mode. */
5264 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5266 gcc_assert (can_create_pseudo_p ());
5267 offset = force_reg (Pmode, offset);
5270 if (GET_CODE (offset) == CONST_INT)
5271 return plus_constant (base, INTVAL (offset));
5274 if (GET_MODE_SIZE (mode) > 4
5275 && (GET_MODE_CLASS (mode) == MODE_INT
5276 || TARGET_SOFT_FLOAT))
5278 emit_insn (gen_addsi3 (reg, base, offset));
5279 return reg;
5282 return gen_rtx_PLUS (Pmode, base, offset);
5285 return orig;
5289 /* Find a spare register to use during the prolog of a function. */
5291 static int
5292 thumb_find_work_register (unsigned long pushed_regs_mask)
5294 int reg;
5296 /* Check the argument registers first as these are call-used. The
5297 register allocation order means that sometimes r3 might be used
5298 but earlier argument registers might not, so check them all. */
5299 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5300 if (!df_regs_ever_live_p (reg))
5301 return reg;
5303 /* Before going on to check the call-saved registers we can try a couple
5304 more ways of deducing that r3 is available. The first is when we are
5305 pushing anonymous arguments onto the stack and we have less than 4
5306 registers worth of fixed arguments(*). In this case r3 will be part of
5307 the variable argument list and so we can be sure that it will be
5308 pushed right at the start of the function. Hence it will be available
5309 for the rest of the prologue.
5310 (*): ie crtl->args.pretend_args_size is greater than 0. */
5311 if (cfun->machine->uses_anonymous_args
5312 && crtl->args.pretend_args_size > 0)
5313 return LAST_ARG_REGNUM;
5315 /* The other case is when we have fixed arguments but less than 4 registers
5316 worth. In this case r3 might be used in the body of the function, but
5317 it is not being used to convey an argument into the function. In theory
5318 we could just check crtl->args.size to see how many bytes are
5319 being passed in argument registers, but it seems that it is unreliable.
5320 Sometimes it will have the value 0 when in fact arguments are being
5321 passed. (See testcase execute/20021111-1.c for an example). So we also
5322 check the args_info.nregs field as well. The problem with this field is
5323 that it makes no allowances for arguments that are passed to the
5324 function but which are not used. Hence we could miss an opportunity
5325 when a function has an unused argument in r3. But it is better to be
5326 safe than to be sorry. */
5327 if (! cfun->machine->uses_anonymous_args
5328 && crtl->args.size >= 0
5329 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5330 && crtl->args.info.nregs < 4)
5331 return LAST_ARG_REGNUM;
5333 /* Otherwise look for a call-saved register that is going to be pushed. */
5334 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5335 if (pushed_regs_mask & (1 << reg))
5336 return reg;
5338 if (TARGET_THUMB2)
5340 /* Thumb-2 can use high regs. */
5341 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5342 if (pushed_regs_mask & (1 << reg))
5343 return reg;
5345 /* Something went wrong - thumb_compute_save_reg_mask()
5346 should have arranged for a suitable register to be pushed. */
5347 gcc_unreachable ();
5350 static GTY(()) int pic_labelno;
5352 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5353 low register. */
5355 void
5356 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5358 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5360 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5361 return;
5363 gcc_assert (flag_pic);
5365 pic_reg = cfun->machine->pic_reg;
5366 if (TARGET_VXWORKS_RTP)
5368 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5369 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5370 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5372 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5374 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5375 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5377 else
5379 /* We use an UNSPEC rather than a LABEL_REF because this label
5380 never appears in the code stream. */
5382 labelno = GEN_INT (pic_labelno++);
5383 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5384 l1 = gen_rtx_CONST (VOIDmode, l1);
5386 /* On the ARM the PC register contains 'dot + 8' at the time of the
5387 addition, on the Thumb it is 'dot + 4'. */
5388 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5389 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5390 UNSPEC_GOTSYM_OFF);
5391 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5393 if (TARGET_32BIT)
5395 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5396 if (TARGET_ARM)
5397 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5398 else
5399 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5401 else /* TARGET_THUMB1 */
5403 if (arm_pic_register != INVALID_REGNUM
5404 && REGNO (pic_reg) > LAST_LO_REGNUM)
5406 /* We will have pushed the pic register, so we should always be
5407 able to find a work register. */
5408 pic_tmp = gen_rtx_REG (SImode,
5409 thumb_find_work_register (saved_regs));
5410 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5411 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5413 else
5414 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5415 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5419 /* Need to emit this whether or not we obey regdecls,
5420 since setjmp/longjmp can cause life info to screw up. */
5421 emit_use (pic_reg);
5424 /* Generate code to load the address of a static var when flag_pic is set. */
5425 static rtx
5426 arm_pic_static_addr (rtx orig, rtx reg)
5428 rtx l1, labelno, offset_rtx, insn;
5430 gcc_assert (flag_pic);
5432 /* We use an UNSPEC rather than a LABEL_REF because this label
5433 never appears in the code stream. */
5434 labelno = GEN_INT (pic_labelno++);
5435 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5436 l1 = gen_rtx_CONST (VOIDmode, l1);
5438 /* On the ARM the PC register contains 'dot + 8' at the time of the
5439 addition, on the Thumb it is 'dot + 4'. */
5440 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5441 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5442 UNSPEC_SYMBOL_OFFSET);
5443 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5445 if (TARGET_32BIT)
5447 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5448 if (TARGET_ARM)
5449 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5450 else
5451 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5453 else /* TARGET_THUMB1 */
5455 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5456 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5459 return insn;
5462 /* Return nonzero if X is valid as an ARM state addressing register. */
5463 static int
5464 arm_address_register_rtx_p (rtx x, int strict_p)
5466 int regno;
5468 if (GET_CODE (x) != REG)
5469 return 0;
5471 regno = REGNO (x);
5473 if (strict_p)
5474 return ARM_REGNO_OK_FOR_BASE_P (regno);
5476 return (regno <= LAST_ARM_REGNUM
5477 || regno >= FIRST_PSEUDO_REGISTER
5478 || regno == FRAME_POINTER_REGNUM
5479 || regno == ARG_POINTER_REGNUM);
5482 /* Return TRUE if this rtx is the difference of a symbol and a label,
5483 and will reduce to a PC-relative relocation in the object file.
5484 Expressions like this can be left alone when generating PIC, rather
5485 than forced through the GOT. */
5486 static int
5487 pcrel_constant_p (rtx x)
5489 if (GET_CODE (x) == MINUS)
5490 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5492 return FALSE;
5495 /* Return true if X will surely end up in an index register after next
5496 splitting pass. */
5497 static bool
5498 will_be_in_index_register (const_rtx x)
5500 /* arm.md: calculate_pic_address will split this into a register. */
5501 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5504 /* Return nonzero if X is a valid ARM state address operand. */
5506 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5507 int strict_p)
5509 bool use_ldrd;
5510 enum rtx_code code = GET_CODE (x);
5512 if (arm_address_register_rtx_p (x, strict_p))
5513 return 1;
5515 use_ldrd = (TARGET_LDRD
5516 && (mode == DImode
5517 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5519 if (code == POST_INC || code == PRE_DEC
5520 || ((code == PRE_INC || code == POST_DEC)
5521 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5522 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5524 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5525 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5526 && GET_CODE (XEXP (x, 1)) == PLUS
5527 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5529 rtx addend = XEXP (XEXP (x, 1), 1);
5531 /* Don't allow ldrd post increment by register because it's hard
5532 to fixup invalid register choices. */
5533 if (use_ldrd
5534 && GET_CODE (x) == POST_MODIFY
5535 && GET_CODE (addend) == REG)
5536 return 0;
5538 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5539 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5542 /* After reload constants split into minipools will have addresses
5543 from a LABEL_REF. */
5544 else if (reload_completed
5545 && (code == LABEL_REF
5546 || (code == CONST
5547 && GET_CODE (XEXP (x, 0)) == PLUS
5548 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5549 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5550 return 1;
5552 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5553 return 0;
5555 else if (code == PLUS)
5557 rtx xop0 = XEXP (x, 0);
5558 rtx xop1 = XEXP (x, 1);
5560 return ((arm_address_register_rtx_p (xop0, strict_p)
5561 && ((GET_CODE(xop1) == CONST_INT
5562 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5563 || (!strict_p && will_be_in_index_register (xop1))))
5564 || (arm_address_register_rtx_p (xop1, strict_p)
5565 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5568 #if 0
5569 /* Reload currently can't handle MINUS, so disable this for now */
5570 else if (GET_CODE (x) == MINUS)
5572 rtx xop0 = XEXP (x, 0);
5573 rtx xop1 = XEXP (x, 1);
5575 return (arm_address_register_rtx_p (xop0, strict_p)
5576 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5578 #endif
5580 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5581 && code == SYMBOL_REF
5582 && CONSTANT_POOL_ADDRESS_P (x)
5583 && ! (flag_pic
5584 && symbol_mentioned_p (get_pool_constant (x))
5585 && ! pcrel_constant_p (get_pool_constant (x))))
5586 return 1;
5588 return 0;
5591 /* Return nonzero if X is a valid Thumb-2 address operand. */
5592 static int
5593 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5595 bool use_ldrd;
5596 enum rtx_code code = GET_CODE (x);
5598 if (arm_address_register_rtx_p (x, strict_p))
5599 return 1;
5601 use_ldrd = (TARGET_LDRD
5602 && (mode == DImode
5603 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5605 if (code == POST_INC || code == PRE_DEC
5606 || ((code == PRE_INC || code == POST_DEC)
5607 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5608 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5610 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5611 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5612 && GET_CODE (XEXP (x, 1)) == PLUS
5613 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5615 /* Thumb-2 only has autoincrement by constant. */
5616 rtx addend = XEXP (XEXP (x, 1), 1);
5617 HOST_WIDE_INT offset;
5619 if (GET_CODE (addend) != CONST_INT)
5620 return 0;
5622 offset = INTVAL(addend);
5623 if (GET_MODE_SIZE (mode) <= 4)
5624 return (offset > -256 && offset < 256);
5626 return (use_ldrd && offset > -1024 && offset < 1024
5627 && (offset & 3) == 0);
5630 /* After reload constants split into minipools will have addresses
5631 from a LABEL_REF. */
5632 else if (reload_completed
5633 && (code == LABEL_REF
5634 || (code == CONST
5635 && GET_CODE (XEXP (x, 0)) == PLUS
5636 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5637 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5638 return 1;
5640 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5641 return 0;
5643 else if (code == PLUS)
5645 rtx xop0 = XEXP (x, 0);
5646 rtx xop1 = XEXP (x, 1);
5648 return ((arm_address_register_rtx_p (xop0, strict_p)
5649 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5650 || (!strict_p && will_be_in_index_register (xop1))))
5651 || (arm_address_register_rtx_p (xop1, strict_p)
5652 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5655 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5656 && code == SYMBOL_REF
5657 && CONSTANT_POOL_ADDRESS_P (x)
5658 && ! (flag_pic
5659 && symbol_mentioned_p (get_pool_constant (x))
5660 && ! pcrel_constant_p (get_pool_constant (x))))
5661 return 1;
5663 return 0;
5666 /* Return nonzero if INDEX is valid for an address index operand in
5667 ARM state. */
5668 static int
5669 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5670 int strict_p)
5672 HOST_WIDE_INT range;
5673 enum rtx_code code = GET_CODE (index);
5675 /* Standard coprocessor addressing modes. */
5676 if (TARGET_HARD_FLOAT
5677 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5678 && (mode == SFmode || mode == DFmode
5679 || (TARGET_MAVERICK && mode == DImode)))
5680 return (code == CONST_INT && INTVAL (index) < 1024
5681 && INTVAL (index) > -1024
5682 && (INTVAL (index) & 3) == 0);
5684 /* For quad modes, we restrict the constant offset to be slightly less
5685 than what the instruction format permits. We do this because for
5686 quad mode moves, we will actually decompose them into two separate
5687 double-mode reads or writes. INDEX must therefore be a valid
5688 (double-mode) offset and so must INDEX+8. */
5689 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5690 return (code == CONST_INT
5691 && INTVAL (index) < 1016
5692 && INTVAL (index) > -1024
5693 && (INTVAL (index) & 3) == 0);
5695 /* We have no such constraint on double mode offsets, so we permit the
5696 full range of the instruction format. */
5697 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5698 return (code == CONST_INT
5699 && INTVAL (index) < 1024
5700 && INTVAL (index) > -1024
5701 && (INTVAL (index) & 3) == 0);
5703 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5704 return (code == CONST_INT
5705 && INTVAL (index) < 1024
5706 && INTVAL (index) > -1024
5707 && (INTVAL (index) & 3) == 0);
5709 if (arm_address_register_rtx_p (index, strict_p)
5710 && (GET_MODE_SIZE (mode) <= 4))
5711 return 1;
5713 if (mode == DImode || mode == DFmode)
5715 if (code == CONST_INT)
5717 HOST_WIDE_INT val = INTVAL (index);
5719 if (TARGET_LDRD)
5720 return val > -256 && val < 256;
5721 else
5722 return val > -4096 && val < 4092;
5725 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5728 if (GET_MODE_SIZE (mode) <= 4
5729 && ! (arm_arch4
5730 && (mode == HImode
5731 || mode == HFmode
5732 || (mode == QImode && outer == SIGN_EXTEND))))
5734 if (code == MULT)
5736 rtx xiop0 = XEXP (index, 0);
5737 rtx xiop1 = XEXP (index, 1);
5739 return ((arm_address_register_rtx_p (xiop0, strict_p)
5740 && power_of_two_operand (xiop1, SImode))
5741 || (arm_address_register_rtx_p (xiop1, strict_p)
5742 && power_of_two_operand (xiop0, SImode)));
5744 else if (code == LSHIFTRT || code == ASHIFTRT
5745 || code == ASHIFT || code == ROTATERT)
5747 rtx op = XEXP (index, 1);
5749 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5750 && GET_CODE (op) == CONST_INT
5751 && INTVAL (op) > 0
5752 && INTVAL (op) <= 31);
5756 /* For ARM v4 we may be doing a sign-extend operation during the
5757 load. */
5758 if (arm_arch4)
5760 if (mode == HImode
5761 || mode == HFmode
5762 || (outer == SIGN_EXTEND && mode == QImode))
5763 range = 256;
5764 else
5765 range = 4096;
5767 else
5768 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5770 return (code == CONST_INT
5771 && INTVAL (index) < range
5772 && INTVAL (index) > -range);
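/* [Editorial note] The ranges checked above are easier to see without the
   RTL plumbing.  The following is a minimal, hypothetical sketch -- not
   part of GCC, names invented -- of the constant-offset limits for ARM
   state: +-255 for LDRD, -4095..4091 for the two-LDR DImode fallback,
   +-255 for halfword and signed-byte loads on ARMv4 and up, and +-4095
   for word/byte accesses.  Pre-ARMv4 halfword handling and the
   shifted-index forms are omitted.  */
#if 0 /* Illustrative sketch only.  */
static int
example_arm_const_index_in_range (long long val, int is_64bit, int have_ldrd,
                                  int is_halfword_or_signed_byte)
{
  if (is_64bit)
    /* LDRD has an 8-bit offset field; without it, DImode uses two LDRs
       with 12-bit offsets, and the second access needs VAL + 4 to stay
       representable.  */
    return have_ldrd ? (val > -256 && val < 256)
                     : (val > -4096 && val < 4092);

  if (is_halfword_or_signed_byte)
    /* LDRH/LDRSH/LDRSB only have an 8-bit offset field.  */
    return val > -256 && val < 256;

  /* LDR/LDRB/STR/STRB have a 12-bit offset field.  */
  return val > -4096 && val < 4096;
}
#endif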
5775 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
5776 index operand, i.e. 1, 2, 4 or 8. */
5777 static bool
5778 thumb2_index_mul_operand (rtx op)
5780 HOST_WIDE_INT val;
5782 if (GET_CODE(op) != CONST_INT)
5783 return false;
5785 val = INTVAL(op);
5786 return (val == 1 || val == 2 || val == 4 || val == 8);
5789 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5790 static int
5791 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5793 enum rtx_code code = GET_CODE (index);
5795 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5796 /* Standard coprocessor addressing modes. */
5797 if (TARGET_HARD_FLOAT
5798 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5799 && (mode == SFmode || mode == DFmode
5800 || (TARGET_MAVERICK && mode == DImode)))
5801 return (code == CONST_INT && INTVAL (index) < 1024
5802 /* Thumb-2 allows only > -256 index range for its core register
5803 load/stores. Since we allow SF/DF in core registers, we have
5804 to use the intersection between -256~4096 (core) and -1024~1024
5805 (coprocessor). */
5806 && INTVAL (index) > -256
5807 && (INTVAL (index) & 3) == 0);
5809 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5811 /* For DImode assume values will usually live in core regs
5812 and only allow LDRD addressing modes. */
5813 if (!TARGET_LDRD || mode != DImode)
5814 return (code == CONST_INT
5815 && INTVAL (index) < 1024
5816 && INTVAL (index) > -1024
5817 && (INTVAL (index) & 3) == 0);
5820 /* For quad modes, we restrict the constant offset to be slightly less
5821 than what the instruction format permits. We do this because for
5822 quad mode moves, we will actually decompose them into two separate
5823 double-mode reads or writes. INDEX must therefore be a valid
5824 (double-mode) offset and so must INDEX+8. */
5825 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5826 return (code == CONST_INT
5827 && INTVAL (index) < 1016
5828 && INTVAL (index) > -1024
5829 && (INTVAL (index) & 3) == 0);
5831 /* We have no such constraint on double mode offsets, so we permit the
5832 full range of the instruction format. */
5833 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5834 return (code == CONST_INT
5835 && INTVAL (index) < 1024
5836 && INTVAL (index) > -1024
5837 && (INTVAL (index) & 3) == 0);
5839 if (arm_address_register_rtx_p (index, strict_p)
5840 && (GET_MODE_SIZE (mode) <= 4))
5841 return 1;
5843 if (mode == DImode || mode == DFmode)
5845 if (code == CONST_INT)
5847 HOST_WIDE_INT val = INTVAL (index);
5848 /* ??? Can we assume ldrd for thumb2? */
5849 /* Thumb-2 ldrd only has reg+const addressing modes. */
5850 /* ldrd supports offsets of +-1020.
5851 However the ldr fallback does not. */
5852 return val > -256 && val < 256 && (val & 3) == 0;
5854 else
5855 return 0;
5858 if (code == MULT)
5860 rtx xiop0 = XEXP (index, 0);
5861 rtx xiop1 = XEXP (index, 1);
5863 return ((arm_address_register_rtx_p (xiop0, strict_p)
5864 && thumb2_index_mul_operand (xiop1))
5865 || (arm_address_register_rtx_p (xiop1, strict_p)
5866 && thumb2_index_mul_operand (xiop0)));
5868 else if (code == ASHIFT)
5870 rtx op = XEXP (index, 1);
5872 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5873 && GET_CODE (op) == CONST_INT
5874 && INTVAL (op) > 0
5875 && INTVAL (op) <= 3);
5878 return (code == CONST_INT
5879 && INTVAL (index) < 4096
5880 && INTVAL (index) > -256);
5883 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5884 static int
5885 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5887 int regno;
5889 if (GET_CODE (x) != REG)
5890 return 0;
5892 regno = REGNO (x);
5894 if (strict_p)
5895 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5897 return (regno <= LAST_LO_REGNUM
5898 || regno > LAST_VIRTUAL_REGISTER
5899 || regno == FRAME_POINTER_REGNUM
5900 || (GET_MODE_SIZE (mode) >= 4
5901 && (regno == STACK_POINTER_REGNUM
5902 || regno >= FIRST_PSEUDO_REGISTER
5903 || x == hard_frame_pointer_rtx
5904 || x == arg_pointer_rtx)));
5907 /* Return nonzero if x is a legitimate index register. This is the case
5908 for any base register that can access a QImode object. */
5909 inline static int
5910 thumb1_index_register_rtx_p (rtx x, int strict_p)
5912 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5915 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5917 The AP may be eliminated to either the SP or the FP, so we use the
5918 least common denominator, i.e. SImode, and offsets from 0 to 64.
5920 ??? Verify whether the above is the right approach.
5922 ??? Also, the FP may be eliminated to the SP, so perhaps that
5923 needs special handling also.
5925 ??? Look at how the mips16 port solves this problem. It probably uses
5926 better ways to solve some of these problems.
5928 Although such addresses are not incorrect, we don't accept QImode and HImode
5929 addresses based on the frame pointer or arg pointer until the
5930 reload pass starts. This is so that eliminating such addresses
5931 into stack based ones won't produce impossible code. */
5932 static int
5933 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5935 /* ??? Not clear if this is right. Experiment. */
5936 if (GET_MODE_SIZE (mode) < 4
5937 && !(reload_in_progress || reload_completed)
5938 && (reg_mentioned_p (frame_pointer_rtx, x)
5939 || reg_mentioned_p (arg_pointer_rtx, x)
5940 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5941 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5942 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5943 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5944 return 0;
5946 /* Accept any base register. SP only in SImode or larger. */
5947 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5948 return 1;
5950 /* This is PC relative data before arm_reorg runs. */
5951 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5952 && GET_CODE (x) == SYMBOL_REF
5953 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5954 return 1;
5956 /* This is PC relative data after arm_reorg runs. */
5957 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5958 && reload_completed
5959 && (GET_CODE (x) == LABEL_REF
5960 || (GET_CODE (x) == CONST
5961 && GET_CODE (XEXP (x, 0)) == PLUS
5962 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5963 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5964 return 1;
5966 /* Post-inc indexing only supported for SImode and larger. */
5967 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5968 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5969 return 1;
5971 else if (GET_CODE (x) == PLUS)
5973 /* REG+REG address can be any two index registers. */
5974 /* We disallow FRAME+REG addressing since we know that FRAME
5975 will be replaced with STACK, and SP relative addressing only
5976 permits SP+OFFSET. */
5977 if (GET_MODE_SIZE (mode) <= 4
5978 && XEXP (x, 0) != frame_pointer_rtx
5979 && XEXP (x, 1) != frame_pointer_rtx
5980 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5981 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5982 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5983 return 1;
5985 /* REG+const has 5-7 bit offset for non-SP registers. */
5986 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5987 || XEXP (x, 0) == arg_pointer_rtx)
5988 && GET_CODE (XEXP (x, 1)) == CONST_INT
5989 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5990 return 1;
5992 /* REG+const has 10-bit offset for SP, but only SImode and
5993 larger are supported. */
5994 /* ??? Should probably check for DI/DFmode overflow here
5995 just like GO_IF_LEGITIMATE_OFFSET does. */
5996 else if (GET_CODE (XEXP (x, 0)) == REG
5997 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5998 && GET_MODE_SIZE (mode) >= 4
5999 && GET_CODE (XEXP (x, 1)) == CONST_INT
6000 && INTVAL (XEXP (x, 1)) >= 0
6001 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6002 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6003 return 1;
6005 else if (GET_CODE (XEXP (x, 0)) == REG
6006 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6007 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6008 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6009 && REGNO (XEXP (x, 0))
6010 <= LAST_VIRTUAL_POINTER_REGISTER))
6011 && GET_MODE_SIZE (mode) >= 4
6012 && GET_CODE (XEXP (x, 1)) == CONST_INT
6013 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6014 return 1;
6017 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6018 && GET_MODE_SIZE (mode) == 4
6019 && GET_CODE (x) == SYMBOL_REF
6020 && CONSTANT_POOL_ADDRESS_P (x)
6021 && ! (flag_pic
6022 && symbol_mentioned_p (get_pool_constant (x))
6023 && ! pcrel_constant_p (get_pool_constant (x))))
6024 return 1;
6026 return 0;
6029 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6030 instruction of mode MODE. */
6032 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6034 switch (GET_MODE_SIZE (mode))
6036 case 1:
6037 return val >= 0 && val < 32;
6039 case 2:
6040 return val >= 0 && val < 64 && (val & 1) == 0;
6042 default:
6043 return (val >= 0
6044 && (val + GET_MODE_SIZE (mode)) <= 128
6045 && (val & 3) == 0);
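/* [Editorial note] Worked examples for the ranges accepted above, assuming
   the usual Thumb-1 5-bit offset fields scaled by the access size:
   byte accesses allow 0..31, halfword accesses 0..62 (even), and word
   accesses 0..124 (a multiple of 4).  A hypothetical self-check -- not
   part of GCC -- exercising those bounds:  */
#if 0 /* Illustrative sketch only.  */
static void
example_thumb_offset_checks (void)
{
  gcc_assert (thumb_legitimate_offset_p (QImode, 31)
              && !thumb_legitimate_offset_p (QImode, 32));
  gcc_assert (thumb_legitimate_offset_p (HImode, 62)
              && !thumb_legitimate_offset_p (HImode, 63));
  gcc_assert (thumb_legitimate_offset_p (SImode, 124)
              && !thumb_legitimate_offset_p (SImode, 126));
}
#endif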
6049 bool
6050 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6052 if (TARGET_ARM)
6053 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6054 else if (TARGET_THUMB2)
6055 return thumb2_legitimate_address_p (mode, x, strict_p);
6056 else /* if (TARGET_THUMB1) */
6057 return thumb1_legitimate_address_p (mode, x, strict_p);
6060 /* Build the SYMBOL_REF for __tls_get_addr. */
6062 static GTY(()) rtx tls_get_addr_libfunc;
6064 static rtx
6065 get_tls_get_addr (void)
6067 if (!tls_get_addr_libfunc)
6068 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6069 return tls_get_addr_libfunc;
6072 static rtx
6073 arm_load_tp (rtx target)
6075 if (!target)
6076 target = gen_reg_rtx (SImode);
6078 if (TARGET_HARD_TP)
6080 /* Can return in any reg. */
6081 emit_insn (gen_load_tp_hard (target));
6083 else
6085 /* Always returned in r0. Immediately copy the result into a pseudo,
6086 otherwise other uses of r0 (e.g. setting up function arguments) may
6087 clobber the value. */
6089 rtx tmp;
6091 emit_insn (gen_load_tp_soft ());
6093 tmp = gen_rtx_REG (SImode, 0);
6094 emit_move_insn (target, tmp);
6096 return target;
6099 static rtx
6100 load_tls_operand (rtx x, rtx reg)
6102 rtx tmp;
6104 if (reg == NULL_RTX)
6105 reg = gen_reg_rtx (SImode);
6107 tmp = gen_rtx_CONST (SImode, x);
6109 emit_move_insn (reg, tmp);
6111 return reg;
6114 static rtx
6115 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6117 rtx insns, label, labelno, sum;
6119 start_sequence ();
6121 labelno = GEN_INT (pic_labelno++);
6122 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6123 label = gen_rtx_CONST (VOIDmode, label);
6125 sum = gen_rtx_UNSPEC (Pmode,
6126 gen_rtvec (4, x, GEN_INT (reloc), label,
6127 GEN_INT (TARGET_ARM ? 8 : 4)),
6128 UNSPEC_TLS);
6129 reg = load_tls_operand (sum, reg);
6131 if (TARGET_ARM)
6132 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6133 else if (TARGET_THUMB2)
6134 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6135 else /* TARGET_THUMB1 */
6136 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6138 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
6139 Pmode, 1, reg, Pmode);
6141 insns = get_insns ();
6142 end_sequence ();
6144 return insns;
6148 legitimize_tls_address (rtx x, rtx reg)
6150 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6151 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6153 switch (model)
6155 case TLS_MODEL_GLOBAL_DYNAMIC:
6156 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6157 dest = gen_reg_rtx (Pmode);
6158 emit_libcall_block (insns, dest, ret, x);
6159 return dest;
6161 case TLS_MODEL_LOCAL_DYNAMIC:
6162 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6164 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6165 share the LDM result with other LD model accesses. */
6166 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6167 UNSPEC_TLS);
6168 dest = gen_reg_rtx (Pmode);
6169 emit_libcall_block (insns, dest, ret, eqv);
6171 /* Load the addend. */
6172 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
6173 UNSPEC_TLS);
6174 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6175 return gen_rtx_PLUS (Pmode, dest, addend);
6177 case TLS_MODEL_INITIAL_EXEC:
6178 labelno = GEN_INT (pic_labelno++);
6179 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6180 label = gen_rtx_CONST (VOIDmode, label);
6181 sum = gen_rtx_UNSPEC (Pmode,
6182 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6183 GEN_INT (TARGET_ARM ? 8 : 4)),
6184 UNSPEC_TLS);
6185 reg = load_tls_operand (sum, reg);
6187 if (TARGET_ARM)
6188 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6189 else if (TARGET_THUMB2)
6190 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6191 else
6193 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6194 emit_move_insn (reg, gen_const_mem (SImode, reg));
6197 tp = arm_load_tp (NULL_RTX);
6199 return gen_rtx_PLUS (Pmode, tp, reg);
6201 case TLS_MODEL_LOCAL_EXEC:
6202 tp = arm_load_tp (NULL_RTX);
6204 reg = gen_rtx_UNSPEC (Pmode,
6205 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6206 UNSPEC_TLS);
6207 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6209 return gen_rtx_PLUS (Pmode, tp, reg);
6211 default:
6212 abort ();
6216 /* Try machine-dependent ways of modifying an illegitimate address
6217 to be legitimate. If we find one, return the new, valid address. */
6219 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6221 if (!TARGET_ARM)
6223 /* TODO: legitimize_address for Thumb2. */
6224 if (TARGET_THUMB2)
6225 return x;
6226 return thumb_legitimize_address (x, orig_x, mode);
6229 if (arm_tls_symbol_p (x))
6230 return legitimize_tls_address (x, NULL_RTX);
6232 if (GET_CODE (x) == PLUS)
6234 rtx xop0 = XEXP (x, 0);
6235 rtx xop1 = XEXP (x, 1);
6237 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6238 xop0 = force_reg (SImode, xop0);
6240 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6241 xop1 = force_reg (SImode, xop1);
6243 if (ARM_BASE_REGISTER_RTX_P (xop0)
6244 && GET_CODE (xop1) == CONST_INT)
6246 HOST_WIDE_INT n, low_n;
6247 rtx base_reg, val;
6248 n = INTVAL (xop1);
6250 /* VFP addressing modes actually allow greater offsets, but for
6251 now we just stick with the lowest common denominator. */
6252 if (mode == DImode
6253 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6255 low_n = n & 0x0f;
6256 n &= ~0x0f;
6257 if (low_n > 4)
6259 n += 16;
6260 low_n -= 16;
6263 else
6265 low_n = ((mode) == TImode ? 0
6266 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6267 n -= low_n;
6270 base_reg = gen_reg_rtx (SImode);
6271 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6272 emit_move_insn (base_reg, val);
6273 x = plus_constant (base_reg, low_n);
6275 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6276 x = gen_rtx_PLUS (SImode, xop0, xop1);
6279 /* XXX We don't allow MINUS any more -- see comment in
6280 arm_legitimate_address_outer_p (). */
6281 else if (GET_CODE (x) == MINUS)
6283 rtx xop0 = XEXP (x, 0);
6284 rtx xop1 = XEXP (x, 1);
6286 if (CONSTANT_P (xop0))
6287 xop0 = force_reg (SImode, xop0);
6289 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6290 xop1 = force_reg (SImode, xop1);
6292 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6293 x = gen_rtx_MINUS (SImode, xop0, xop1);
6296 /* Make sure to take full advantage of the pre-indexed addressing mode
6297 with absolute addresses, which often allows the base register to be
6298 shared between multiple adjacent memory references, and might
6299 even allow the minipool to be avoided entirely. */
6300 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6302 unsigned int bits;
6303 HOST_WIDE_INT mask, base, index;
6304 rtx base_reg;
6306 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
6307 use an 8-bit index. So let's use a 12-bit index for SImode only and
6308 hope that arm_gen_constant will enable ldrb to use more bits. */
6309 bits = (mode == SImode) ? 12 : 8;
6310 mask = (1 << bits) - 1;
6311 base = INTVAL (x) & ~mask;
6312 index = INTVAL (x) & mask;
6313 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6315 /* It'll most probably be more efficient to generate the base
6316 with more bits set and use a negative index instead. */
6317 base |= mask;
6318 index -= mask;
6320 base_reg = force_reg (SImode, GEN_INT (base));
6321 x = plus_constant (base_reg, index);
6324 if (flag_pic)
6326 /* We need to find and carefully transform any SYMBOL and LABEL
6327 references; so go back to the original address expression. */
6328 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6330 if (new_x != orig_x)
6331 x = new_x;
6334 return x;
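/* [Editorial note] A standalone sketch -- not part of GCC, names invented --
   of the base/index split performed above for an absolute address.  For
   example, the SImode address 0x00fff804 first splits into base 0x00fff000
   and index 0x804; because that base has many bits set, the heuristic
   switches to base 0x00ffffff (a single MVN on ARM) and the negative
   index -0x7fb.  */
#if 0 /* Illustrative sketch only.  */
static long long
example_split_absolute_address (long long addr, int is_simode,
                                long long *index_out)
{
  unsigned int bits = is_simode ? 12 : 8;   /* ldr/ldrb vs. the rest.  */
  long long mask = (1 << bits) - 1;
  long long base = addr & ~mask;
  long long index = addr & mask;

  /* Prefer a denser base constant plus a negative index when the base
     would otherwise have many bits set.  */
  if (bit_count (base & 0xffffffff) > (32 - bits) / 2)
    {
      base |= mask;
      index -= mask;
    }

  *index_out = index;
  return base;          /* The caller forces BASE into a register.  */
}
#endif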
6338 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6339 to be legitimate. If we find one, return the new, valid address. */
6341 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6343 if (arm_tls_symbol_p (x))
6344 return legitimize_tls_address (x, NULL_RTX);
6346 if (GET_CODE (x) == PLUS
6347 && GET_CODE (XEXP (x, 1)) == CONST_INT
6348 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6349 || INTVAL (XEXP (x, 1)) < 0))
6351 rtx xop0 = XEXP (x, 0);
6352 rtx xop1 = XEXP (x, 1);
6353 HOST_WIDE_INT offset = INTVAL (xop1);
6355 /* Try to fold the offset into a biasing of the base register and
6356 then offsetting that. Don't do this when optimizing for space
6357 since it can cause too many CSEs. */
6358 if (optimize_size && offset >= 0
6359 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6361 HOST_WIDE_INT delta;
6363 if (offset >= 256)
6364 delta = offset - (256 - GET_MODE_SIZE (mode));
6365 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6366 delta = 31 * GET_MODE_SIZE (mode);
6367 else
6368 delta = offset & (~31 * GET_MODE_SIZE (mode));
6370 xop0 = force_operand (plus_constant (xop0, offset - delta),
6371 NULL_RTX);
6372 x = plus_constant (xop0, delta);
6374 else if (offset < 0 && offset > -256)
6375 /* Small negative offsets are best done with a subtract before the
6376 dereference, since forcing these into a register normally takes two
6377 instructions. */
6378 x = force_operand (x, NULL_RTX);
6379 else
6381 /* For the remaining cases, force the constant into a register. */
6382 xop1 = force_reg (SImode, xop1);
6383 x = gen_rtx_PLUS (SImode, xop0, xop1);
6386 else if (GET_CODE (x) == PLUS
6387 && s_register_operand (XEXP (x, 1), SImode)
6388 && !s_register_operand (XEXP (x, 0), SImode))
6390 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6392 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6395 if (flag_pic)
6397 /* We need to find and carefully transform any SYMBOL and LABEL
6398 references; so go back to the original address expression. */
6399 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6401 if (new_x != orig_x)
6402 x = new_x;
6405 return x;
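/* [Editorial note] A numeric sketch -- not part of GCC, name invented -- of
   the rebiasing above.  E.g. an SImode access at base + 300 is out of range
   for ldr's 0..124 offsets; the code keeps delta = 300 - 252 = 48 in the
   address, the remaining 252 still fits an 8-bit immediate add, and the
   access becomes ldr rd, [rtmp, #48].  */
#if 0 /* Illustrative sketch only.  */
static long long
example_thumb_rebias_delta (long long offset, int mode_size)
{
  if (offset >= 256)
    return offset - (256 - mode_size);
  else if (offset < 32 * mode_size + 8)
    return 31 * mode_size;
  else
    return offset & (~31 * mode_size);
}
#endif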
6408 bool
6409 arm_legitimize_reload_address (rtx *p,
6410 enum machine_mode mode,
6411 int opnum, int type,
6412 int ind_levels ATTRIBUTE_UNUSED)
6414 if (GET_CODE (*p) == PLUS
6415 && GET_CODE (XEXP (*p, 0)) == REG
6416 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6417 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6419 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6420 HOST_WIDE_INT low, high;
6422 if (mode == DImode || (mode == DFmode && TARGET_SOFT_FLOAT))
6423 low = ((val & 0xf) ^ 0x8) - 0x8;
6424 else if (TARGET_MAVERICK && TARGET_HARD_FLOAT)
6425 /* Need to be careful, -256 is not a valid offset. */
6426 low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
6427 else if (mode == SImode
6428 || (mode == SFmode && TARGET_SOFT_FLOAT)
6429 || ((mode == HImode || mode == QImode) && ! arm_arch4))
6430 /* Need to be careful, -4096 is not a valid offset. */
6431 low = val >= 0 ? (val & 0xfff) : -((-val) & 0xfff);
6432 else if ((mode == HImode || mode == QImode) && arm_arch4)
6433 /* Need to be careful, -256 is not a valid offset. */
6434 low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
6435 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6436 && TARGET_HARD_FLOAT && TARGET_FPA)
6437 /* Need to be careful, -1024 is not a valid offset. */
6438 low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff);
6439 else
6440 return false;
6442 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6443 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6444 - (unsigned HOST_WIDE_INT) 0x80000000);
6445 /* Check for overflow or zero */
6446 if (low == 0 || high == 0 || (high + low != val))
6447 return false;
6449 /* Reload the high part into a base reg; leave the low part
6450 in the mem. */
6451 *p = gen_rtx_PLUS (GET_MODE (*p),
6452 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6453 GEN_INT (high)),
6454 GEN_INT (low));
6455 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6456 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6457 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6458 return true;
6461 return false;
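/* [Editorial note] The split above keeps a directly encodable LOW part in
   the memory reference and reloads BASE + HIGH into a register.  A
   standalone sketch of the SImode case -- not part of GCC, names invented.
   E.g. val = 0x3fff gives low = 0xfff (a legal 12-bit ldr offset) and
   high = 0x3000, while val = -4100 gives low = -4 and high = -4096.  */
#if 0 /* Illustrative sketch only.  */
static void
example_split_reload_offset (long long val, long long *high, long long *low)
{
  /* Keep at most 12 bits in the mem; -4096 itself is not a valid offset,
     hence the asymmetric handling of negative values.  */
  *low = val >= 0 ? (val & 0xfff) : -((-val) & 0xfff);

  /* Sign-extend the remainder from 32 bits, mirroring the computation
     of HIGH above.  */
  *high = (((val - *low) & 0xffffffffll) ^ 0x80000000ll) - 0x80000000ll;
}
#endif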
6465 thumb_legitimize_reload_address (rtx *x_p,
6466 enum machine_mode mode,
6467 int opnum, int type,
6468 int ind_levels ATTRIBUTE_UNUSED)
6470 rtx x = *x_p;
6472 if (GET_CODE (x) == PLUS
6473 && GET_MODE_SIZE (mode) < 4
6474 && REG_P (XEXP (x, 0))
6475 && XEXP (x, 0) == stack_pointer_rtx
6476 && GET_CODE (XEXP (x, 1)) == CONST_INT
6477 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6479 rtx orig_x = x;
6481 x = copy_rtx (x);
6482 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6483 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6484 return x;
6487 /* If both registers are hi-regs, then it's better to reload the
6488 entire expression rather than each register individually. That
6489 only requires one reload register rather than two. */
6490 if (GET_CODE (x) == PLUS
6491 && REG_P (XEXP (x, 0))
6492 && REG_P (XEXP (x, 1))
6493 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6494 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6496 rtx orig_x = x;
6498 x = copy_rtx (x);
6499 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6500 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6501 return x;
6504 return NULL;
6507 /* Test for various thread-local symbols. */
6509 /* Return TRUE if X is a thread-local symbol. */
6511 static bool
6512 arm_tls_symbol_p (rtx x)
6514 if (! TARGET_HAVE_TLS)
6515 return false;
6517 if (GET_CODE (x) != SYMBOL_REF)
6518 return false;
6520 return SYMBOL_REF_TLS_MODEL (x) != 0;
6523 /* Helper for arm_tls_referenced_p. */
6525 static int
6526 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6528 if (GET_CODE (*x) == SYMBOL_REF)
6529 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6531 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6532 TLS offsets, not real symbol references. */
6533 if (GET_CODE (*x) == UNSPEC
6534 && XINT (*x, 1) == UNSPEC_TLS)
6535 return -1;
6537 return 0;
6540 /* Return TRUE if X contains any TLS symbol references. */
6542 bool
6543 arm_tls_referenced_p (rtx x)
6545 if (! TARGET_HAVE_TLS)
6546 return false;
6548 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6551 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6553 bool
6554 arm_cannot_force_const_mem (rtx x)
6556 rtx base, offset;
6558 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6560 split_const (x, &base, &offset);
6561 if (GET_CODE (base) == SYMBOL_REF
6562 && !offset_within_block_p (base, INTVAL (offset)))
6563 return true;
6565 return arm_tls_referenced_p (x);
6568 #define REG_OR_SUBREG_REG(X) \
6569 (GET_CODE (X) == REG \
6570 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6572 #define REG_OR_SUBREG_RTX(X) \
6573 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6575 static inline int
6576 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6578 enum machine_mode mode = GET_MODE (x);
6579 int total;
6581 switch (code)
6583 case ASHIFT:
6584 case ASHIFTRT:
6585 case LSHIFTRT:
6586 case ROTATERT:
6587 case PLUS:
6588 case MINUS:
6589 case COMPARE:
6590 case NEG:
6591 case NOT:
6592 return COSTS_N_INSNS (1);
6594 case MULT:
6595 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6597 int cycles = 0;
6598 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6600 while (i)
6602 i >>= 2;
6603 cycles++;
6605 return COSTS_N_INSNS (2) + cycles;
6607 return COSTS_N_INSNS (1) + 16;
6609 case SET:
6610 return (COSTS_N_INSNS (1)
6611 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6612 + GET_CODE (SET_DEST (x)) == MEM));
6614 case CONST_INT:
6615 if (outer == SET)
6617 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6618 return 0;
6619 if (thumb_shiftable_const (INTVAL (x)))
6620 return COSTS_N_INSNS (2);
6621 return COSTS_N_INSNS (3);
6623 else if ((outer == PLUS || outer == COMPARE)
6624 && INTVAL (x) < 256 && INTVAL (x) > -256)
6625 return 0;
6626 else if ((outer == IOR || outer == XOR || outer == AND)
6627 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6628 return COSTS_N_INSNS (1);
6629 else if (outer == AND)
6631 int i;
6632 /* This duplicates the tests in the andsi3 expander. */
6633 for (i = 9; i <= 31; i++)
6634 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6635 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6636 return COSTS_N_INSNS (2);
6638 else if (outer == ASHIFT || outer == ASHIFTRT
6639 || outer == LSHIFTRT)
6640 return 0;
6641 return COSTS_N_INSNS (2);
6643 case CONST:
6644 case CONST_DOUBLE:
6645 case LABEL_REF:
6646 case SYMBOL_REF:
6647 return COSTS_N_INSNS (3);
6649 case UDIV:
6650 case UMOD:
6651 case DIV:
6652 case MOD:
6653 return 100;
6655 case TRUNCATE:
6656 return 99;
6658 case AND:
6659 case XOR:
6660 case IOR:
6661 /* XXX guess. */
6662 return 8;
6664 case MEM:
6665 /* XXX another guess. */
6666 /* Memory costs quite a lot for the first word, but subsequent words
6667 load at the equivalent of a single insn each. */
6668 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6669 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6670 ? 4 : 0));
6672 case IF_THEN_ELSE:
6673 /* XXX a guess. */
6674 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6675 return 14;
6676 return 2;
6678 case SIGN_EXTEND:
6679 case ZERO_EXTEND:
6680 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6681 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6683 if (mode == SImode)
6684 return total;
6686 if (arm_arch6)
6687 return total + COSTS_N_INSNS (1);
6689 /* Assume a two-shift sequence. Increase the cost slightly so
6690 we prefer actual shifts over an extend operation. */
6691 return total + 1 + COSTS_N_INSNS (2);
6693 default:
6694 return 99;
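/* [Editorial note] The MULT case above charges extra cycles in proportion
   to the number of significant bits in the constant multiplier, two bits
   per step.  A standalone sketch of that count -- not part of GCC, name
   invented.  E.g. a multiply by 100 (seven significant bits) adds four
   cycles on top of COSTS_N_INSNS (2).  */
#if 0 /* Illustrative sketch only.  */
static int
example_thumb1_mul_cycles (unsigned long long multiplier)
{
  int cycles = 0;

  while (multiplier)
    {
      multiplier >>= 2;
      cycles++;
    }
  return cycles;
}
#endif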
6698 static inline bool
6699 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6701 enum machine_mode mode = GET_MODE (x);
6702 enum rtx_code subcode;
6703 rtx operand;
6704 enum rtx_code code = GET_CODE (x);
6705 *total = 0;
6707 switch (code)
6709 case MEM:
6710 /* Memory costs quite a lot for the first word, but subsequent words
6711 load at the equivalent of a single insn each. */
6712 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6713 return true;
6715 case DIV:
6716 case MOD:
6717 case UDIV:
6718 case UMOD:
6719 if (TARGET_HARD_FLOAT && mode == SFmode)
6720 *total = COSTS_N_INSNS (2);
6721 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6722 *total = COSTS_N_INSNS (4);
6723 else
6724 *total = COSTS_N_INSNS (20);
6725 return false;
6727 case ROTATE:
6728 if (GET_CODE (XEXP (x, 1)) == REG)
6729 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6730 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6731 *total = rtx_cost (XEXP (x, 1), code, speed);
6733 /* Fall through */
6734 case ROTATERT:
6735 if (mode != SImode)
6737 *total += COSTS_N_INSNS (4);
6738 return true;
6741 /* Fall through */
6742 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6743 *total += rtx_cost (XEXP (x, 0), code, speed);
6744 if (mode == DImode)
6746 *total += COSTS_N_INSNS (3);
6747 return true;
6750 *total += COSTS_N_INSNS (1);
6751 /* Increase the cost of complex shifts because they aren't any faster,
6752 and reduce dual issue opportunities. */
6753 if (arm_tune_cortex_a9
6754 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6755 ++*total;
6757 return true;
6759 case MINUS:
6760 if (mode == DImode)
6762 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6763 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6764 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6766 *total += rtx_cost (XEXP (x, 1), code, speed);
6767 return true;
6770 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6771 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6773 *total += rtx_cost (XEXP (x, 0), code, speed);
6774 return true;
6777 return false;
6780 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6782 if (TARGET_HARD_FLOAT
6783 && (mode == SFmode
6784 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6786 *total = COSTS_N_INSNS (1);
6787 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6788 && arm_const_double_rtx (XEXP (x, 0)))
6790 *total += rtx_cost (XEXP (x, 1), code, speed);
6791 return true;
6794 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6795 && arm_const_double_rtx (XEXP (x, 1)))
6797 *total += rtx_cost (XEXP (x, 0), code, speed);
6798 return true;
6801 return false;
6803 *total = COSTS_N_INSNS (20);
6804 return false;
6807 *total = COSTS_N_INSNS (1);
6808 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6809 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6811 *total += rtx_cost (XEXP (x, 1), code, speed);
6812 return true;
6815 subcode = GET_CODE (XEXP (x, 1));
6816 if (subcode == ASHIFT || subcode == ASHIFTRT
6817 || subcode == LSHIFTRT
6818 || subcode == ROTATE || subcode == ROTATERT)
6820 *total += rtx_cost (XEXP (x, 0), code, speed);
6821 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6822 return true;
6825 /* A shift as a part of RSB costs no more than RSB itself. */
6826 if (GET_CODE (XEXP (x, 0)) == MULT
6827 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6829 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6830 *total += rtx_cost (XEXP (x, 1), code, speed);
6831 return true;
6834 if (subcode == MULT
6835 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6837 *total += rtx_cost (XEXP (x, 0), code, speed);
6838 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6839 return true;
6842 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6843 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6845 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6846 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6847 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6848 *total += COSTS_N_INSNS (1);
6850 return true;
6853 /* Fall through */
6855 case PLUS:
6856 if (code == PLUS && arm_arch6 && mode == SImode
6857 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6858 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6860 *total = COSTS_N_INSNS (1);
6861 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6862 speed);
6863 *total += rtx_cost (XEXP (x, 1), code, speed);
6864 return true;
6867 /* MLA: All arguments must be registers. We filter out
6868 multiplication by a power of two, so that we fall through to
6869 the code below. */
6870 if (GET_CODE (XEXP (x, 0)) == MULT
6871 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6873 /* The cost comes from the cost of the multiply. */
6874 return false;
6877 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6879 if (TARGET_HARD_FLOAT
6880 && (mode == SFmode
6881 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6883 *total = COSTS_N_INSNS (1);
6884 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6885 && arm_const_double_rtx (XEXP (x, 1)))
6887 *total += rtx_cost (XEXP (x, 0), code, speed);
6888 return true;
6891 return false;
6894 *total = COSTS_N_INSNS (20);
6895 return false;
6898 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6899 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6901 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6902 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6903 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6904 *total += COSTS_N_INSNS (1);
6905 return true;
6908 /* Fall through */
6910 case AND: case XOR: case IOR:
6912 /* Normally the frame registers will be split into reg+const during
6913 reload, so it is a bad idea to combine them with other instructions,
6914 since then they might not be moved outside of loops. As a compromise
6915 we allow integration with ops that have a constant as their second
6916 operand. */
6917 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6918 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6919 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6920 *total = COSTS_N_INSNS (1);
6922 if (mode == DImode)
6924 *total += COSTS_N_INSNS (2);
6925 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6926 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6928 *total += rtx_cost (XEXP (x, 0), code, speed);
6929 return true;
6932 return false;
6935 *total += COSTS_N_INSNS (1);
6936 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6937 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6939 *total += rtx_cost (XEXP (x, 0), code, speed);
6940 return true;
6942 subcode = GET_CODE (XEXP (x, 0));
6943 if (subcode == ASHIFT || subcode == ASHIFTRT
6944 || subcode == LSHIFTRT
6945 || subcode == ROTATE || subcode == ROTATERT)
6947 *total += rtx_cost (XEXP (x, 1), code, speed);
6948 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6949 return true;
6952 if (subcode == MULT
6953 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6955 *total += rtx_cost (XEXP (x, 1), code, speed);
6956 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6957 return true;
6960 if (subcode == UMIN || subcode == UMAX
6961 || subcode == SMIN || subcode == SMAX)
6963 *total = COSTS_N_INSNS (3);
6964 return true;
6967 return false;
6969 case MULT:
6970 /* This should have been handled by the CPU specific routines. */
6971 gcc_unreachable ();
6973 case TRUNCATE:
6974 if (arm_arch3m && mode == SImode
6975 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6976 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6977 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6978 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6979 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6980 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6982 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6983 return true;
6985 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6986 return false;
6988 case NEG:
6989 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6991 if (TARGET_HARD_FLOAT
6992 && (mode == SFmode
6993 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6995 *total = COSTS_N_INSNS (1);
6996 return false;
6998 *total = COSTS_N_INSNS (2);
6999 return false;
7002 /* Fall through */
7003 case NOT:
7004 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7005 if (mode == SImode && code == NOT)
7007 subcode = GET_CODE (XEXP (x, 0));
7008 if (subcode == ASHIFT || subcode == ASHIFTRT
7009 || subcode == LSHIFTRT
7010 || subcode == ROTATE || subcode == ROTATERT
7011 || (subcode == MULT
7012 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7014 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7015 /* Register shifts cost an extra cycle. */
7016 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7017 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7018 subcode, speed);
7019 return true;
7023 return false;
7025 case IF_THEN_ELSE:
7026 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7028 *total = COSTS_N_INSNS (4);
7029 return true;
7032 operand = XEXP (x, 0);
7034 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7035 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7036 && GET_CODE (XEXP (operand, 0)) == REG
7037 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7038 *total += COSTS_N_INSNS (1);
7039 *total += (rtx_cost (XEXP (x, 1), code, speed)
7040 + rtx_cost (XEXP (x, 2), code, speed));
7041 return true;
7043 case NE:
7044 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7046 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7047 return true;
7049 goto scc_insn;
7051 case GE:
7052 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7053 && mode == SImode && XEXP (x, 1) == const0_rtx)
7055 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7056 return true;
7058 goto scc_insn;
7060 case LT:
7061 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7062 && mode == SImode && XEXP (x, 1) == const0_rtx)
7064 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7065 return true;
7067 goto scc_insn;
7069 case EQ:
7070 case GT:
7071 case LE:
7072 case GEU:
7073 case LTU:
7074 case GTU:
7075 case LEU:
7076 case UNORDERED:
7077 case ORDERED:
7078 case UNEQ:
7079 case UNGE:
7080 case UNLT:
7081 case UNGT:
7082 case UNLE:
7083 scc_insn:
7084 /* SCC insns. If the comparison has already been
7085 performed, they cost 2 instructions. Otherwise they need
7086 an additional comparison before them. */
7087 *total = COSTS_N_INSNS (2);
7088 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7090 return true;
7093 /* Fall through */
7094 case COMPARE:
7095 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7097 *total = 0;
7098 return true;
7101 *total += COSTS_N_INSNS (1);
7102 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7103 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7105 *total += rtx_cost (XEXP (x, 0), code, speed);
7106 return true;
7109 subcode = GET_CODE (XEXP (x, 0));
7110 if (subcode == ASHIFT || subcode == ASHIFTRT
7111 || subcode == LSHIFTRT
7112 || subcode == ROTATE || subcode == ROTATERT)
7114 *total += rtx_cost (XEXP (x, 1), code, speed);
7115 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7116 return true;
7119 if (subcode == MULT
7120 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7122 *total += rtx_cost (XEXP (x, 1), code, speed);
7123 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7124 return true;
7127 return false;
7129 case UMIN:
7130 case UMAX:
7131 case SMIN:
7132 case SMAX:
7133 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7134 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7135 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7136 *total += rtx_cost (XEXP (x, 1), code, speed);
7137 return true;
7139 case ABS:
7140 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7142 if (TARGET_HARD_FLOAT
7143 && (mode == SFmode
7144 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7146 *total = COSTS_N_INSNS (1);
7147 return false;
7149 *total = COSTS_N_INSNS (20);
7150 return false;
7152 *total = COSTS_N_INSNS (1);
7153 if (mode == DImode)
7154 *total += COSTS_N_INSNS (3);
7155 return false;
7157 case SIGN_EXTEND:
7158 case ZERO_EXTEND:
7159 *total = 0;
7160 if (GET_MODE_CLASS (mode) == MODE_INT)
7162 rtx op = XEXP (x, 0);
7163 enum machine_mode opmode = GET_MODE (op);
7165 if (mode == DImode)
7166 *total += COSTS_N_INSNS (1);
7168 if (opmode != SImode)
7170 if (MEM_P (op))
7172 /* If !arm_arch4, we use one of the extendhisi2_mem
7173 or movhi_bytes patterns for HImode. For a QImode
7174 sign extension, we first zero-extend from memory
7175 and then perform a shift sequence. */
7176 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7177 *total += COSTS_N_INSNS (2);
7179 else if (arm_arch6)
7180 *total += COSTS_N_INSNS (1);
7182 /* We don't have the necessary insn, so we need to perform some
7183 other operation. */
7184 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7185 /* An and with constant 255. */
7186 *total += COSTS_N_INSNS (1);
7187 else
7188 /* A shift sequence. Increase costs slightly to avoid
7189 combining two shifts into an extend operation. */
7190 *total += COSTS_N_INSNS (2) + 1;
7193 return false;
7196 switch (GET_MODE (XEXP (x, 0)))
7198 case V8QImode:
7199 case V4HImode:
7200 case V2SImode:
7201 case V4QImode:
7202 case V2HImode:
7203 *total = COSTS_N_INSNS (1);
7204 return false;
7206 default:
7207 gcc_unreachable ();
7209 gcc_unreachable ();
7211 case ZERO_EXTRACT:
7212 case SIGN_EXTRACT:
7213 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7214 return true;
7216 case CONST_INT:
7217 if (const_ok_for_arm (INTVAL (x))
7218 || const_ok_for_arm (~INTVAL (x)))
7219 *total = COSTS_N_INSNS (1);
7220 else
7221 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7222 INTVAL (x), NULL_RTX,
7223 NULL_RTX, 0, 0));
7224 return true;
7226 case CONST:
7227 case LABEL_REF:
7228 case SYMBOL_REF:
7229 *total = COSTS_N_INSNS (3);
7230 return true;
7232 case HIGH:
7233 *total = COSTS_N_INSNS (1);
7234 return true;
7236 case LO_SUM:
7237 *total = COSTS_N_INSNS (1);
7238 *total += rtx_cost (XEXP (x, 0), code, speed);
7239 return true;
7241 case CONST_DOUBLE:
7242 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7243 && (mode == SFmode || !TARGET_VFP_SINGLE))
7244 *total = COSTS_N_INSNS (1);
7245 else
7246 *total = COSTS_N_INSNS (4);
7247 return true;
7249 default:
7250 *total = COSTS_N_INSNS (4);
7251 return false;
7255 /* Estimates the size cost of thumb1 instructions.
7256 For now most of the code is copied from thumb1_rtx_costs. We need
7257 finer-grained tuning when we have more related test cases. */
7258 static inline int
7259 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7261 enum machine_mode mode = GET_MODE (x);
7263 switch (code)
7265 case ASHIFT:
7266 case ASHIFTRT:
7267 case LSHIFTRT:
7268 case ROTATERT:
7269 case PLUS:
7270 case MINUS:
7271 case COMPARE:
7272 case NEG:
7273 case NOT:
7274 return COSTS_N_INSNS (1);
7276 case MULT:
7277 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7279 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
7280 into a register first. */
7281 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7282 return COSTS_N_INSNS (1) + const_size;
7284 return COSTS_N_INSNS (1);
7286 case SET:
7287 return (COSTS_N_INSNS (1)
7288 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7289 + GET_CODE (SET_DEST (x)) == MEM));
7291 case CONST_INT:
7292 if (outer == SET)
7294 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7295 return COSTS_N_INSNS (1);
7296 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7297 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7298 return COSTS_N_INSNS (2);
7299 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7300 if (thumb_shiftable_const (INTVAL (x)))
7301 return COSTS_N_INSNS (2);
7302 return COSTS_N_INSNS (3);
7304 else if ((outer == PLUS || outer == COMPARE)
7305 && INTVAL (x) < 256 && INTVAL (x) > -256)
7306 return 0;
7307 else if ((outer == IOR || outer == XOR || outer == AND)
7308 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7309 return COSTS_N_INSNS (1);
7310 else if (outer == AND)
7312 int i;
7313 /* This duplicates the tests in the andsi3 expander. */
7314 for (i = 9; i <= 31; i++)
7315 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7316 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7317 return COSTS_N_INSNS (2);
7319 else if (outer == ASHIFT || outer == ASHIFTRT
7320 || outer == LSHIFTRT)
7321 return 0;
7322 return COSTS_N_INSNS (2);
7324 case CONST:
7325 case CONST_DOUBLE:
7326 case LABEL_REF:
7327 case SYMBOL_REF:
7328 return COSTS_N_INSNS (3);
7330 case UDIV:
7331 case UMOD:
7332 case DIV:
7333 case MOD:
7334 return 100;
7336 case TRUNCATE:
7337 return 99;
7339 case AND:
7340 case XOR:
7341 case IOR:
7342 /* XXX guess. */
7343 return 8;
7345 case MEM:
7346 /* XXX another guess. */
7347 /* Memory costs quite a lot for the first word, but subsequent words
7348 load at the equivalent of a single insn each. */
7349 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7350 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7351 ? 4 : 0));
7353 case IF_THEN_ELSE:
7354 /* XXX a guess. */
7355 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7356 return 14;
7357 return 2;
7359 case ZERO_EXTEND:
7360 /* XXX still guessing. */
7361 switch (GET_MODE (XEXP (x, 0)))
7363 case QImode:
7364 return (1 + (mode == DImode ? 4 : 0)
7365 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7367 case HImode:
7368 return (4 + (mode == DImode ? 4 : 0)
7369 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7371 case SImode:
7372 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7374 default:
7375 return 99;
7378 default:
7379 return 99;
7383 /* RTX costs when optimizing for size. */
7384 static bool
7385 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7386 int *total)
7388 enum machine_mode mode = GET_MODE (x);
7389 if (TARGET_THUMB1)
7391 *total = thumb1_size_rtx_costs (x, code, outer_code);
7392 return true;
7395 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7396 switch (code)
7398 case MEM:
7399 /* A memory access costs 1 insn if the mode is small or the address is
7400 a single register; otherwise it costs one insn per word. */
7401 if (REG_P (XEXP (x, 0)))
7402 *total = COSTS_N_INSNS (1);
7403 else if (flag_pic
7404 && GET_CODE (XEXP (x, 0)) == PLUS
7405 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7406 /* This will be split into two instructions.
7407 See arm.md:calculate_pic_address. */
7408 *total = COSTS_N_INSNS (2);
7409 else
7410 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7411 return true;
7413 case DIV:
7414 case MOD:
7415 case UDIV:
7416 case UMOD:
7417 /* Needs a libcall, so it costs about this. */
7418 *total = COSTS_N_INSNS (2);
7419 return false;
7421 case ROTATE:
7422 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7424 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7425 return true;
7427 /* Fall through */
7428 case ROTATERT:
7429 case ASHIFT:
7430 case LSHIFTRT:
7431 case ASHIFTRT:
7432 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7434 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7435 return true;
7437 else if (mode == SImode)
7439 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7440 /* Slightly disparage register shifts, but not by much. */
7441 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7442 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7443 return true;
7446 /* Needs a libcall. */
7447 *total = COSTS_N_INSNS (2);
7448 return false;
7450 case MINUS:
7451 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7452 && (mode == SFmode || !TARGET_VFP_SINGLE))
7454 *total = COSTS_N_INSNS (1);
7455 return false;
7458 if (mode == SImode)
7460 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7461 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7463 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7464 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7465 || subcode1 == ROTATE || subcode1 == ROTATERT
7466 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7467 || subcode1 == ASHIFTRT)
7469 /* It's just the cost of the two operands. */
7470 *total = 0;
7471 return false;
7474 *total = COSTS_N_INSNS (1);
7475 return false;
7478 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7479 return false;
7481 case PLUS:
7482 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7483 && (mode == SFmode || !TARGET_VFP_SINGLE))
7485 *total = COSTS_N_INSNS (1);
7486 return false;
7489 /* A shift as a part of ADD costs nothing. */
7490 if (GET_CODE (XEXP (x, 0)) == MULT
7491 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7493 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7494 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7495 *total += rtx_cost (XEXP (x, 1), code, false);
7496 return true;
7499 /* Fall through */
7500 case AND: case XOR: case IOR:
7501 if (mode == SImode)
7503 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7505 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7506 || subcode == LSHIFTRT || subcode == ASHIFTRT
7507 || (code == AND && subcode == NOT))
7509 /* It's just the cost of the two operands. */
7510 *total = 0;
7511 return false;
7515 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7516 return false;
7518 case MULT:
7519 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7520 return false;
7522 case NEG:
7523 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7524 && (mode == SFmode || !TARGET_VFP_SINGLE))
7526 *total = COSTS_N_INSNS (1);
7527 return false;
7530 /* Fall through */
7531 case NOT:
7532 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7534 return false;
7536 case IF_THEN_ELSE:
7537 *total = 0;
7538 return false;
7540 case COMPARE:
7541 if (cc_register (XEXP (x, 0), VOIDmode))
7542 * total = 0;
7543 else
7544 *total = COSTS_N_INSNS (1);
7545 return false;
7547 case ABS:
7548 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7549 && (mode == SFmode || !TARGET_VFP_SINGLE))
7550 *total = COSTS_N_INSNS (1);
7551 else
7552 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7553 return false;
7555 case SIGN_EXTEND:
7556 case ZERO_EXTEND:
7557 return arm_rtx_costs_1 (x, outer_code, total, 0);
7559 case CONST_INT:
7560 if (const_ok_for_arm (INTVAL (x)))
7561 /* A multiplication by a constant requires another instruction
7562 to load the constant to a register. */
7563 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7564 ? 1 : 0);
7565 else if (const_ok_for_arm (~INTVAL (x)))
7566 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7567 else if (const_ok_for_arm (-INTVAL (x)))
7569 if (outer_code == COMPARE || outer_code == PLUS
7570 || outer_code == MINUS)
7571 *total = 0;
7572 else
7573 *total = COSTS_N_INSNS (1);
7575 else
7576 *total = COSTS_N_INSNS (2);
7577 return true;
7579 case CONST:
7580 case LABEL_REF:
7581 case SYMBOL_REF:
7582 *total = COSTS_N_INSNS (2);
7583 return true;
7585 case CONST_DOUBLE:
7586 *total = COSTS_N_INSNS (4);
7587 return true;
7589 case HIGH:
7590 case LO_SUM:
7591 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7592 cost of these slightly. */
7593 *total = COSTS_N_INSNS (1) + 1;
7594 return true;
7596 default:
7597 if (mode != VOIDmode)
7598 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7599 else
7600 *total = COSTS_N_INSNS (4); /* Who knows? */
7601 return false;
7605 /* RTX costs. Dispatch to the size-oriented or tuning-specific speed-oriented variant. */
7606 static bool
7607 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7608 bool speed)
7610 if (!speed)
7611 return arm_size_rtx_costs (x, (enum rtx_code) code,
7612 (enum rtx_code) outer_code, total);
7613 else
7614 return current_tune->rtx_costs (x, (enum rtx_code) code,
7615 (enum rtx_code) outer_code,
7616 total, speed);
7619 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7620 supported on any "slowmul" cores, so it can be ignored. */
7622 static bool
7623 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7624 int *total, bool speed)
7626 enum machine_mode mode = GET_MODE (x);
7628 if (TARGET_THUMB)
7630 *total = thumb1_rtx_costs (x, code, outer_code);
7631 return true;
7634 switch (code)
7636 case MULT:
7637 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7638 || mode == DImode)
7640 *total = COSTS_N_INSNS (20);
7641 return false;
7644 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7646 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7647 & (unsigned HOST_WIDE_INT) 0xffffffff);
7648 int cost, const_ok = const_ok_for_arm (i);
7649 int j, booth_unit_size;
7651 /* Tune as appropriate. */
7652 cost = const_ok ? 4 : 8;
7653 booth_unit_size = 2;
7654 for (j = 0; i && j < 32; j += booth_unit_size)
7656 i >>= booth_unit_size;
7657 cost++;
7660 *total = COSTS_N_INSNS (cost);
7661 *total += rtx_cost (XEXP (x, 0), code, speed);
7662 return true;
7665 *total = COSTS_N_INSNS (20);
7666 return false;
7668 default:
7669 return arm_rtx_costs_1 (x, outer_code, total, speed);
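/* [Editorial note] A standalone sketch -- not part of GCC, name invented --
   of the constant-multiply cost model shared by the "slowmul" and
   "fastmul" variants: the multiplier is consumed BOOTH_UNIT_SIZE bits per
   step (2 above, 8 in arm_fastmul_rtx_costs below).  E.g. the encodable
   constant 0xff00 costs 4 + 8 = 12 with a 2-bit unit and 4 + 2 = 6 with
   an 8-bit unit.  */
#if 0 /* Illustrative sketch only.  */
static int
example_booth_mul_cost (unsigned long long multiplier, int const_ok,
                        int booth_unit_size)
{
  int cost = const_ok ? 4 : 8;
  int j;

  for (j = 0; multiplier && j < 32; j += booth_unit_size)
    {
      multiplier >>= booth_unit_size;
      cost++;
    }
  return cost;
}
#endif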
7674 /* RTX cost for cores with a fast multiply unit (M variants). */
7676 static bool
7677 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7678 int *total, bool speed)
7680 enum machine_mode mode = GET_MODE (x);
7682 if (TARGET_THUMB1)
7684 *total = thumb1_rtx_costs (x, code, outer_code);
7685 return true;
7688 /* ??? should thumb2 use different costs? */
7689 switch (code)
7691 case MULT:
7692 /* There is no point basing this on the tuning, since it is always the
7693 fast variant if it exists at all. */
7694 if (mode == DImode
7695 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7696 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7697 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7699 *total = COSTS_N_INSNS (2);
7700 return false;
7704 if (mode == DImode)
7706 *total = COSTS_N_INSNS (5);
7707 return false;
7710 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7712 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7713 & (unsigned HOST_WIDE_INT) 0xffffffff);
7714 int cost, const_ok = const_ok_for_arm (i);
7715 int j, booth_unit_size;
7717 /* Tune as appropriate. */
7718 cost = const_ok ? 4 : 8;
7719 booth_unit_size = 8;
7720 for (j = 0; i && j < 32; j += booth_unit_size)
7722 i >>= booth_unit_size;
7723 cost++;
7726 *total = COSTS_N_INSNS (cost);
7727 return false;
7730 if (mode == SImode)
7732 *total = COSTS_N_INSNS (4);
7733 return false;
7736 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7738 if (TARGET_HARD_FLOAT
7739 && (mode == SFmode
7740 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7742 *total = COSTS_N_INSNS (1);
7743 return false;
7747 /* Requires a lib call */
7748 *total = COSTS_N_INSNS (20);
7749 return false;
7751 default:
7752 return arm_rtx_costs_1 (x, outer_code, total, speed);
7757 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7758 so it can be ignored. */
7760 static bool
7761 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7762 int *total, bool speed)
7764 enum machine_mode mode = GET_MODE (x);
7766 if (TARGET_THUMB)
7768 *total = thumb1_rtx_costs (x, code, outer_code);
7769 return true;
7772 switch (code)
7774 case COMPARE:
7775 if (GET_CODE (XEXP (x, 0)) != MULT)
7776 return arm_rtx_costs_1 (x, outer_code, total, speed);
7778 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7779 will stall until the multiplication is complete. */
7780 *total = COSTS_N_INSNS (3);
7781 return false;
7783 case MULT:
7784 /* There is no point basing this on the tuning, since it is always the
7785 fast variant if it exists at all. */
7786 if (mode == DImode
7787 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7788 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7789 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7791 *total = COSTS_N_INSNS (2);
7792 return false;
7796 if (mode == DImode)
7798 *total = COSTS_N_INSNS (5);
7799 return false;
7802 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7804 /* If operand 1 is a constant we can more accurately
7805 calculate the cost of the multiply. The multiplier can
7806 retire 15 bits on the first cycle and a further 12 on the
7807 second. We do, of course, have to load the constant into
7808 a register first. */
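	  /* For example (following the masks below): for a multiplier of
	     0x12345, i & 0xffff8000 is nonzero while i & 0xf8000000 is
	     zero, so COST works out to 1 + 1 = 2.  */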
7809 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7810 /* There's a general overhead of one cycle. */
7811 int cost = 1;
7812 unsigned HOST_WIDE_INT masked_const;
7814 if (i & 0x80000000)
7815 i = ~i;
7817 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7819 masked_const = i & 0xffff8000;
7820 if (masked_const != 0)
7822 cost++;
7823 masked_const = i & 0xf8000000;
7824 if (masked_const != 0)
7825 cost++;
7827 *total = COSTS_N_INSNS (cost);
7828 return false;
7831 if (mode == SImode)
7833 *total = COSTS_N_INSNS (3);
7834 return false;
7837 /* Requires a lib call */
7838 *total = COSTS_N_INSNS (20);
7839 return false;
7841 default:
7842 return arm_rtx_costs_1 (x, outer_code, total, speed);
7847 /* RTX costs for 9e (and later) cores. */
7849 static bool
7850 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7851 int *total, bool speed)
7853 enum machine_mode mode = GET_MODE (x);
7855 if (TARGET_THUMB1)
7857 switch (code)
7859 case MULT:
7860 *total = COSTS_N_INSNS (3);
7861 return true;
7863 default:
7864 *total = thumb1_rtx_costs (x, code, outer_code);
7865 return true;
7869 switch (code)
7871 case MULT:
7872 /* There is no point basing this on the tuning, since it is always the
7873 fast variant if it exists at all. */
7874 if (mode == DImode
7875 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7876 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7877 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7879 *total = COSTS_N_INSNS (2);
7880 return false;
7884 if (mode == DImode)
7886 *total = COSTS_N_INSNS (5);
7887 return false;
7890 if (mode == SImode)
7892 *total = COSTS_N_INSNS (2);
7893 return false;
7896 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7898 if (TARGET_HARD_FLOAT
7899 && (mode == SFmode
7900 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7902 *total = COSTS_N_INSNS (1);
7903 return false;
7907 *total = COSTS_N_INSNS (20);
7908 return false;
7910 default:
7911 return arm_rtx_costs_1 (x, outer_code, total, speed);
7914 /* All address computations that can be done are free, but rtx cost returns
7915 the same for practically all of them. So we weight the different types
7916 of address here in the order (most preferred first):
7917 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
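/* For example (following the code below): a post-increment address costs 0,
   base + constant costs 2, base + shifted index costs 3, base + register
   costs 4, a bare register costs 6, and a label or symbol costs 10.  */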
7918 static inline int
7919 arm_arm_address_cost (rtx x)
7921 enum rtx_code c = GET_CODE (x);
7923 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7924 return 0;
7925 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7926 return 10;
7928 if (c == PLUS)
7930 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7931 return 2;
7933 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7934 return 3;
7936 return 4;
7939 return 6;
7942 static inline int
7943 arm_thumb_address_cost (rtx x)
7945 enum rtx_code c = GET_CODE (x);
7947 if (c == REG)
7948 return 1;
7949 if (c == PLUS
7950 && GET_CODE (XEXP (x, 0)) == REG
7951 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7952 return 1;
7954 return 2;
7957 static int
7958 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7960 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7963 /* Adjust cost hook for XScale. */
7964 static bool
7965 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7967 /* Some true dependencies can have a higher cost depending
7968 on precisely how certain input operands are used. */
7969 if (REG_NOTE_KIND(link) == 0
7970 && recog_memoized (insn) >= 0
7971 && recog_memoized (dep) >= 0)
7973 int shift_opnum = get_attr_shift (insn);
7974 enum attr_type attr_type = get_attr_type (dep);
7976 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7977 operand for INSN. If we have a shifted input operand and the
7978 instruction we depend on is another ALU instruction, then we may
7979 have to account for an additional stall. */
7980 if (shift_opnum != 0
7981 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7983 rtx shifted_operand;
7984 int opno;
7986 /* Get the shifted operand. */
7987 extract_insn (insn);
7988 shifted_operand = recog_data.operand[shift_opnum];
7990 /* Iterate over all the operands in DEP. If we write an operand
7991 that overlaps with SHIFTED_OPERAND, then we have to increase the
7992 cost of this dependency. */
7993 extract_insn (dep);
7994 preprocess_constraints ();
7995 for (opno = 0; opno < recog_data.n_operands; opno++)
7997 /* We can ignore strict inputs. */
7998 if (recog_data.operand_type[opno] == OP_IN)
7999 continue;
8001 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8002 shifted_operand))
8004 *cost = 2;
8005 return false;
8010 return true;
8013 /* Adjust cost hook for Cortex A9. */
8014 static bool
8015 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8017 switch (REG_NOTE_KIND (link))
8019 case REG_DEP_ANTI:
8020 *cost = 0;
8021 return false;
8023 case REG_DEP_TRUE:
8024 case REG_DEP_OUTPUT:
8025 if (recog_memoized (insn) >= 0
8026 && recog_memoized (dep) >= 0)
8028 if (GET_CODE (PATTERN (insn)) == SET)
8030 if (GET_MODE_CLASS
8031 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8032 || GET_MODE_CLASS
8033 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8035 enum attr_type attr_type_insn = get_attr_type (insn);
8036 enum attr_type attr_type_dep = get_attr_type (dep);
8038 /* By default all dependencies of the form
8039 s0 = s0 <op> s1
8040 s0 = s0 <op> s2
8041 have an extra latency of 1 cycle because
8042 of the input and output dependency in this
8043 case. However, this gets modeled as a true
8044 dependency and hence all these checks. */
8045 if (REG_P (SET_DEST (PATTERN (insn)))
8046 && REG_P (SET_DEST (PATTERN (dep)))
8047 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8048 SET_DEST (PATTERN (dep))))
8050 /* FMACS is a special case where the dependent
8051 instruction can be issued 3 cycles before
8052 the normal latency in case of an output
8053 dependency. */
8054 if ((attr_type_insn == TYPE_FMACS
8055 || attr_type_insn == TYPE_FMACD)
8056 && (attr_type_dep == TYPE_FMACS
8057 || attr_type_dep == TYPE_FMACD))
8059 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8060 *cost = insn_default_latency (dep) - 3;
8061 else
8062 *cost = insn_default_latency (dep);
8063 return false;
8065 else
8067 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8068 *cost = insn_default_latency (dep) + 1;
8069 else
8070 *cost = insn_default_latency (dep);
8072 return false;
8077 break;
8079 default:
8080 gcc_unreachable ();
8083 return true;
8086 /* Adjust cost hook for FA726TE. */
8087 static bool
8088 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8090 /* For FA726TE, a true dependency on CPSR (i.e. a condition-setting insn followed by a predicated one)
8091 has a penalty of 3. */
8092 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8093 && recog_memoized (insn) >= 0
8094 && recog_memoized (dep) >= 0
8095 && get_attr_conds (dep) == CONDS_SET)
8097 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8098 if (get_attr_conds (insn) == CONDS_USE
8099 && get_attr_type (insn) != TYPE_BRANCH)
8101 *cost = 3;
8102 return false;
8105 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8106 || get_attr_conds (insn) == CONDS_USE)
8108 *cost = 0;
8109 return false;
8113 return true;
8116 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8117 It corrects the value of COST based on the relationship between
8118 INSN and DEP through the dependence LINK. It returns the new
8119 value. There is a per-core adjust_cost hook to adjust scheduler costs
8120 and the per-core hook can choose to completely override the generic
8121 adjust_cost function. Only put bits of code into arm_adjust_cost that
8122 are common across all cores. */
8123 static int
8124 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8126 rtx i_pat, d_pat;
8128 /* When generating Thumb-1 code, we want to place flag-setting operations
8129 close to a conditional branch which depends on them, so that we can
8130 omit the comparison. */
8131 if (TARGET_THUMB1
8132 && REG_NOTE_KIND (link) == 0
8133 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8134 && recog_memoized (dep) >= 0
8135 && get_attr_conds (dep) == CONDS_SET)
8136 return 0;
8138 if (current_tune->sched_adjust_cost != NULL)
8140 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8141 return cost;
8144 /* XXX This is not strictly true for the FPA. */
8145 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8146 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8147 return 0;
8149 /* Call insns don't incur a stall, even if they follow a load. */
8150 if (REG_NOTE_KIND (link) == 0
8151 && GET_CODE (insn) == CALL_INSN)
8152 return 1;
8154 if ((i_pat = single_set (insn)) != NULL
8155 && GET_CODE (SET_SRC (i_pat)) == MEM
8156 && (d_pat = single_set (dep)) != NULL
8157 && GET_CODE (SET_DEST (d_pat)) == MEM)
8159 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8160 /* This is a load after a store; there is no conflict if the load reads
8161 from a cached area. Assume that loads from the stack, and from the
8162 constant pool are cached, and that others will miss. This is a
8163 hack. */
8165 if ((GET_CODE (src_mem) == SYMBOL_REF
8166 && CONSTANT_POOL_ADDRESS_P (src_mem))
8167 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8168 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8169 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8170 return 1;
8173 return cost;
8176 static int fp_consts_inited = 0;
8178 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8179 static const char * const strings_fp[8] =
8181 "0", "1", "2", "3",
8182 "4", "5", "0.5", "10"
8185 static REAL_VALUE_TYPE values_fp[8];
8187 static void
8188 init_fp_table (void)
8190 int i;
8191 REAL_VALUE_TYPE r;
8193 if (TARGET_VFP)
8194 fp_consts_inited = 1;
8195 else
8196 fp_consts_inited = 8;
8198 for (i = 0; i < fp_consts_inited; i++)
8200 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8201 values_fp[i] = r;
8205 /* Return TRUE if rtx X is a valid immediate FP constant. */
8207 arm_const_double_rtx (rtx x)
8209 REAL_VALUE_TYPE r;
8210 int i;
8212 if (!fp_consts_inited)
8213 init_fp_table ();
8215 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8216 if (REAL_VALUE_MINUS_ZERO (r))
8217 return 0;
8219 for (i = 0; i < fp_consts_inited; i++)
8220 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8221 return 1;
8223 return 0;
8226 /* Return TRUE if the negation of rtx X is a valid immediate FPA constant. */
8228 neg_const_double_rtx_ok_for_fpa (rtx x)
8230 REAL_VALUE_TYPE r;
8231 int i;
8233 if (!fp_consts_inited)
8234 init_fp_table ();
8236 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8237 r = real_value_negate (&r);
8238 if (REAL_VALUE_MINUS_ZERO (r))
8239 return 0;
8241 for (i = 0; i < 8; i++)
8242 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8243 return 1;
8245 return 0;
8249 /* VFPv3 has a fairly wide range of representable immediates, formed from
8250 "quarter-precision" floating-point values. These can be evaluated using this
8251 formula (with ^ for exponentiation):
8253 (-1)^s * n * 2^(-r)
8255 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8256 16 <= n <= 31 and 0 <= r <= 7.
8258 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8260 - A (most-significant) is the sign bit.
8261 - BCD are the exponent (encoded as r XOR 3).
8262 - EFGH are the mantissa (encoded as n - 16).
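   As a worked example derived from the formula above: 1.0 = (-1)^0 * 16 * 2^(-4),
   so s = 0, n = 16 and r = 4, giving A = 0, BCD = (4 XOR 3) = 0b111 and
   EFGH = (16 - 16) = 0b0000, i.e. the 8-bit encoding 0x70.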
8265 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8266 fconst[sd] instruction, or -1 if X isn't suitable. */
8267 static int
8268 vfp3_const_double_index (rtx x)
8270 REAL_VALUE_TYPE r, m;
8271 int sign, exponent;
8272 unsigned HOST_WIDE_INT mantissa, mant_hi;
8273 unsigned HOST_WIDE_INT mask;
8274 HOST_WIDE_INT m1, m2;
8275 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8277 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8278 return -1;
8280 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8282 /* We can't represent these things, so detect them first. */
8283 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8284 return -1;
8286 /* Extract sign, exponent and mantissa. */
8287 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8288 r = real_value_abs (&r);
8289 exponent = REAL_EXP (&r);
8290 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8291 highest (sign) bit, with a fixed binary point at bit point_pos.
8292 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8293 bits for the mantissa, this may fail (low bits would be lost). */
8294 real_ldexp (&m, &r, point_pos - exponent);
8295 REAL_VALUE_TO_INT (&m1, &m2, m);
8296 mantissa = m1;
8297 mant_hi = m2;
8299 /* If there are bits set in the low part of the mantissa, we can't
8300 represent this value. */
8301 if (mantissa != 0)
8302 return -1;
8304 /* Now make it so that mantissa contains the most-significant bits, and move
8305 the point_pos to indicate that the least-significant bits have been
8306 discarded. */
8307 point_pos -= HOST_BITS_PER_WIDE_INT;
8308 mantissa = mant_hi;
8310 /* We can permit four significant bits of mantissa only, plus a high bit
8311 which is always 1. */
8312 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8313 if ((mantissa & mask) != 0)
8314 return -1;
8316 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8317 mantissa >>= point_pos - 5;
8319 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8320 floating-point immediate zero with Neon using an integer-zero load, but
8321 that case is handled elsewhere.) */
8322 if (mantissa == 0)
8323 return -1;
8325 gcc_assert (mantissa >= 16 && mantissa <= 31);
8327 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8328 normalized significands are in the range [1, 2). (Our mantissa is shifted
8329 left 4 places at this point relative to normalized IEEE754 values). GCC
8330 internally uses [0.5, 1) (see real.c), so the exponent returned from
8331 REAL_EXP must be altered. */
8332 exponent = 5 - exponent;
8334 if (exponent < 0 || exponent > 7)
8335 return -1;
8337 /* Sign, mantissa and exponent are now in the correct form to plug into the
8338 formula described in the comment above. */
8339 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8342 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8344 vfp3_const_double_rtx (rtx x)
8346 if (!TARGET_VFP3)
8347 return 0;
8349 return vfp3_const_double_index (x) != -1;
8352 /* Recognize immediates which can be used in various Neon instructions. Legal
8353 immediates are described by the following table (for VMVN variants, the
8354 bitwise inverse of the constant shown is recognized. In either case, VMOV
8355 is output and the correct instruction to use for a given constant is chosen
8356 by the assembler). The constant shown is replicated across all elements of
8357 the destination vector.
8359 insn elems variant constant (binary)
8360 ---- ----- ------- -----------------
8361 vmov i32 0 00000000 00000000 00000000 abcdefgh
8362 vmov i32 1 00000000 00000000 abcdefgh 00000000
8363 vmov i32 2 00000000 abcdefgh 00000000 00000000
8364 vmov i32 3 abcdefgh 00000000 00000000 00000000
8365 vmov i16 4 00000000 abcdefgh
8366 vmov i16 5 abcdefgh 00000000
8367 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8368 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8369 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8370 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8371 vmvn i16 10 00000000 abcdefgh
8372 vmvn i16 11 abcdefgh 00000000
8373 vmov i32 12 00000000 00000000 abcdefgh 11111111
8374 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8375 vmov i32 14 00000000 abcdefgh 11111111 11111111
8376 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8377 vmov i8 16 abcdefgh
8378 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8379 eeeeeeee ffffffff gggggggg hhhhhhhh
8380 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8382 For case 18, B = !b. Representable values are exactly those accepted by
8383 vfp3_const_double_index, but are output as floating-point numbers rather
8384 than indices.
8386 Variants 0-5 (inclusive) may also be used as immediates for the second
8387 operand of VORR/VBIC instructions.
8389 The INVERSE argument causes the bitwise inverse of the given operand to be
8390 recognized instead (used for recognizing legal immediates for the VAND/VORN
8391 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8392 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8393 output, rather than the real insns vbic/vorr).
8395 INVERSE makes no difference to the recognition of float vectors.
8397 The return value is the variant of immediate as shown in the above table, or
8398 -1 if the given value doesn't match any of the listed patterns.
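   For instance, a vector whose 32-bit elements all equal 0x0000ab00 matches
   variant 1 above (vmov.i32 with the nonzero byte in bits 8-15), and
   *ELEMENTWIDTH is set to 32.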
8400 static int
8401 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8402 rtx *modconst, int *elementwidth)
8404 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8405 matches = 1; \
8406 for (i = 0; i < idx; i += (STRIDE)) \
8407 if (!(TEST)) \
8408 matches = 0; \
8409 if (matches) \
8411 immtype = (CLASS); \
8412 elsize = (ELSIZE); \
8413 break; \
8416 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8417 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8418 unsigned char bytes[16];
8419 int immtype = -1, matches;
8420 unsigned int invmask = inverse ? 0xff : 0;
8422 /* Vectors of float constants. */
8423 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8425 rtx el0 = CONST_VECTOR_ELT (op, 0);
8426 REAL_VALUE_TYPE r0;
8428 if (!vfp3_const_double_rtx (el0))
8429 return -1;
8431 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8433 for (i = 1; i < n_elts; i++)
8435 rtx elt = CONST_VECTOR_ELT (op, i);
8436 REAL_VALUE_TYPE re;
8438 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8440 if (!REAL_VALUES_EQUAL (r0, re))
8441 return -1;
8444 if (modconst)
8445 *modconst = CONST_VECTOR_ELT (op, 0);
8447 if (elementwidth)
8448 *elementwidth = 0;
8450 return 18;
8453 /* Splat vector constant out into a byte vector. */
8454 for (i = 0; i < n_elts; i++)
8456 rtx el = CONST_VECTOR_ELT (op, i);
8457 unsigned HOST_WIDE_INT elpart;
8458 unsigned int part, parts;
8460 if (GET_CODE (el) == CONST_INT)
8462 elpart = INTVAL (el);
8463 parts = 1;
8465 else if (GET_CODE (el) == CONST_DOUBLE)
8467 elpart = CONST_DOUBLE_LOW (el);
8468 parts = 2;
8470 else
8471 gcc_unreachable ();
8473 for (part = 0; part < parts; part++)
8475 unsigned int byte;
8476 for (byte = 0; byte < innersize; byte++)
8478 bytes[idx++] = (elpart & 0xff) ^ invmask;
8479 elpart >>= BITS_PER_UNIT;
8481 if (GET_CODE (el) == CONST_DOUBLE)
8482 elpart = CONST_DOUBLE_HIGH (el);
8486 /* Sanity check. */
8487 gcc_assert (idx == GET_MODE_SIZE (mode));
8491 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8492 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8494 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8495 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8497 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8498 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8500 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8501 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8503 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8505 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8507 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8508 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8510 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8511 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8513 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8514 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8516 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8517 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8519 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8521 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8523 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8524 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8526 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8527 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8529 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8530 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8532 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8533 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8535 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8537 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8538 && bytes[i] == bytes[(i + 8) % idx]);
8540 while (0);
8542 if (immtype == -1)
8543 return -1;
8545 if (elementwidth)
8546 *elementwidth = elsize;
8548 if (modconst)
8550 unsigned HOST_WIDE_INT imm = 0;
8552 /* Un-invert bytes of recognized vector, if necessary. */
8553 if (invmask != 0)
8554 for (i = 0; i < idx; i++)
8555 bytes[i] ^= invmask;
8557 if (immtype == 17)
8559 /* FIXME: Broken on 32-bit H_W_I hosts. */
8560 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8562 for (i = 0; i < 8; i++)
8563 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8564 << (i * BITS_PER_UNIT);
8566 *modconst = GEN_INT (imm);
8568 else
8570 unsigned HOST_WIDE_INT imm = 0;
8572 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8573 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8575 *modconst = GEN_INT (imm);
8579 return immtype;
8580 #undef CHECK
8583 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8584 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8585 float elements), and a modified constant (whatever should be output for a
8586 VMOV) in *MODCONST. */
8589 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8590 rtx *modconst, int *elementwidth)
8592 rtx tmpconst;
8593 int tmpwidth;
8594 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8596 if (retval == -1)
8597 return 0;
8599 if (modconst)
8600 *modconst = tmpconst;
8602 if (elementwidth)
8603 *elementwidth = tmpwidth;
8605 return 1;
8608 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8609 the immediate is valid, write a constant suitable for using as an operand
8610 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8611 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8614 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8615 rtx *modconst, int *elementwidth)
8617 rtx tmpconst;
8618 int tmpwidth;
8619 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8621 if (retval < 0 || retval > 5)
8622 return 0;
8624 if (modconst)
8625 *modconst = tmpconst;
8627 if (elementwidth)
8628 *elementwidth = tmpwidth;
8630 return 1;
8633 /* Return a string suitable for output of Neon immediate logic operation
8634 MNEM. */
8636 char *
8637 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8638 int inverse, int quad)
8640 int width, is_valid;
8641 static char templ[40];
8643 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8645 gcc_assert (is_valid != 0);
8647 if (quad)
8648 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8649 else
8650 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8652 return templ;
8655 /* Output a sequence of pairwise operations to implement a reduction.
8656 NOTE: We do "too much work" here, because pairwise operations work on two
8657 registers-worth of operands in one go. Unfortunately, we don't think we can
8658 exploit those extra calculations to do the full operation in fewer steps.
8659 Although all vector elements of the result but the first are ignored, we
8660 actually calculate the same result in each of the elements. An alternative
8661 such as initially loading a vector with zero to use as each of the second
8662 operands would use up an additional register and take an extra instruction,
8663 for no particular gain. */
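/* For example, reducing a V4SF vector with an addition REDUC callback emits
   two pairwise steps: t = reduc (op1, op1) followed by op0 = reduc (t, t).  */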
8665 void
8666 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8667 rtx (*reduc) (rtx, rtx, rtx))
8669 enum machine_mode inner = GET_MODE_INNER (mode);
8670 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8671 rtx tmpsum = op1;
8673 for (i = parts / 2; i >= 1; i /= 2)
8675 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8676 emit_insn (reduc (dest, tmpsum, tmpsum));
8677 tmpsum = dest;
8681 /* If VALS is a vector constant that can be loaded into a register
8682 using VDUP, generate instructions to do so and return an RTX to
8683 assign to the register. Otherwise return NULL_RTX. */
8685 static rtx
8686 neon_vdup_constant (rtx vals)
8688 enum machine_mode mode = GET_MODE (vals);
8689 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8690 int n_elts = GET_MODE_NUNITS (mode);
8691 bool all_same = true;
8692 rtx x;
8693 int i;
8695 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8696 return NULL_RTX;
8698 for (i = 0; i < n_elts; ++i)
8700 x = XVECEXP (vals, 0, i);
8701 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8702 all_same = false;
8705 if (!all_same)
8706 /* The elements are not all the same. We could handle repeating
8707 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8708 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8709 vdup.i16). */
8710 return NULL_RTX;
8712 /* We can load this constant by using VDUP and a constant in a
8713 single ARM register. This will be cheaper than a vector
8714 load. */
8716 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8717 return gen_rtx_VEC_DUPLICATE (mode, x);
8720 /* Generate code to load VALS, which is a PARALLEL containing only
8721 constants (for vec_init) or CONST_VECTOR, efficiently into a
8722 register. Returns an RTX to copy into the register, or NULL_RTX
8723 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8726 neon_make_constant (rtx vals)
8728 enum machine_mode mode = GET_MODE (vals);
8729 rtx target;
8730 rtx const_vec = NULL_RTX;
8731 int n_elts = GET_MODE_NUNITS (mode);
8732 int n_const = 0;
8733 int i;
8735 if (GET_CODE (vals) == CONST_VECTOR)
8736 const_vec = vals;
8737 else if (GET_CODE (vals) == PARALLEL)
8739 /* A CONST_VECTOR must contain only CONST_INTs and
8740 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8741 Only store valid constants in a CONST_VECTOR. */
8742 for (i = 0; i < n_elts; ++i)
8744 rtx x = XVECEXP (vals, 0, i);
8745 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8746 n_const++;
8748 if (n_const == n_elts)
8749 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8751 else
8752 gcc_unreachable ();
8754 if (const_vec != NULL
8755 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8756 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8757 return const_vec;
8758 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8759 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8760 pipeline cycle; creating the constant takes one or two ARM
8761 pipeline cycles. */
8762 return target;
8763 else if (const_vec != NULL_RTX)
8764 /* Load from constant pool. On Cortex-A8 this takes two cycles
8765 (for either double or quad vectors). We cannot take advantage
8766 of single-cycle VLD1 because we need a PC-relative addressing
8767 mode. */
8768 return const_vec;
8769 else
8770 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8771 We cannot construct an initializer. */
8772 return NULL_RTX;
8775 /* Initialize vector TARGET to VALS. */
8777 void
8778 neon_expand_vector_init (rtx target, rtx vals)
8780 enum machine_mode mode = GET_MODE (target);
8781 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8782 int n_elts = GET_MODE_NUNITS (mode);
8783 int n_var = 0, one_var = -1;
8784 bool all_same = true;
8785 rtx x, mem;
8786 int i;
8788 for (i = 0; i < n_elts; ++i)
8790 x = XVECEXP (vals, 0, i);
8791 if (!CONSTANT_P (x))
8792 ++n_var, one_var = i;
8794 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8795 all_same = false;
8798 if (n_var == 0)
8800 rtx constant = neon_make_constant (vals);
8801 if (constant != NULL_RTX)
8803 emit_move_insn (target, constant);
8804 return;
8808 /* Splat a single non-constant element if we can. */
8809 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8811 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8812 emit_insn (gen_rtx_SET (VOIDmode, target,
8813 gen_rtx_VEC_DUPLICATE (mode, x)));
8814 return;
8817 /* One field is non-constant. Load constant then overwrite varying
8818 field. This is more efficient than using the stack. */
8819 if (n_var == 1)
8821 rtx copy = copy_rtx (vals);
8822 rtx index = GEN_INT (one_var);
8824 /* Load constant part of vector, substitute neighboring value for
8825 varying element. */
8826 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8827 neon_expand_vector_init (target, copy);
8829 /* Insert variable. */
8830 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8831 switch (mode)
8833 case V8QImode:
8834 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8835 break;
8836 case V16QImode:
8837 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8838 break;
8839 case V4HImode:
8840 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8841 break;
8842 case V8HImode:
8843 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8844 break;
8845 case V2SImode:
8846 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8847 break;
8848 case V4SImode:
8849 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8850 break;
8851 case V2SFmode:
8852 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8853 break;
8854 case V4SFmode:
8855 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8856 break;
8857 case V2DImode:
8858 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8859 break;
8860 default:
8861 gcc_unreachable ();
8863 return;
8866 /* Construct the vector in memory one field at a time
8867 and load the whole vector. */
8868 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8869 for (i = 0; i < n_elts; i++)
8870 emit_move_insn (adjust_address_nv (mem, inner_mode,
8871 i * GET_MODE_SIZE (inner_mode)),
8872 XVECEXP (vals, 0, i));
8873 emit_move_insn (target, mem);
8876 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8877 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8878 reported source locations are bogus. */
8880 static void
8881 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8882 const char *err)
8884 HOST_WIDE_INT lane;
8886 gcc_assert (GET_CODE (operand) == CONST_INT);
8888 lane = INTVAL (operand);
8890 if (lane < low || lane >= high)
8891 error (err);
8894 /* Bounds-check lanes. */
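/* For example, neon_lane_bounds (lane, 0, 4) accepts lane values 0..3 for a
   four-element vector and reports "lane out of range" otherwise (the LANE
   operand name here is illustrative).  */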
8896 void
8897 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8899 bounds_check (operand, low, high, "lane out of range");
8902 /* Bounds-check constants. */
8904 void
8905 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8907 bounds_check (operand, low, high, "constant out of range");
8910 HOST_WIDE_INT
8911 neon_element_bits (enum machine_mode mode)
8913 if (mode == DImode)
8914 return GET_MODE_BITSIZE (mode);
8915 else
8916 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8920 /* Predicates for `match_operand' and `match_operator'. */
8922 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8924 cirrus_memory_offset (rtx op)
8926 /* Reject eliminable registers. */
8927 if (! (reload_in_progress || reload_completed)
8928 && ( reg_mentioned_p (frame_pointer_rtx, op)
8929 || reg_mentioned_p (arg_pointer_rtx, op)
8930 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8931 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8932 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8933 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8934 return 0;
8936 if (GET_CODE (op) == MEM)
8938 rtx ind;
8940 ind = XEXP (op, 0);
8942 /* Match: (mem (reg)). */
8943 if (GET_CODE (ind) == REG)
8944 return 1;
8946 /* Match:
8947 (mem (plus (reg)
8948 (const))). */
8949 if (GET_CODE (ind) == PLUS
8950 && GET_CODE (XEXP (ind, 0)) == REG
8951 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8952 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8953 return 1;
8956 return 0;
8959 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8960 WB is true if full writeback address modes are allowed and is false
8961 if limited writeback address modes (POST_INC and PRE_DEC) are
8962 allowed. */
8965 arm_coproc_mem_operand (rtx op, bool wb)
8967 rtx ind;
8969 /* Reject eliminable registers. */
8970 if (! (reload_in_progress || reload_completed)
8971 && ( reg_mentioned_p (frame_pointer_rtx, op)
8972 || reg_mentioned_p (arg_pointer_rtx, op)
8973 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8974 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8975 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8976 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8977 return FALSE;
8979 /* Constants are converted into offsets from labels. */
8980 if (GET_CODE (op) != MEM)
8981 return FALSE;
8983 ind = XEXP (op, 0);
8985 if (reload_completed
8986 && (GET_CODE (ind) == LABEL_REF
8987 || (GET_CODE (ind) == CONST
8988 && GET_CODE (XEXP (ind, 0)) == PLUS
8989 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8990 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8991 return TRUE;
8993 /* Match: (mem (reg)). */
8994 if (GET_CODE (ind) == REG)
8995 return arm_address_register_rtx_p (ind, 0);
8997 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8998 acceptable in any case (subject to verification by
8999 arm_address_register_rtx_p). We need WB to be true to accept
9000 PRE_INC and POST_DEC. */
9001 if (GET_CODE (ind) == POST_INC
9002 || GET_CODE (ind) == PRE_DEC
9003 || (wb
9004 && (GET_CODE (ind) == PRE_INC
9005 || GET_CODE (ind) == POST_DEC)))
9006 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9008 if (wb
9009 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9010 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9011 && GET_CODE (XEXP (ind, 1)) == PLUS
9012 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9013 ind = XEXP (ind, 1);
9015 /* Match:
9016 (plus (reg)
9017 (const)). */
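/* The constant offset must be word-aligned and in the range -1020 .. +1020,
   i.e. the coprocessor load/store offset range (a reading of the checks
   below).  */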
9018 if (GET_CODE (ind) == PLUS
9019 && GET_CODE (XEXP (ind, 0)) == REG
9020 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9021 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9022 && INTVAL (XEXP (ind, 1)) > -1024
9023 && INTVAL (XEXP (ind, 1)) < 1024
9024 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9025 return TRUE;
9027 return FALSE;
9030 /* Return TRUE if OP is a memory operand which we can load or store a vector
9031 to/from. TYPE is one of the following values:
9032 0 - Vector load/store (vldr)
9033 1 - Core registers (ldm)
9034 2 - Element/structure loads (vld1)
9037 neon_vector_mem_operand (rtx op, int type)
9039 rtx ind;
9041 /* Reject eliminable registers. */
9042 if (! (reload_in_progress || reload_completed)
9043 && ( reg_mentioned_p (frame_pointer_rtx, op)
9044 || reg_mentioned_p (arg_pointer_rtx, op)
9045 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9046 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9047 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9048 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9049 return FALSE;
9051 /* Constants are converted into offsets from labels. */
9052 if (GET_CODE (op) != MEM)
9053 return FALSE;
9055 ind = XEXP (op, 0);
9057 if (reload_completed
9058 && (GET_CODE (ind) == LABEL_REF
9059 || (GET_CODE (ind) == CONST
9060 && GET_CODE (XEXP (ind, 0)) == PLUS
9061 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9062 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9063 return TRUE;
9065 /* Match: (mem (reg)). */
9066 if (GET_CODE (ind) == REG)
9067 return arm_address_register_rtx_p (ind, 0);
9069 /* Allow post-increment with Neon registers. */
9070 if ((type != 1 && GET_CODE (ind) == POST_INC)
9071 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9072 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9074 /* FIXME: vld1 allows register post-modify. */
9076 /* Match:
9077 (plus (reg)
9078 (const)). */
9079 if (type == 0
9080 && GET_CODE (ind) == PLUS
9081 && GET_CODE (XEXP (ind, 0)) == REG
9082 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9083 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9084 && INTVAL (XEXP (ind, 1)) > -1024
9085 && INTVAL (XEXP (ind, 1)) < 1016
9086 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9087 return TRUE;
9089 return FALSE;
9092 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9093 type. */
9095 neon_struct_mem_operand (rtx op)
9097 rtx ind;
9099 /* Reject eliminable registers. */
9100 if (! (reload_in_progress || reload_completed)
9101 && ( reg_mentioned_p (frame_pointer_rtx, op)
9102 || reg_mentioned_p (arg_pointer_rtx, op)
9103 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9104 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9105 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9106 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9107 return FALSE;
9109 /* Constants are converted into offsets from labels. */
9110 if (GET_CODE (op) != MEM)
9111 return FALSE;
9113 ind = XEXP (op, 0);
9115 if (reload_completed
9116 && (GET_CODE (ind) == LABEL_REF
9117 || (GET_CODE (ind) == CONST
9118 && GET_CODE (XEXP (ind, 0)) == PLUS
9119 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9120 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9121 return TRUE;
9123 /* Match: (mem (reg)). */
9124 if (GET_CODE (ind) == REG)
9125 return arm_address_register_rtx_p (ind, 0);
9127 return FALSE;
9130 /* Return true if X is a register that will be eliminated later on. */
9132 arm_eliminable_register (rtx x)
9134 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9135 || REGNO (x) == ARG_POINTER_REGNUM
9136 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9137 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9140 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
9141 coprocessor registers. Otherwise return NO_REGS. */
9143 enum reg_class
9144 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9146 if (mode == HFmode)
9148 if (!TARGET_NEON_FP16)
9149 return GENERAL_REGS;
9150 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9151 return NO_REGS;
9152 return GENERAL_REGS;
9155 /* The neon move patterns handle all legitimate vector and struct
9156 addresses. */
9157 if (TARGET_NEON
9158 && MEM_P (x)
9159 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9160 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9161 || VALID_NEON_STRUCT_MODE (mode)))
9162 return NO_REGS;
9164 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9165 return NO_REGS;
9167 return GENERAL_REGS;
9170 /* Values which must be returned in the most-significant end of the return
9171 register. */
9173 static bool
9174 arm_return_in_msb (const_tree valtype)
9176 return (TARGET_AAPCS_BASED
9177 && BYTES_BIG_ENDIAN
9178 && (AGGREGATE_TYPE_P (valtype)
9179 || TREE_CODE (valtype) == COMPLEX_TYPE));
9182 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9183 Used by the Cirrus Maverick code, which has to work around
9184 a hardware bug triggered by such instructions. */
9185 static bool
9186 arm_memory_load_p (rtx insn)
9188 rtx body, lhs, rhs;
9190 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9191 return false;
9193 body = PATTERN (insn);
9195 if (GET_CODE (body) != SET)
9196 return false;
9198 lhs = XEXP (body, 0);
9199 rhs = XEXP (body, 1);
9201 lhs = REG_OR_SUBREG_RTX (lhs);
9203 /* If the destination is not a general purpose
9204 register we do not have to worry. */
9205 if (GET_CODE (lhs) != REG
9206 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9207 return false;
9209 /* As well as loads from memory we also have to react
9210 to loads of invalid constants which will be turned
9211 into loads from the minipool. */
9212 return (GET_CODE (rhs) == MEM
9213 || GET_CODE (rhs) == SYMBOL_REF
9214 || note_invalid_constants (insn, -1, false));
9217 /* Return TRUE if INSN is a Cirrus instruction. */
9218 static bool
9219 arm_cirrus_insn_p (rtx insn)
9221 enum attr_cirrus attr;
9223 /* get_attr cannot accept USE or CLOBBER. */
9224 if (!insn
9225 || GET_CODE (insn) != INSN
9226 || GET_CODE (PATTERN (insn)) == USE
9227 || GET_CODE (PATTERN (insn)) == CLOBBER)
9228 return 0;
9230 attr = get_attr_cirrus (insn);
9232 return attr != CIRRUS_NOT;
9235 /* Cirrus reorg for invalid instruction combinations. */
9236 static void
9237 cirrus_reorg (rtx first)
9239 enum attr_cirrus attr;
9240 rtx body = PATTERN (first);
9241 rtx t;
9242 int nops;
9244 /* Any branch must be followed by 2 non Cirrus instructions. */
9245 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9247 nops = 0;
9248 t = next_nonnote_insn (first);
9250 if (arm_cirrus_insn_p (t))
9251 ++ nops;
9253 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9254 ++ nops;
9256 while (nops --)
9257 emit_insn_after (gen_nop (), first);
9259 return;
9262 /* (float (blah)) is in parallel with a clobber. */
9263 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9264 body = XVECEXP (body, 0, 0);
9266 if (GET_CODE (body) == SET)
9268 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9270 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9271 be followed by a non Cirrus insn. */
9272 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9274 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9275 emit_insn_after (gen_nop (), first);
9277 return;
9279 else if (arm_memory_load_p (first))
9281 unsigned int arm_regno;
9283 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9284 ldr/cfmv64hr combination where the Rd field is the same
9285 in both instructions must be split with a non Cirrus
9286 insn. Example:
9288 ldr r0, blah
9290 cfmvsr mvf0, r0. */
9292 /* Get Arm register number for ldr insn. */
9293 if (GET_CODE (lhs) == REG)
9294 arm_regno = REGNO (lhs);
9295 else
9297 gcc_assert (GET_CODE (rhs) == REG);
9298 arm_regno = REGNO (rhs);
9301 /* Next insn. */
9302 first = next_nonnote_insn (first);
9304 if (! arm_cirrus_insn_p (first))
9305 return;
9307 body = PATTERN (first);
9309 /* (float (blah)) is in parallel with a clobber. */
9310 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9311 body = XVECEXP (body, 0, 0);
9313 if (GET_CODE (body) == FLOAT)
9314 body = XEXP (body, 0);
9316 if (get_attr_cirrus (first) == CIRRUS_MOVE
9317 && GET_CODE (XEXP (body, 1)) == REG
9318 && arm_regno == REGNO (XEXP (body, 1)))
9319 emit_insn_after (gen_nop (), first);
9321 return;
9325 /* get_attr cannot accept USE or CLOBBER. */
9326 if (!first
9327 || GET_CODE (first) != INSN
9328 || GET_CODE (PATTERN (first)) == USE
9329 || GET_CODE (PATTERN (first)) == CLOBBER)
9330 return;
9332 attr = get_attr_cirrus (first);
9334 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9335 must be followed by a non-coprocessor instruction. */
9336 if (attr == CIRRUS_COMPARE)
9338 nops = 0;
9340 t = next_nonnote_insn (first);
9342 if (arm_cirrus_insn_p (t))
9343 ++ nops;
9345 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9346 ++ nops;
9348 while (nops --)
9349 emit_insn_after (gen_nop (), first);
9351 return;
9355 /* Return TRUE if X references a SYMBOL_REF. */
9357 symbol_mentioned_p (rtx x)
9359 const char * fmt;
9360 int i;
9362 if (GET_CODE (x) == SYMBOL_REF)
9363 return 1;
9365 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9366 are constant offsets, not symbols. */
9367 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9368 return 0;
9370 fmt = GET_RTX_FORMAT (GET_CODE (x));
9372 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9374 if (fmt[i] == 'E')
9376 int j;
9378 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9379 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9380 return 1;
9382 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9383 return 1;
9386 return 0;
9389 /* Return TRUE if X references a LABEL_REF. */
9391 label_mentioned_p (rtx x)
9393 const char * fmt;
9394 int i;
9396 if (GET_CODE (x) == LABEL_REF)
9397 return 1;
9399 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9400 instruction, but they are constant offsets, not symbols. */
9401 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9402 return 0;
9404 fmt = GET_RTX_FORMAT (GET_CODE (x));
9405 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9407 if (fmt[i] == 'E')
9409 int j;
9411 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9412 if (label_mentioned_p (XVECEXP (x, i, j)))
9413 return 1;
9415 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9416 return 1;
9419 return 0;
9423 tls_mentioned_p (rtx x)
9425 switch (GET_CODE (x))
9427 case CONST:
9428 return tls_mentioned_p (XEXP (x, 0));
9430 case UNSPEC:
9431 if (XINT (x, 1) == UNSPEC_TLS)
9432 return 1;
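      /* Fall through.  */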
9434 default:
9435 return 0;
9439 /* Must not copy any rtx that uses a pc-relative address. */
9441 static int
9442 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9444 if (GET_CODE (*x) == UNSPEC
9445 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9446 return 1;
9447 return 0;
9450 static bool
9451 arm_cannot_copy_insn_p (rtx insn)
9453 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9456 enum rtx_code
9457 minmax_code (rtx x)
9459 enum rtx_code code = GET_CODE (x);
9461 switch (code)
9463 case SMAX:
9464 return GE;
9465 case SMIN:
9466 return LE;
9467 case UMIN:
9468 return LEU;
9469 case UMAX:
9470 return GEU;
9471 default:
9472 gcc_unreachable ();
9476 /* Return 1 if memory locations are adjacent. */
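/* For example, MEMs addressed as [r4, #8] and [r4, #12] share a base and
   differ by 4, although the checks below (offset validity, eliminable
   registers, load scheduling) may still reject such a pair.  */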
9478 adjacent_mem_locations (rtx a, rtx b)
9480 /* We don't guarantee to preserve the order of these memory refs. */
9481 if (volatile_refs_p (a) || volatile_refs_p (b))
9482 return 0;
9484 if ((GET_CODE (XEXP (a, 0)) == REG
9485 || (GET_CODE (XEXP (a, 0)) == PLUS
9486 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9487 && (GET_CODE (XEXP (b, 0)) == REG
9488 || (GET_CODE (XEXP (b, 0)) == PLUS
9489 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9491 HOST_WIDE_INT val0 = 0, val1 = 0;
9492 rtx reg0, reg1;
9493 int val_diff;
9495 if (GET_CODE (XEXP (a, 0)) == PLUS)
9497 reg0 = XEXP (XEXP (a, 0), 0);
9498 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9500 else
9501 reg0 = XEXP (a, 0);
9503 if (GET_CODE (XEXP (b, 0)) == PLUS)
9505 reg1 = XEXP (XEXP (b, 0), 0);
9506 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9508 else
9509 reg1 = XEXP (b, 0);
9511 /* Don't accept any offset that will require multiple
9512 instructions to handle, since this would cause the
9513 arith_adjacentmem pattern to output an overlong sequence. */
9514 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9515 return 0;
9517 /* Don't allow an eliminable register: register elimination can make
9518 the offset too large. */
9519 if (arm_eliminable_register (reg0))
9520 return 0;
9522 val_diff = val1 - val0;
9524 if (arm_ld_sched)
9526 /* If the target has load delay slots, then there's no benefit
9527 to using an ldm instruction unless the offset is zero and
9528 we are optimizing for size. */
9529 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9530 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9531 && (val_diff == 4 || val_diff == -4));
9534 return ((REGNO (reg0) == REGNO (reg1))
9535 && (val_diff == 4 || val_diff == -4));
9538 return 0;
9541 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9542 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9543 instruction. ADD_OFFSET is nonzero if the base address register needs
9544 to be modified with an add instruction before we can use it. */
9546 static bool
9547 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9548 int nops, HOST_WIDE_INT add_offset)
9550 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9551 if the offset isn't small enough. The reason 2 ldrs are faster
9552 is because these ARMs are able to do more than one cache access
9553 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9554 whilst the ARM8 has a double bandwidth cache. This means that
9555 these cores can do both an instruction fetch and a data fetch in
9556 a single cycle, so the trick of calculating the address into a
9557 scratch register (one of the result regs) and then doing a load
9558 multiple actually becomes slower (and no smaller in code size).
9559 That is the transformation
9561 ldr rd1, [rbase + offset]
9562 ldr rd2, [rbase + offset + 4]
9566 add rd1, rbase, offset
9567 ldmia rd1, {rd1, rd2}
9569 produces worse code -- '3 cycles + any stalls on rd2' instead of
9570 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9571 access per cycle, the first sequence could never complete in less
9572 than 6 cycles, whereas the ldm sequence would only take 5 and
9573 would make better use of sequential accesses if not hitting the
9574 cache.
9576 We cheat here and test 'arm_ld_sched' which we currently know to
9577 only be true for the ARM8, ARM9 and StrongARM. If this ever
9578 changes, then the test below needs to be reworked. */
9579 if (nops == 2 && arm_ld_sched && add_offset != 0)
9580 return false;
9582 /* XScale has load-store double instructions, but they have stricter
9583 alignment requirements than load-store multiple, so we cannot
9584 use them.
9586 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9587 the pipeline until completion.
9589 NREGS CYCLES
9590 1 3
9591 2 4
9592 3 5
9593 4 6
9595 An ldr instruction takes 1-3 cycles, but does not block the
9596 pipeline.
9598 NREGS CYCLES
9599 1 1-3
9600 2 2-6
9601 3 3-9
9602 4 4-12
9604 Best case ldr will always win. However, the more ldr instructions
9605 we issue, the less likely we are to be able to schedule them well.
9606 Using ldr instructions also increases code size.
9608 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9609 for counts of 3 or 4 regs. */
9610 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9611 return false;
9612 return true;
9615 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9616 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9617 an array ORDER which describes the sequence to use when accessing the
9618 offsets that produces an ascending order. In this sequence, each
9619 offset must be larger by exactly 4 than the previous one. ORDER[0]
9620 must have been filled in with the lowest offset by the caller.
9621 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9622 we use to verify that ORDER produces an ascending order of registers.
9623 Return true if it was possible to construct such an order, false if
9624 not. */
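/* For example, given UNSORTED_OFFSETS = {8, 0, 4, 12} and ORDER[0] = 1 (the
   lowest offset), the loop below fills ORDER = {1, 2, 0, 3}, visiting the
   offsets as 0, 4, 8, 12.  */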
9626 static bool
9627 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9628 int *unsorted_regs)
9630 int i;
9631 for (i = 1; i < nops; i++)
9633 int j;
9635 order[i] = order[i - 1];
9636 for (j = 0; j < nops; j++)
9637 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9639 /* We must find exactly one offset that is higher than the
9640 previous one by 4. */
9641 if (order[i] != order[i - 1])
9642 return false;
9643 order[i] = j;
9645 if (order[i] == order[i - 1])
9646 return false;
9647 /* The register numbers must be ascending. */
9648 if (unsorted_regs != NULL
9649 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9650 return false;
9652 return true;
9655 /* Used to determine in a peephole whether a sequence of load
9656 instructions can be changed into a load-multiple instruction.
9657 NOPS is the number of separate load instructions we are examining. The
9658 first NOPS entries in OPERANDS are the destination registers, the
9659 next NOPS entries are memory operands. If this function is
9660 successful, *BASE is set to the common base register of the memory
9661 accesses; *LOAD_OFFSET is set to the first memory location's offset
9662 from that base register.
9663 REGS is an array filled in with the destination register numbers.
9664 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9665 insn numbers to an ascending order of loads. If CHECK_REGS is true,
9666 the sequence of registers in REGS matches the loads from ascending memory
9667 locations, and the function verifies that the register numbers are
9668 themselves ascending. If CHECK_REGS is false, the register numbers
9669 are stored in the order they are found in the operands. */
9670 static int
9671 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9672 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9674 int unsorted_regs[MAX_LDM_STM_OPS];
9675 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9676 int order[MAX_LDM_STM_OPS];
9677 rtx base_reg_rtx = NULL;
9678 int base_reg = -1;
9679 int i, ldm_case;
9681 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9682 easily extended if required. */
9683 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9685 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9687 /* Loop over the operands and check that the memory references are
9688 suitable (i.e. immediate offsets from the same base register). At
9689 the same time, extract the target register, and the memory
9690 offsets. */
9691 for (i = 0; i < nops; i++)
9693 rtx reg;
9694 rtx offset;
9696 /* Convert a subreg of a mem into the mem itself. */
9697 if (GET_CODE (operands[nops + i]) == SUBREG)
9698 operands[nops + i] = alter_subreg (operands + (nops + i));
9700 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9702 /* Don't reorder volatile memory references; it doesn't seem worth
9703 looking for the case where the order is ok anyway. */
9704 if (MEM_VOLATILE_P (operands[nops + i]))
9705 return 0;
9707 offset = const0_rtx;
9709 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9710 || (GET_CODE (reg) == SUBREG
9711 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9712 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9713 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9714 == REG)
9715 || (GET_CODE (reg) == SUBREG
9716 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9717 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9718 == CONST_INT)))
9720 if (i == 0)
9722 base_reg = REGNO (reg);
9723 base_reg_rtx = reg;
9724 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9725 return 0;
9727 else if (base_reg != (int) REGNO (reg))
9728 /* Not addressed from the same base register. */
9729 return 0;
9731 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9732 ? REGNO (operands[i])
9733 : REGNO (SUBREG_REG (operands[i])));
9735 /* If it isn't an integer register, or if it overwrites the
9736 base register but isn't the last insn in the list, then
9737 we can't do this. */
9738 if (unsorted_regs[i] < 0
9739 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9740 || unsorted_regs[i] > 14
9741 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9742 return 0;
9744 unsorted_offsets[i] = INTVAL (offset);
9745 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9746 order[0] = i;
9748 else
9749 /* Not a suitable memory address. */
9750 return 0;
9753 /* All the useful information has now been extracted from the
9754 operands into unsorted_regs and unsorted_offsets; additionally,
9755 order[0] has been set to the lowest offset in the list. Sort
9756 the offsets into order, verifying that they are adjacent, and
9757 check that the register numbers are ascending. */
9758 if (!compute_offset_order (nops, unsorted_offsets, order,
9759 check_regs ? unsorted_regs : NULL))
9760 return 0;
9762 if (saved_order)
9763 memcpy (saved_order, order, sizeof order);
9765 if (base)
9767 *base = base_reg;
9769 for (i = 0; i < nops; i++)
9770 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9772 *load_offset = unsorted_offsets[order[0]];
9775 if (TARGET_THUMB1
9776 && !peep2_reg_dead_p (nops, base_reg_rtx))
9777 return 0;
9779 if (unsorted_offsets[order[0]] == 0)
9780 ldm_case = 1; /* ldmia */
9781 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9782 ldm_case = 2; /* ldmib */
9783 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9784 ldm_case = 3; /* ldmda */
9785 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9786 ldm_case = 4; /* ldmdb */
9787 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9788 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9789 ldm_case = 5;
9790 else
9791 return 0;
9793 if (!multiple_operation_profitable_p (false, nops,
9794 ldm_case == 5
9795 ? unsorted_offsets[order[0]] : 0))
9796 return 0;
9798 return ldm_case;
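/* Reading of the result (illustrative, not from the sources): the returned
   LDM_CASE selects the addressing mode the caller will emit.  Four loads at
   offsets 0, 4, 8, 12 from the base give case 1 (ldmia); offsets starting at
   4 give case 2 (ldmib) on ARM; offsets ending at 0 or -4 give the descending
   forms; case 5 means the base register must first be adjusted by the lowest
   offset before an ldmia can be used.  */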
9801 /* Used to determine in a peephole whether a sequence of store instructions can
9802 be changed into a store-multiple instruction.
9803 NOPS is the number of separate store instructions we are examining.
9804 NOPS_TOTAL is the total number of instructions recognized by the peephole
9805 pattern.
9806 The first NOPS entries in OPERANDS are the source registers, the next
9807 NOPS entries are memory operands. If this function is successful, *BASE is
9808 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9809 to the first memory location's offset from that base register. REGS is an
9810 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9811 likewise filled with the corresponding rtx's.
9812 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9813 numbers to an ascending order of stores.
9814 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9815 from ascending memory locations, and the function verifies that the register
9816 numbers are themselves ascending. If CHECK_REGS is false, the register
9817 numbers are stored in the order they are found in the operands. */
9818 static int
9819 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9820 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9821 HOST_WIDE_INT *load_offset, bool check_regs)
9823 int unsorted_regs[MAX_LDM_STM_OPS];
9824 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9825 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9826 int order[MAX_LDM_STM_OPS];
9827 int base_reg = -1;
9828 rtx base_reg_rtx = NULL;
9829 int i, stm_case;
9831 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9832 easily extended if required. */
9833 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9835 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9837 /* Loop over the operands and check that the memory references are
9838 suitable (i.e. immediate offsets from the same base register). At
9839 the same time, extract the target register, and the memory
9840 offsets. */
9841 for (i = 0; i < nops; i++)
9843 rtx reg;
9844 rtx offset;
9846 /* Convert a subreg of a mem into the mem itself. */
9847 if (GET_CODE (operands[nops + i]) == SUBREG)
9848 operands[nops + i] = alter_subreg (operands + (nops + i));
9850 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9852 /* Don't reorder volatile memory references; it doesn't seem worth
9853 looking for the case where the order is ok anyway. */
9854 if (MEM_VOLATILE_P (operands[nops + i]))
9855 return 0;
9857 offset = const0_rtx;
9859 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9860 || (GET_CODE (reg) == SUBREG
9861 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9862 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9863 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9864 == REG)
9865 || (GET_CODE (reg) == SUBREG
9866 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9867 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9868 == CONST_INT)))
9870 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9871 ? operands[i] : SUBREG_REG (operands[i]));
9872 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9874 if (i == 0)
9876 base_reg = REGNO (reg);
9877 base_reg_rtx = reg;
9878 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9879 return 0;
9881 else if (base_reg != (int) REGNO (reg))
9882 /* Not addressed from the same base register. */
9883 return 0;
9885 /* If it isn't an integer register, then we can't do this. */
9886 if (unsorted_regs[i] < 0
9887 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9888 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9889 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9890 || unsorted_regs[i] > 14)
9891 return 0;
9893 unsorted_offsets[i] = INTVAL (offset);
9894 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9895 order[0] = i;
9897 else
9898 /* Not a suitable memory address. */
9899 return 0;
9902 /* All the useful information has now been extracted from the
9903 operands into unsorted_regs and unsorted_offsets; additionally,
9904 order[0] has been set to the lowest offset in the list. Sort
9905 the offsets into order, verifying that they are adjacent, and
9906 check that the register numbers are ascending. */
9907 if (!compute_offset_order (nops, unsorted_offsets, order,
9908 check_regs ? unsorted_regs : NULL))
9909 return 0;
9911 if (saved_order)
9912 memcpy (saved_order, order, sizeof order);
9914 if (base)
9916 *base = base_reg;
9918 for (i = 0; i < nops; i++)
9920 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9921 if (reg_rtxs)
9922 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9925 *load_offset = unsorted_offsets[order[0]];
9928 if (TARGET_THUMB1
9929 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9930 return 0;
9932 if (unsorted_offsets[order[0]] == 0)
9933 stm_case = 1; /* stmia */
9934 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9935 stm_case = 2; /* stmib */
9936 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9937 stm_case = 3; /* stmda */
9938 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9939 stm_case = 4; /* stmdb */
9940 else
9941 return 0;
9943 if (!multiple_operation_profitable_p (false, nops, 0))
9944 return 0;
9946 return stm_case;
9949 /* Routines for use in generating RTL. */
9951 /* Generate a load-multiple instruction. COUNT is the number of loads in
9952 the instruction; REGS and MEMS are arrays containing the operands.
9953 BASEREG is the base register to be used in addressing the memory operands.
9954 WBACK_OFFSET is nonzero if the instruction should update the base
9955 register. */
9957 static rtx
9958 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9959 HOST_WIDE_INT wback_offset)
9961 int i = 0, j;
9962 rtx result;
9964 if (!multiple_operation_profitable_p (false, count, 0))
9966 rtx seq;
9968 start_sequence ();
9970 for (i = 0; i < count; i++)
9971 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9973 if (wback_offset != 0)
9974 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9976 seq = get_insns ();
9977 end_sequence ();
9979 return seq;
9982 result = gen_rtx_PARALLEL (VOIDmode,
9983 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9984 if (wback_offset != 0)
9986 XVECEXP (result, 0, 0)
9987 = gen_rtx_SET (VOIDmode, basereg,
9988 plus_constant (basereg, wback_offset));
9989 i = 1;
9990 count++;
9993 for (j = 0; i < count; i++, j++)
9994 XVECEXP (result, 0, i)
9995 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9997 return result;
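/* Shape of the RTL built above (illustrative).  For COUNT == 2,
   REGS == { 4, 5 } and WBACK_OFFSET == 8 the PARALLEL is roughly:
     (parallel [(set (reg base) (plus (reg base) (const_int 8)))
		(set (reg:SI 4) <mems[0]>)
		(set (reg:SI 5) <mems[1]>)])
   With WBACK_OFFSET == 0 the base-update SET is simply omitted.  */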
10000 /* Generate a store-multiple instruction. COUNT is the number of stores in
10001 the instruction; REGS and MEMS are arrays containing the operands.
10002 BASEREG is the base register to be used in addressing the memory operands.
10003 WBACK_OFFSET is nonzero if the instruction should update the base
10004 register. */
10006 static rtx
10007 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10008 HOST_WIDE_INT wback_offset)
10010 int i = 0, j;
10011 rtx result;
10013 if (GET_CODE (basereg) == PLUS)
10014 basereg = XEXP (basereg, 0);
10016 if (!multiple_operation_profitable_p (false, count, 0))
10018 rtx seq;
10020 start_sequence ();
10022 for (i = 0; i < count; i++)
10023 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10025 if (wback_offset != 0)
10026 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10028 seq = get_insns ();
10029 end_sequence ();
10031 return seq;
10034 result = gen_rtx_PARALLEL (VOIDmode,
10035 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10036 if (wback_offset != 0)
10038 XVECEXP (result, 0, 0)
10039 = gen_rtx_SET (VOIDmode, basereg,
10040 plus_constant (basereg, wback_offset));
10041 i = 1;
10042 count++;
10045 for (j = 0; i < count; i++, j++)
10046 XVECEXP (result, 0, i)
10047 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10049 return result;
10052 /* Generate either a load-multiple or a store-multiple instruction. This
10053 function can be used in situations where we can start with a single MEM
10054 rtx and adjust its address upwards.
10055 COUNT is the number of operations in the instruction, not counting a
10056 possible update of the base register. REGS is an array containing the
10057 register operands.
10058 BASEREG is the base register to be used in addressing the memory operands,
10059 which are constructed from BASEMEM.
10060 WRITE_BACK specifies whether the generated instruction should include an
10061 update of the base register.
10062 OFFSETP is used to pass an offset to and from this function; this offset
10063 is not used when constructing the address (instead BASEMEM should have an
10064 appropriate offset in its address); it is used only for setting
10065 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
10067 static rtx
10068 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10069 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10071 rtx mems[MAX_LDM_STM_OPS];
10072 HOST_WIDE_INT offset = *offsetp;
10073 int i;
10075 gcc_assert (count <= MAX_LDM_STM_OPS);
10077 if (GET_CODE (basereg) == PLUS)
10078 basereg = XEXP (basereg, 0);
10080 for (i = 0; i < count; i++)
10082 rtx addr = plus_constant (basereg, i * 4);
10083 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10084 offset += 4;
10087 if (write_back)
10088 *offsetp = offset;
10090 if (is_load)
10091 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10092 write_back ? 4 * count : 0);
10093 else
10094 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10095 write_back ? 4 * count : 0);
10099 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10100 rtx basemem, HOST_WIDE_INT *offsetp)
10102 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10103 offsetp);
10107 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10108 rtx basemem, HOST_WIDE_INT *offsetp)
10110 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10111 offsetp);
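/* Hypothetical usage sketch: emit a four-word load multiple with base
   writeback, mirroring the call made by arm_gen_movmemqi below.  The
   variable names are placeholders.  */
#if 0
  HOST_WIDE_INT off = 0;
  emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src_reg,
				    TRUE, src_mem, &off));
#endif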
10114 /* Called from a peephole2 expander to turn a sequence of loads into an
10115 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10116 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10117 is true if we can reorder the registers because they are used commutatively
10118 subsequently.
10119 Returns true iff we could generate a new instruction. */
10121 bool
10122 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10124 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10125 rtx mems[MAX_LDM_STM_OPS];
10126 int i, j, base_reg;
10127 rtx base_reg_rtx;
10128 HOST_WIDE_INT offset;
10129 int write_back = FALSE;
10130 int ldm_case;
10131 rtx addr;
10133 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10134 &base_reg, &offset, !sort_regs);
10136 if (ldm_case == 0)
10137 return false;
10139 if (sort_regs)
10140 for (i = 0; i < nops - 1; i++)
10141 for (j = i + 1; j < nops; j++)
10142 if (regs[i] > regs[j])
10144 int t = regs[i];
10145 regs[i] = regs[j];
10146 regs[j] = t;
10148 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10150 if (TARGET_THUMB1)
10152 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10153 gcc_assert (ldm_case == 1 || ldm_case == 5);
10154 write_back = TRUE;
10157 if (ldm_case == 5)
10159 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10160 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10161 offset = 0;
10162 if (!TARGET_THUMB1)
10164 base_reg = regs[0];
10165 base_reg_rtx = newbase;
10169 for (i = 0; i < nops; i++)
10171 addr = plus_constant (base_reg_rtx, offset + i * 4);
10172 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10173 SImode, addr, 0);
10175 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10176 write_back ? offset + i * 4 : 0));
10177 return true;
10180 /* Called from a peephole2 expander to turn a sequence of stores into an
10181 STM instruction. OPERANDS are the operands found by the peephole matcher;
10182 NOPS indicates how many separate stores we are trying to combine.
10183 Returns true iff we could generate a new instruction. */
10185 bool
10186 gen_stm_seq (rtx *operands, int nops)
10188 int i;
10189 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10190 rtx mems[MAX_LDM_STM_OPS];
10191 int base_reg;
10192 rtx base_reg_rtx;
10193 HOST_WIDE_INT offset;
10194 int write_back = FALSE;
10195 int stm_case;
10196 rtx addr;
10197 bool base_reg_dies;
10199 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10200 mem_order, &base_reg, &offset, true);
10202 if (stm_case == 0)
10203 return false;
10205 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10207 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10208 if (TARGET_THUMB1)
10210 gcc_assert (base_reg_dies);
10211 write_back = TRUE;
10214 if (stm_case == 5)
10216 gcc_assert (base_reg_dies);
10217 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10218 offset = 0;
10221 addr = plus_constant (base_reg_rtx, offset);
10223 for (i = 0; i < nops; i++)
10225 addr = plus_constant (base_reg_rtx, offset + i * 4);
10226 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10227 SImode, addr, 0);
10229 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10230 write_back ? offset + i * 4 : 0));
10231 return true;
10234 /* Called from a peephole2 expander to turn a sequence of stores that are
10235 preceded by constant loads into an STM instruction. OPERANDS are the
10236 operands found by the peephole matcher; NOPS indicates how many
10237 separate stores we are trying to combine; there are 2 * NOPS
10238 instructions in the peephole.
10239 Returns true iff we could generate a new instruction. */
10241 bool
10242 gen_const_stm_seq (rtx *operands, int nops)
10244 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10245 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10246 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10247 rtx mems[MAX_LDM_STM_OPS];
10248 int base_reg;
10249 rtx base_reg_rtx;
10250 HOST_WIDE_INT offset;
10251 int write_back = FALSE;
10252 int stm_case;
10253 rtx addr;
10254 bool base_reg_dies;
10255 int i, j;
10256 HARD_REG_SET allocated;
10258 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10259 mem_order, &base_reg, &offset, false);
10261 if (stm_case == 0)
10262 return false;
10264 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10266 /* If the same register is used more than once, try to find a free
10267 register. */
10268 CLEAR_HARD_REG_SET (allocated);
10269 for (i = 0; i < nops; i++)
10271 for (j = i + 1; j < nops; j++)
10272 if (regs[i] == regs[j])
10274 rtx t = peep2_find_free_register (0, nops * 2,
10275 TARGET_THUMB1 ? "l" : "r",
10276 SImode, &allocated);
10277 if (t == NULL_RTX)
10278 return false;
10279 reg_rtxs[i] = t;
10280 regs[i] = REGNO (t);
10284 /* Compute an ordering that maps the register numbers to an ascending
10285 sequence. */
10286 reg_order[0] = 0;
10287 for (i = 0; i < nops; i++)
10288 if (regs[i] < regs[reg_order[0]])
10289 reg_order[0] = i;
10291 for (i = 1; i < nops; i++)
10293 int this_order = reg_order[i - 1];
10294 for (j = 0; j < nops; j++)
10295 if (regs[j] > regs[reg_order[i - 1]]
10296 && (this_order == reg_order[i - 1]
10297 || regs[j] < regs[this_order]))
10298 this_order = j;
10299 reg_order[i] = this_order;
10302 /* Ensure that registers that must be live after the instruction end
10303 up with the correct value. */
10304 for (i = 0; i < nops; i++)
10306 int this_order = reg_order[i];
10307 if ((this_order != mem_order[i]
10308 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10309 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10310 return false;
10313 /* Load the constants. */
10314 for (i = 0; i < nops; i++)
10316 rtx op = operands[2 * nops + mem_order[i]];
10317 sorted_regs[i] = regs[reg_order[i]];
10318 emit_move_insn (reg_rtxs[reg_order[i]], op);
10321 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10323 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10324 if (TARGET_THUMB1)
10326 gcc_assert (base_reg_dies);
10327 write_back = TRUE;
10330 if (stm_case == 5)
10332 gcc_assert (base_reg_dies);
10333 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10334 offset = 0;
10337 addr = plus_constant (base_reg_rtx, offset);
10339 for (i = 0; i < nops; i++)
10341 addr = plus_constant (base_reg_rtx, offset + i * 4);
10342 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10343 SImode, addr, 0);
10345 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10346 write_back ? offset + i * 4 : 0));
10347 return true;
10351 arm_gen_movmemqi (rtx *operands)
10353 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10354 HOST_WIDE_INT srcoffset, dstoffset;
10355 int i;
10356 rtx src, dst, srcbase, dstbase;
10357 rtx part_bytes_reg = NULL;
10358 rtx mem;
10360 if (GET_CODE (operands[2]) != CONST_INT
10361 || GET_CODE (operands[3]) != CONST_INT
10362 || INTVAL (operands[2]) > 64
10363 || INTVAL (operands[3]) & 3)
10364 return 0;
10366 dstbase = operands[0];
10367 srcbase = operands[1];
10369 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10370 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10372 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10373 out_words_to_go = INTVAL (operands[2]) / 4;
10374 last_bytes = INTVAL (operands[2]) & 3;
10375 dstoffset = srcoffset = 0;
10377 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10378 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10380 for (i = 0; in_words_to_go >= 2; i+=4)
10382 if (in_words_to_go > 4)
10383 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10384 TRUE, srcbase, &srcoffset));
10385 else
10386 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10387 src, FALSE, srcbase,
10388 &srcoffset));
10390 if (out_words_to_go)
10392 if (out_words_to_go > 4)
10393 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10394 TRUE, dstbase, &dstoffset));
10395 else if (out_words_to_go != 1)
10396 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10397 out_words_to_go, dst,
10398 (last_bytes == 0
10399 ? FALSE : TRUE),
10400 dstbase, &dstoffset));
10401 else
10403 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10404 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10405 if (last_bytes != 0)
10407 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10408 dstoffset += 4;
10413 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10414 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10417 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10418 if (out_words_to_go)
10420 rtx sreg;
10422 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10423 sreg = copy_to_reg (mem);
10425 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10426 emit_move_insn (mem, sreg);
10427 in_words_to_go--;
10429 gcc_assert (!in_words_to_go); /* Sanity check */
10432 if (in_words_to_go)
10434 gcc_assert (in_words_to_go > 0);
10436 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10437 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10440 gcc_assert (!last_bytes || part_bytes_reg);
10442 if (BYTES_BIG_ENDIAN && last_bytes)
10444 rtx tmp = gen_reg_rtx (SImode);
10446 /* The bytes we want are in the top end of the word. */
10447 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10448 GEN_INT (8 * (4 - last_bytes))));
10449 part_bytes_reg = tmp;
10451 while (last_bytes)
10453 mem = adjust_automodify_address (dstbase, QImode,
10454 plus_constant (dst, last_bytes - 1),
10455 dstoffset + last_bytes - 1);
10456 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10458 if (--last_bytes)
10460 tmp = gen_reg_rtx (SImode);
10461 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10462 part_bytes_reg = tmp;
10467 else
10469 if (last_bytes > 1)
10471 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10472 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10473 last_bytes -= 2;
10474 if (last_bytes)
10476 rtx tmp = gen_reg_rtx (SImode);
10477 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10478 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10479 part_bytes_reg = tmp;
10480 dstoffset += 2;
10484 if (last_bytes)
10486 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10487 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10491 return 1;
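/* Worked example (illustrative reading of the code above): copying 14 bytes
   with word-aligned operands loads four words into r0-r3, stores three words
   with base-register writeback, and finishes with a halfword store taken from
   the low half of r3, which holds the partial last word.  */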
10494 /* Select a dominance comparison mode if possible for a test of the general
10495 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10496 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10497 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10498 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10499 In all cases OP will be either EQ or NE, but we don't need to know which
10500 here. If we are unable to support a dominance comparison we return
10501 CC mode. This will then fail to match for the RTL expressions that
10502 generate this call. */
10503 enum machine_mode
10504 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10506 enum rtx_code cond1, cond2;
10507 int swapped = 0;
10509 /* Currently we will probably get the wrong result if the individual
10510 comparisons are not simple. This also ensures that it is safe to
10511 reverse a comparison if necessary. */
10512 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10513 != CCmode)
10514 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10515 != CCmode))
10516 return CCmode;
10518 /* The if_then_else variant of this tests the second condition if the
10519 first passes, but is true if the first fails. Reverse the first
10520 condition to get a true "inclusive-or" expression. */
10521 if (cond_or == DOM_CC_NX_OR_Y)
10522 cond1 = reverse_condition (cond1);
10524 /* If the comparisons are not equal, and one doesn't dominate the other,
10525 then we can't do this. */
10526 if (cond1 != cond2
10527 && !comparison_dominates_p (cond1, cond2)
10528 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10529 return CCmode;
10531 if (swapped)
10533 enum rtx_code temp = cond1;
10534 cond1 = cond2;
10535 cond2 = temp;
10538 switch (cond1)
10540 case EQ:
10541 if (cond_or == DOM_CC_X_AND_Y)
10542 return CC_DEQmode;
10544 switch (cond2)
10546 case EQ: return CC_DEQmode;
10547 case LE: return CC_DLEmode;
10548 case LEU: return CC_DLEUmode;
10549 case GE: return CC_DGEmode;
10550 case GEU: return CC_DGEUmode;
10551 default: gcc_unreachable ();
10554 case LT:
10555 if (cond_or == DOM_CC_X_AND_Y)
10556 return CC_DLTmode;
10558 switch (cond2)
10560 case LT:
10561 return CC_DLTmode;
10562 case LE:
10563 return CC_DLEmode;
10564 case NE:
10565 return CC_DNEmode;
10566 default:
10567 gcc_unreachable ();
10570 case GT:
10571 if (cond_or == DOM_CC_X_AND_Y)
10572 return CC_DGTmode;
10574 switch (cond2)
10576 case GT:
10577 return CC_DGTmode;
10578 case GE:
10579 return CC_DGEmode;
10580 case NE:
10581 return CC_DNEmode;
10582 default:
10583 gcc_unreachable ();
10586 case LTU:
10587 if (cond_or == DOM_CC_X_AND_Y)
10588 return CC_DLTUmode;
10590 switch (cond2)
10592 case LTU:
10593 return CC_DLTUmode;
10594 case LEU:
10595 return CC_DLEUmode;
10596 case NE:
10597 return CC_DNEmode;
10598 default:
10599 gcc_unreachable ();
10602 case GTU:
10603 if (cond_or == DOM_CC_X_AND_Y)
10604 return CC_DGTUmode;
10606 switch (cond2)
10608 case GTU:
10609 return CC_DGTUmode;
10610 case GEU:
10611 return CC_DGEUmode;
10612 case NE:
10613 return CC_DNEmode;
10614 default:
10615 gcc_unreachable ();
10618 /* The remaining cases only occur when both comparisons are the
10619 same. */
10620 case NE:
10621 gcc_assert (cond1 == cond2);
10622 return CC_DNEmode;
10624 case LE:
10625 gcc_assert (cond1 == cond2);
10626 return CC_DLEmode;
10628 case GE:
10629 gcc_assert (cond1 == cond2);
10630 return CC_DGEmode;
10632 case LEU:
10633 gcc_assert (cond1 == cond2);
10634 return CC_DLEUmode;
10636 case GEU:
10637 gcc_assert (cond1 == cond2);
10638 return CC_DGEUmode;
10640 default:
10641 gcc_unreachable ();
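/* Example (illustrative): for X == (eq a b), Y == (le a b) and
   COND_OR == DOM_CC_X_OR_Y, EQ dominates LE, so the function returns
   CC_DLEmode; if neither condition dominated the other it would fall back to
   CCmode as described in the comment before the function.  */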
10645 enum machine_mode
10646 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10648 /* All floating point compares return CCFP if it is an equality
10649 comparison, and CCFPE otherwise. */
10650 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10652 switch (op)
10654 case EQ:
10655 case NE:
10656 case UNORDERED:
10657 case ORDERED:
10658 case UNLT:
10659 case UNLE:
10660 case UNGT:
10661 case UNGE:
10662 case UNEQ:
10663 case LTGT:
10664 return CCFPmode;
10666 case LT:
10667 case LE:
10668 case GT:
10669 case GE:
10670 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10671 return CCFPmode;
10672 return CCFPEmode;
10674 default:
10675 gcc_unreachable ();
10679 /* A compare with a shifted operand. Because of canonicalization, the
10680 comparison will have to be swapped when we emit the assembler. */
10681 if (GET_MODE (y) == SImode
10682 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10683 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10684 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10685 || GET_CODE (x) == ROTATERT))
10686 return CC_SWPmode;
10688 /* This operation is performed swapped, but since we only rely on the Z
10689 flag we don't need an additional mode. */
10690 if (GET_MODE (y) == SImode
10691 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10692 && GET_CODE (x) == NEG
10693 && (op == EQ || op == NE))
10694 return CC_Zmode;
10696 /* This is a special case that is used by combine to allow a
10697 comparison of a shifted byte load to be split into a zero-extend
10698 followed by a comparison of the shifted integer (only valid for
10699 equalities and unsigned inequalities). */
10700 if (GET_MODE (x) == SImode
10701 && GET_CODE (x) == ASHIFT
10702 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10703 && GET_CODE (XEXP (x, 0)) == SUBREG
10704 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10705 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10706 && (op == EQ || op == NE
10707 || op == GEU || op == GTU || op == LTU || op == LEU)
10708 && GET_CODE (y) == CONST_INT)
10709 return CC_Zmode;
10711 /* A construct for a conditional compare, if the false arm contains
10712 0, then both conditions must be true, otherwise either condition
10713 must be true. Not all conditions are possible, so CCmode is
10714 returned if it can't be done. */
10715 if (GET_CODE (x) == IF_THEN_ELSE
10716 && (XEXP (x, 2) == const0_rtx
10717 || XEXP (x, 2) == const1_rtx)
10718 && COMPARISON_P (XEXP (x, 0))
10719 && COMPARISON_P (XEXP (x, 1)))
10720 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10721 INTVAL (XEXP (x, 2)));
10723 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10724 if (GET_CODE (x) == AND
10725 && (op == EQ || op == NE)
10726 && COMPARISON_P (XEXP (x, 0))
10727 && COMPARISON_P (XEXP (x, 1)))
10728 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10729 DOM_CC_X_AND_Y);
10731 if (GET_CODE (x) == IOR
10732 && (op == EQ || op == NE)
10733 && COMPARISON_P (XEXP (x, 0))
10734 && COMPARISON_P (XEXP (x, 1)))
10735 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10736 DOM_CC_X_OR_Y);
10738 /* An operation (on Thumb) where we want to test for a single bit.
10739 This is done by shifting that bit up into the top bit of a
10740 scratch register; we can then branch on the sign bit. */
10741 if (TARGET_THUMB1
10742 && GET_MODE (x) == SImode
10743 && (op == EQ || op == NE)
10744 && GET_CODE (x) == ZERO_EXTRACT
10745 && XEXP (x, 1) == const1_rtx)
10746 return CC_Nmode;
10748 /* An operation that sets the condition codes as a side-effect, the
10749 V flag is not set correctly, so we can only use comparisons where
10750 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10751 instead.) */
10752 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10753 if (GET_MODE (x) == SImode
10754 && y == const0_rtx
10755 && (op == EQ || op == NE || op == LT || op == GE)
10756 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10757 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10758 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10759 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10760 || GET_CODE (x) == LSHIFTRT
10761 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10762 || GET_CODE (x) == ROTATERT
10763 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10764 return CC_NOOVmode;
10766 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10767 return CC_Zmode;
10769 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10770 && GET_CODE (x) == PLUS
10771 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10772 return CC_Cmode;
10774 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10776 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10777 available. */
10778 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10779 return CCmode;
10781 switch (op)
10783 case EQ:
10784 case NE:
10785 /* A DImode comparison against zero can be implemented by
10786 or'ing the two halves together. */
10787 if (y == const0_rtx)
10788 return CC_Zmode;
10790 /* We can do an equality test in three Thumb instructions. */
10791 if (!TARGET_ARM)
10792 return CC_Zmode;
10794 /* FALLTHROUGH */
10796 case LTU:
10797 case LEU:
10798 case GTU:
10799 case GEU:
10800 /* DImode unsigned comparisons can be implemented by cmp +
10801 cmpeq without a scratch register. Not worth doing in
10802 Thumb-2. */
10803 if (TARGET_ARM)
10804 return CC_CZmode;
10806 /* FALLTHROUGH */
10808 case LT:
10809 case LE:
10810 case GT:
10811 case GE:
10812 /* DImode signed and unsigned comparisons can be implemented
10813 by cmp + sbcs with a scratch register, but that does not
10814 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10815 gcc_assert (op != EQ && op != NE);
10816 return CC_NCVmode;
10818 default:
10819 gcc_unreachable ();
10823 return CCmode;
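/* Examples (illustrative): an SImode test (lt (plus a b) (const_int 0))
   selects CC_NOOVmode, because the V flag cannot be trusted after the
   addition; an SImode GEU test of (plus a b) against a selects CC_Cmode,
   using the carry out of the addition; a DImode equality against zero
   selects CC_Zmode.  */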
10826 /* X and Y are two things to compare using CODE. Emit the compare insn and
10827 return the rtx for the CC register in the proper mode. */
10830 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10832 enum machine_mode mode;
10833 rtx cc_reg;
10834 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10836 /* We might have X as a constant, Y as a register because of the predicates
10837 used for cmpdi. If so, force X to a register here. */
10838 if (dimode_comparison && !REG_P (x))
10839 x = force_reg (DImode, x);
10841 mode = SELECT_CC_MODE (code, x, y);
10842 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10844 if (dimode_comparison
10845 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10846 && mode != CC_CZmode)
10848 rtx clobber, set;
10850 /* To compare two non-zero values for equality, XOR them and
10851 then compare against zero. Not used for ARM mode; there
10852 CC_CZmode is cheaper. */
10853 if (mode == CC_Zmode && y != const0_rtx)
10855 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10856 y = const0_rtx;
10858 /* A scratch register is required. */
10859 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10860 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10861 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10863 else
10864 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10866 return cc_reg;
10869 /* Generate a sequence of insns that will generate the correct return
10870 address mask depending on the physical architecture that the program
10871 is running on. */
10873 arm_gen_return_addr_mask (void)
10875 rtx reg = gen_reg_rtx (Pmode);
10877 emit_insn (gen_return_addr_mask (reg));
10878 return reg;
10881 void
10882 arm_reload_in_hi (rtx *operands)
10884 rtx ref = operands[1];
10885 rtx base, scratch;
10886 HOST_WIDE_INT offset = 0;
10888 if (GET_CODE (ref) == SUBREG)
10890 offset = SUBREG_BYTE (ref);
10891 ref = SUBREG_REG (ref);
10894 if (GET_CODE (ref) == REG)
10896 /* We have a pseudo which has been spilt onto the stack; there
10897 are two cases here: the first where there is a simple
10898 stack-slot replacement and a second where the stack-slot is
10899 out of range, or is used as a subreg. */
10900 if (reg_equiv_mem (REGNO (ref)))
10902 ref = reg_equiv_mem (REGNO (ref));
10903 base = find_replacement (&XEXP (ref, 0));
10905 else
10906 /* The slot is out of range, or was dressed up in a SUBREG. */
10907 base = reg_equiv_address (REGNO (ref));
10909 else
10910 base = find_replacement (&XEXP (ref, 0));
10912 /* Handle the case where the address is too complex to be offset by 1. */
10913 if (GET_CODE (base) == MINUS
10914 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10916 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10918 emit_set_insn (base_plus, base);
10919 base = base_plus;
10921 else if (GET_CODE (base) == PLUS)
10923 /* The addend must be CONST_INT, or we would have dealt with it above. */
10924 HOST_WIDE_INT hi, lo;
10926 offset += INTVAL (XEXP (base, 1));
10927 base = XEXP (base, 0);
10929 /* Rework the address into a legal sequence of insns. */
10930 /* Valid range for lo is -4095 -> 4095 */
10931 lo = (offset >= 0
10932 ? (offset & 0xfff)
10933 : -((-offset) & 0xfff));
10935 /* Corner case: if lo is the max offset, then we would be out of range
10936 once we have added the additional 1 below, so bump the msb into the
10937 pre-loading insn(s). */
10938 if (lo == 4095)
10939 lo &= 0x7ff;
10941 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10942 ^ (HOST_WIDE_INT) 0x80000000)
10943 - (HOST_WIDE_INT) 0x80000000);
10945 gcc_assert (hi + lo == offset);
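      /* Worked example (illustrative): an OFFSET of 0x1234 splits into
	 LO == 0x234 and HI == 0x1000, so the assertion above holds and the
	 following add of HI keeps LO within the +/-4095 load range.  */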
10947 if (hi != 0)
10949 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10951 /* Get the base address; addsi3 knows how to handle constants
10952 that require more than one insn. */
10953 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10954 base = base_plus;
10955 offset = lo;
10959 /* Operands[2] may overlap operands[0] (though it won't overlap
10960 operands[1]), that's why we asked for a DImode reg -- so we can
10961 use the bit that does not overlap. */
10962 if (REGNO (operands[2]) == REGNO (operands[0]))
10963 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10964 else
10965 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10967 emit_insn (gen_zero_extendqisi2 (scratch,
10968 gen_rtx_MEM (QImode,
10969 plus_constant (base,
10970 offset))));
10971 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10972 gen_rtx_MEM (QImode,
10973 plus_constant (base,
10974 offset + 1))));
10975 if (!BYTES_BIG_ENDIAN)
10976 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10977 gen_rtx_IOR (SImode,
10978 gen_rtx_ASHIFT
10979 (SImode,
10980 gen_rtx_SUBREG (SImode, operands[0], 0),
10981 GEN_INT (8)),
10982 scratch));
10983 else
10984 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10985 gen_rtx_IOR (SImode,
10986 gen_rtx_ASHIFT (SImode, scratch,
10987 GEN_INT (8)),
10988 gen_rtx_SUBREG (SImode, operands[0], 0)));
10991 /* Handle storing a half-word to memory during reload by synthesizing as two
10992 byte stores. Take care not to clobber the input values until after we
10993 have moved them somewhere safe. This code assumes that if the DImode
10994 scratch in operands[2] overlaps either the input value or output address
10995 in some way, then that value must die in this insn (we absolutely need
10996 two scratch registers for some corner cases). */
10997 void
10998 arm_reload_out_hi (rtx *operands)
11000 rtx ref = operands[0];
11001 rtx outval = operands[1];
11002 rtx base, scratch;
11003 HOST_WIDE_INT offset = 0;
11005 if (GET_CODE (ref) == SUBREG)
11007 offset = SUBREG_BYTE (ref);
11008 ref = SUBREG_REG (ref);
11011 if (GET_CODE (ref) == REG)
11013 /* We have a pseudo which has been spilt onto the stack; there
11014 are two cases here: the first where there is a simple
11015 stack-slot replacement and a second where the stack-slot is
11016 out of range, or is used as a subreg. */
11017 if (reg_equiv_mem (REGNO (ref)))
11019 ref = reg_equiv_mem (REGNO (ref));
11020 base = find_replacement (&XEXP (ref, 0));
11022 else
11023 /* The slot is out of range, or was dressed up in a SUBREG. */
11024 base = reg_equiv_address (REGNO (ref));
11026 else
11027 base = find_replacement (&XEXP (ref, 0));
11029 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11031 /* Handle the case where the address is too complex to be offset by 1. */
11032 if (GET_CODE (base) == MINUS
11033 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11035 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11037 /* Be careful not to destroy OUTVAL. */
11038 if (reg_overlap_mentioned_p (base_plus, outval))
11040 /* Updating base_plus might destroy outval, see if we can
11041 swap the scratch and base_plus. */
11042 if (!reg_overlap_mentioned_p (scratch, outval))
11044 rtx tmp = scratch;
11045 scratch = base_plus;
11046 base_plus = tmp;
11048 else
11050 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11052 /* Be conservative and copy OUTVAL into the scratch now,
11053 this should only be necessary if outval is a subreg
11054 of something larger than a word. */
11055 /* XXX Might this clobber base? I can't see how it can,
11056 since scratch is known to overlap with OUTVAL, and
11057 must be wider than a word. */
11058 emit_insn (gen_movhi (scratch_hi, outval));
11059 outval = scratch_hi;
11063 emit_set_insn (base_plus, base);
11064 base = base_plus;
11066 else if (GET_CODE (base) == PLUS)
11068 /* The addend must be CONST_INT, or we would have dealt with it above. */
11069 HOST_WIDE_INT hi, lo;
11071 offset += INTVAL (XEXP (base, 1));
11072 base = XEXP (base, 0);
11074 /* Rework the address into a legal sequence of insns. */
11075 /* Valid range for lo is -4095 -> 4095 */
11076 lo = (offset >= 0
11077 ? (offset & 0xfff)
11078 : -((-offset) & 0xfff));
11080 /* Corner case: if lo is the max offset, then we would be out of range
11081 once we have added the additional 1 below, so bump the msb into the
11082 pre-loading insn(s). */
11083 if (lo == 4095)
11084 lo &= 0x7ff;
11086 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11087 ^ (HOST_WIDE_INT) 0x80000000)
11088 - (HOST_WIDE_INT) 0x80000000);
11090 gcc_assert (hi + lo == offset);
11092 if (hi != 0)
11094 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11096 /* Be careful not to destroy OUTVAL. */
11097 if (reg_overlap_mentioned_p (base_plus, outval))
11099 /* Updating base_plus might destroy outval, see if we
11100 can swap the scratch and base_plus. */
11101 if (!reg_overlap_mentioned_p (scratch, outval))
11103 rtx tmp = scratch;
11104 scratch = base_plus;
11105 base_plus = tmp;
11107 else
11109 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11111 /* Be conservative and copy outval into scratch now,
11112 this should only be necessary if outval is a
11113 subreg of something larger than a word. */
11114 /* XXX Might this clobber base? I can't see how it
11115 can, since scratch is known to overlap with
11116 outval. */
11117 emit_insn (gen_movhi (scratch_hi, outval));
11118 outval = scratch_hi;
11122 /* Get the base address; addsi3 knows how to handle constants
11123 that require more than one insn. */
11124 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11125 base = base_plus;
11126 offset = lo;
11130 if (BYTES_BIG_ENDIAN)
11132 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11133 plus_constant (base, offset + 1)),
11134 gen_lowpart (QImode, outval)));
11135 emit_insn (gen_lshrsi3 (scratch,
11136 gen_rtx_SUBREG (SImode, outval, 0),
11137 GEN_INT (8)));
11138 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11139 gen_lowpart (QImode, scratch)));
11141 else
11143 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11144 gen_lowpart (QImode, outval)));
11145 emit_insn (gen_lshrsi3 (scratch,
11146 gen_rtx_SUBREG (SImode, outval, 0),
11147 GEN_INT (8)));
11148 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11149 plus_constant (base, offset + 1)),
11150 gen_lowpart (QImode, scratch)));
11154 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11155 (padded to the size of a word) should be passed in a register. */
11157 static bool
11158 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11160 if (TARGET_AAPCS_BASED)
11161 return must_pass_in_stack_var_size (mode, type);
11162 else
11163 return must_pass_in_stack_var_size_or_pad (mode, type);
11167 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11168 Return true if an argument passed on the stack should be padded upwards,
11169 i.e. if the least-significant byte has useful data.
11170 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11171 aggregate types are placed in the lowest memory address. */
11173 bool
11174 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11176 if (!TARGET_AAPCS_BASED)
11177 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11179 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11180 return false;
11182 return true;
11186 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11187 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11188 byte of the register has useful data, and return the opposite if the
11189 most significant byte does.
11190 For AAPCS, small aggregates and small complex types are always padded
11191 upwards. */
11193 bool
11194 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11195 tree type, int first ATTRIBUTE_UNUSED)
11197 if (TARGET_AAPCS_BASED
11198 && BYTES_BIG_ENDIAN
11199 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11200 && int_size_in_bytes (type) <= 4)
11201 return true;
11203 /* Otherwise, use default padding. */
11204 return !BYTES_BIG_ENDIAN;
11208 /* Print a symbolic form of X to the debug file, F. */
11209 static void
11210 arm_print_value (FILE *f, rtx x)
11212 switch (GET_CODE (x))
11214 case CONST_INT:
11215 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11216 return;
11218 case CONST_DOUBLE:
11219 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11220 return;
11222 case CONST_VECTOR:
11224 int i;
11226 fprintf (f, "<");
11227 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11229 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11230 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11231 fputc (',', f);
11233 fprintf (f, ">");
11235 return;
11237 case CONST_STRING:
11238 fprintf (f, "\"%s\"", XSTR (x, 0));
11239 return;
11241 case SYMBOL_REF:
11242 fprintf (f, "`%s'", XSTR (x, 0));
11243 return;
11245 case LABEL_REF:
11246 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11247 return;
11249 case CONST:
11250 arm_print_value (f, XEXP (x, 0));
11251 return;
11253 case PLUS:
11254 arm_print_value (f, XEXP (x, 0));
11255 fprintf (f, "+");
11256 arm_print_value (f, XEXP (x, 1));
11257 return;
11259 case PC:
11260 fprintf (f, "pc");
11261 return;
11263 default:
11264 fprintf (f, "????");
11265 return;
11269 /* Routines for manipulation of the constant pool. */
11271 /* Arm instructions cannot load a large constant directly into a
11272 register; they have to come from a pc relative load. The constant
11273 must therefore be placed in the addressable range of the pc
11274 relative load. Depending on the precise pc relative load
11275 instruction the range is somewhere between 256 bytes and 4k. This
11276 means that we often have to dump a constant inside a function, and
11277 generate code to branch around it.
11279 It is important to minimize this, since the branches will slow
11280 things down and make the code larger.
11282 Normally we can hide the table after an existing unconditional
11283 branch so that there is no interruption of the flow, but in the
11284 worst case the code looks like this:
11286 ldr rn, L1
11288 b L2
11289 align
11290 L1: .long value
11294 ldr rn, L3
11296 b L4
11297 align
11298 L3: .long value
11302 We fix this by performing a scan after scheduling, which notices
11303 which instructions need to have their operands fetched from the
11304 constant table and builds the table.
11306 The algorithm starts by building a table of all the constants that
11307 need fixing up and all the natural barriers in the function (places
11308 where a constant table can be dropped without breaking the flow).
11309 For each fixup we note how far the pc-relative replacement will be
11310 able to reach and the offset of the instruction into the function.
11312 Having built the table we then group the fixes together to form
11313 tables that are as large as possible (subject to addressing
11314 constraints) and emit each table of constants after the last
11315 barrier that is within range of all the instructions in the group.
11316 If a group does not contain a barrier, then we forcibly create one
11317 by inserting a jump instruction into the flow. Once the table has
11318 been inserted, the insns are then modified to reference the
11319 relevant entry in the pool.
11321 Possible enhancements to the algorithm (not implemented) are:
11323 1) For some processors and object formats, there may be benefit in
11324 aligning the pools to the start of cache lines; this alignment
11325 would need to be taken into account when calculating addressability
11326 of a pool. */
11328 /* These typedefs are located at the start of this file, so that
11329 they can be used in the prototypes there. This comment is to
11330 remind readers of that fact so that the following structures
11331 can be understood more easily.
11333 typedef struct minipool_node Mnode;
11334 typedef struct minipool_fixup Mfix; */
11336 struct minipool_node
11338 /* Doubly linked chain of entries. */
11339 Mnode * next;
11340 Mnode * prev;
11341 /* The maximum offset into the code that this entry can be placed. While
11342 pushing fixes for forward references, all entries are sorted in order
11343 of increasing max_address. */
11344 HOST_WIDE_INT max_address;
11345 /* Similarly for an entry inserted for a backwards ref. */
11346 HOST_WIDE_INT min_address;
11347 /* The number of fixes referencing this entry. This can become zero
11348 if we "unpush" an entry. In this case we ignore the entry when we
11349 come to emit the code. */
11350 int refcount;
11351 /* The offset from the start of the minipool. */
11352 HOST_WIDE_INT offset;
11353 /* The value in table. */
11354 rtx value;
11355 /* The mode of value. */
11356 enum machine_mode mode;
11357 /* The size of the value. With iWMMXt enabled,
11358 sizes > 4 also imply an alignment of 8 bytes. */
11359 int fix_size;
11362 struct minipool_fixup
11364 Mfix * next;
11365 rtx insn;
11366 HOST_WIDE_INT address;
11367 rtx * loc;
11368 enum machine_mode mode;
11369 int fix_size;
11370 rtx value;
11371 Mnode * minipool;
11372 HOST_WIDE_INT forwards;
11373 HOST_WIDE_INT backwards;
11376 /* Fixes less than a word need padding out to a word boundary. */
11377 #define MINIPOOL_FIX_SIZE(mode) \
11378 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
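/* For instance (illustrative): a HImode constant occupies 2 bytes but is
   padded to a 4-byte pool slot by the macro above, while a DImode constant
   keeps its natural 8-byte size.  */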
11380 static Mnode * minipool_vector_head;
11381 static Mnode * minipool_vector_tail;
11382 static rtx minipool_vector_label;
11383 static int minipool_pad;
11385 /* The linked list of all minipool fixes required for this function. */
11386 Mfix * minipool_fix_head;
11387 Mfix * minipool_fix_tail;
11388 /* The fix entry for the current minipool, once it has been placed. */
11389 Mfix * minipool_barrier;
11391 /* Determines if INSN is the start of a jump table. Returns the end
11392 of the TABLE or NULL_RTX. */
11393 static rtx
11394 is_jump_table (rtx insn)
11396 rtx table;
11398 if (GET_CODE (insn) == JUMP_INSN
11399 && JUMP_LABEL (insn) != NULL
11400 && ((table = next_real_insn (JUMP_LABEL (insn)))
11401 == next_real_insn (insn))
11402 && table != NULL
11403 && GET_CODE (table) == JUMP_INSN
11404 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11405 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11406 return table;
11408 return NULL_RTX;
11411 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11412 #define JUMP_TABLES_IN_TEXT_SECTION 0
11413 #endif
11415 static HOST_WIDE_INT
11416 get_jump_table_size (rtx insn)
11418 /* ADDR_VECs only take room if read-only data goes into the text
11419 section. */
11420 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11422 rtx body = PATTERN (insn);
11423 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11424 HOST_WIDE_INT size;
11425 HOST_WIDE_INT modesize;
11427 modesize = GET_MODE_SIZE (GET_MODE (body));
11428 size = modesize * XVECLEN (body, elt);
11429 switch (modesize)
11431 case 1:
11432 /* Round up size of TBB table to a halfword boundary. */
11433 size = (size + 1) & ~(HOST_WIDE_INT)1;
11434 break;
11435 case 2:
11436 /* No padding necessary for TBH. */
11437 break;
11438 case 4:
11439 /* Add two bytes for alignment on Thumb. */
11440 if (TARGET_THUMB)
11441 size += 2;
11442 break;
11443 default:
11444 gcc_unreachable ();
11446 return size;
11449 return 0;
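/* Illustrative reading of the sizes above: a QImode (TBB) table with seven
   entries takes 7 bytes, rounded up to 8; an HImode (TBH) table with seven
   entries takes 14 bytes with no padding; an SImode table gets 2 extra bytes
   of alignment when compiling for Thumb.  */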
11452 /* Move a minipool fix MP from its current location to before MAX_MP.
11453 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11454 constraints may need updating. */
11455 static Mnode *
11456 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11457 HOST_WIDE_INT max_address)
11459 /* The code below assumes these are different. */
11460 gcc_assert (mp != max_mp);
11462 if (max_mp == NULL)
11464 if (max_address < mp->max_address)
11465 mp->max_address = max_address;
11467 else
11469 if (max_address > max_mp->max_address - mp->fix_size)
11470 mp->max_address = max_mp->max_address - mp->fix_size;
11471 else
11472 mp->max_address = max_address;
11474 /* Unlink MP from its current position. Since max_mp is non-null,
11475 mp->prev must be non-null. */
11476 mp->prev->next = mp->next;
11477 if (mp->next != NULL)
11478 mp->next->prev = mp->prev;
11479 else
11480 minipool_vector_tail = mp->prev;
11482 /* Re-insert it before MAX_MP. */
11483 mp->next = max_mp;
11484 mp->prev = max_mp->prev;
11485 max_mp->prev = mp;
11487 if (mp->prev != NULL)
11488 mp->prev->next = mp;
11489 else
11490 minipool_vector_head = mp;
11493 /* Save the new entry. */
11494 max_mp = mp;
11496 /* Scan over the preceding entries and adjust their addresses as
11497 required. */
11498 while (mp->prev != NULL
11499 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11501 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11502 mp = mp->prev;
11505 return max_mp;
11508 /* Add a constant to the minipool for a forward reference. Returns the
11509 node added or NULL if the constant will not fit in this pool. */
11510 static Mnode *
11511 add_minipool_forward_ref (Mfix *fix)
11513 /* If set, max_mp is the first pool_entry that has a lower
11514 constraint than the one we are trying to add. */
11515 Mnode * max_mp = NULL;
11516 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11517 Mnode * mp;
11519 /* If the minipool starts before the end of FIX->INSN then this FIX
11520 can not be placed into the current pool. Furthermore, adding the
11521 new constant pool entry may cause the pool to start FIX_SIZE bytes
11522 earlier. */
11523 if (minipool_vector_head &&
11524 (fix->address + get_attr_length (fix->insn)
11525 >= minipool_vector_head->max_address - fix->fix_size))
11526 return NULL;
11528 /* Scan the pool to see if a constant with the same value has
11529 already been added. While we are doing this, also note the
11530 location where we must insert the constant if it doesn't already
11531 exist. */
11532 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11534 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11535 && fix->mode == mp->mode
11536 && (GET_CODE (fix->value) != CODE_LABEL
11537 || (CODE_LABEL_NUMBER (fix->value)
11538 == CODE_LABEL_NUMBER (mp->value)))
11539 && rtx_equal_p (fix->value, mp->value))
11541 /* More than one fix references this entry. */
11542 mp->refcount++;
11543 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11546 /* Note the insertion point if necessary. */
11547 if (max_mp == NULL
11548 && mp->max_address > max_address)
11549 max_mp = mp;
11551 /* If we are inserting an 8-byte aligned quantity and
11552 we have not already found an insertion point, then
11553 make sure that all such 8-byte aligned quantities are
11554 placed at the start of the pool. */
11555 if (ARM_DOUBLEWORD_ALIGN
11556 && max_mp == NULL
11557 && fix->fix_size >= 8
11558 && mp->fix_size < 8)
11560 max_mp = mp;
11561 max_address = mp->max_address;
11565 /* The value is not currently in the minipool, so we need to create
11566 a new entry for it. If MAX_MP is NULL, the entry will be put on
11567 the end of the list since the placement is less constrained than
11568 any existing entry. Otherwise, we insert the new fix before
11569 MAX_MP and, if necessary, adjust the constraints on the other
11570 entries. */
11571 mp = XNEW (Mnode);
11572 mp->fix_size = fix->fix_size;
11573 mp->mode = fix->mode;
11574 mp->value = fix->value;
11575 mp->refcount = 1;
11576 /* Not yet required for a backwards ref. */
11577 mp->min_address = -65536;
11579 if (max_mp == NULL)
11581 mp->max_address = max_address;
11582 mp->next = NULL;
11583 mp->prev = minipool_vector_tail;
11585 if (mp->prev == NULL)
11587 minipool_vector_head = mp;
11588 minipool_vector_label = gen_label_rtx ();
11590 else
11591 mp->prev->next = mp;
11593 minipool_vector_tail = mp;
11595 else
11597 if (max_address > max_mp->max_address - mp->fix_size)
11598 mp->max_address = max_mp->max_address - mp->fix_size;
11599 else
11600 mp->max_address = max_address;
11602 mp->next = max_mp;
11603 mp->prev = max_mp->prev;
11604 max_mp->prev = mp;
11605 if (mp->prev != NULL)
11606 mp->prev->next = mp;
11607 else
11608 minipool_vector_head = mp;
11611 /* Save the new entry. */
11612 max_mp = mp;
11614 /* Scan over the preceding entries and adjust their addresses as
11615 required. */
11616 while (mp->prev != NULL
11617 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11619 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11620 mp = mp->prev;
11623 return max_mp;
11626 static Mnode *
11627 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11628 HOST_WIDE_INT min_address)
11630 HOST_WIDE_INT offset;
11632 /* The code below assumes these are different. */
11633 gcc_assert (mp != min_mp);
11635 if (min_mp == NULL)
11637 if (min_address > mp->min_address)
11638 mp->min_address = min_address;
11640 else
11642 /* We will adjust this below if it is too loose. */
11643 mp->min_address = min_address;
11645 /* Unlink MP from its current position. Since min_mp is non-null,
11646 mp->next must be non-null. */
11647 mp->next->prev = mp->prev;
11648 if (mp->prev != NULL)
11649 mp->prev->next = mp->next;
11650 else
11651 minipool_vector_head = mp->next;
11653 /* Reinsert it after MIN_MP. */
11654 mp->prev = min_mp;
11655 mp->next = min_mp->next;
11656 min_mp->next = mp;
11657 if (mp->next != NULL)
11658 mp->next->prev = mp;
11659 else
11660 minipool_vector_tail = mp;
11663 min_mp = mp;
11665 offset = 0;
11666 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11668 mp->offset = offset;
11669 if (mp->refcount > 0)
11670 offset += mp->fix_size;
11672 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11673 mp->next->min_address = mp->min_address + mp->fix_size;
11676 return min_mp;
11679 /* Add a constant to the minipool for a backward reference. Returns the
11680 node added or NULL if the constant will not fit in this pool.
11682 Note that the code for insertion for a backwards reference can be
11683 somewhat confusing because the calculated offsets for each fix do
11684 not take into account the size of the pool (which is still under
11685 construction). */
11686 static Mnode *
11687 add_minipool_backward_ref (Mfix *fix)
11689 /* If set, min_mp is the last pool_entry that has a lower constraint
11690 than the one we are trying to add. */
11691 Mnode *min_mp = NULL;
11692 /* This can be negative, since it is only a constraint. */
11693 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11694 Mnode *mp;
11696 /* If we can't reach the current pool from this insn, or if we can't
11697 insert this entry at the end of the pool without pushing other
11698 fixes out of range, then we don't try. This ensures that we
11699 can't fail later on. */
11700 if (min_address >= minipool_barrier->address
11701 || (minipool_vector_tail->min_address + fix->fix_size
11702 >= minipool_barrier->address))
11703 return NULL;
11705 /* Scan the pool to see if a constant with the same value has
11706 already been added. While we are doing this, also note the
11707 location where we must insert the constant if it doesn't already
11708 exist. */
11709 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11711 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11712 && fix->mode == mp->mode
11713 && (GET_CODE (fix->value) != CODE_LABEL
11714 || (CODE_LABEL_NUMBER (fix->value)
11715 == CODE_LABEL_NUMBER (mp->value)))
11716 && rtx_equal_p (fix->value, mp->value)
11717 /* Check that there is enough slack to move this entry to the
11718 end of the table (this is conservative). */
11719 && (mp->max_address
11720 > (minipool_barrier->address
11721 + minipool_vector_tail->offset
11722 + minipool_vector_tail->fix_size)))
11724 mp->refcount++;
11725 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11728 if (min_mp != NULL)
11729 mp->min_address += fix->fix_size;
11730 else
11732 /* Note the insertion point if necessary. */
11733 if (mp->min_address < min_address)
11735 /* For now, we do not allow the insertion of nodes requiring
11736 8-byte alignment anywhere but at the start of the pool. */
11737 if (ARM_DOUBLEWORD_ALIGN
11738 && fix->fix_size >= 8 && mp->fix_size < 8)
11739 return NULL;
11740 else
11741 min_mp = mp;
11743 else if (mp->max_address
11744 < minipool_barrier->address + mp->offset + fix->fix_size)
11746 /* Inserting before this entry would push the fix beyond
11747 its maximum address (which can happen if we have
11748 re-located a forwards fix); force the new fix to come
11749 after it. */
11750 if (ARM_DOUBLEWORD_ALIGN
11751 && fix->fix_size >= 8 && mp->fix_size < 8)
11752 return NULL;
11753 else
11755 min_mp = mp;
11756 min_address = mp->min_address + fix->fix_size;
11759 /* Do not insert a non-8-byte aligned quantity before 8-byte
11760 aligned quantities. */
11761 else if (ARM_DOUBLEWORD_ALIGN
11762 && fix->fix_size < 8
11763 && mp->fix_size >= 8)
11765 min_mp = mp;
11766 min_address = mp->min_address + fix->fix_size;
11771 /* We need to create a new entry. */
11772 mp = XNEW (Mnode);
11773 mp->fix_size = fix->fix_size;
11774 mp->mode = fix->mode;
11775 mp->value = fix->value;
11776 mp->refcount = 1;
11777 mp->max_address = minipool_barrier->address + 65536;
11779 mp->min_address = min_address;
11781 if (min_mp == NULL)
11783 mp->prev = NULL;
11784 mp->next = minipool_vector_head;
11786 if (mp->next == NULL)
11788 minipool_vector_tail = mp;
11789 minipool_vector_label = gen_label_rtx ();
11791 else
11792 mp->next->prev = mp;
11794 minipool_vector_head = mp;
11796 else
11798 mp->next = min_mp->next;
11799 mp->prev = min_mp;
11800 min_mp->next = mp;
11802 if (mp->next != NULL)
11803 mp->next->prev = mp;
11804 else
11805 minipool_vector_tail = mp;
11808 /* Save the new entry. */
11809 min_mp = mp;
11811 if (mp->prev)
11812 mp = mp->prev;
11813 else
11814 mp->offset = 0;
11816 /* Scan over the following entries and adjust their offsets. */
11817 while (mp->next != NULL)
11819 if (mp->next->min_address < mp->min_address + mp->fix_size)
11820 mp->next->min_address = mp->min_address + mp->fix_size;
11822 if (mp->refcount)
11823 mp->next->offset = mp->offset + mp->fix_size;
11824 else
11825 mp->next->offset = mp->offset;
11827 mp = mp->next;
11830 return min_mp;
11833 static void
11834 assign_minipool_offsets (Mfix *barrier)
11836 HOST_WIDE_INT offset = 0;
11837 Mnode *mp;
11839 minipool_barrier = barrier;
11841 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11843 mp->offset = offset;
11845 if (mp->refcount > 0)
11846 offset += mp->fix_size;
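/* A small worked example (sizes invented): for a pool whose nodes have
   fix_size 4, 8 and 4 and are all referenced, the loop above assigns
   offsets 0, 4 and 12.  If the 8-byte node had a zero refcount it would
   still be given offset 4, but so would the final node, since
   unreferenced entries occupy no space.  */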
11850 /* Output the literal table.  */
11851 static void
11852 dump_minipool (rtx scan)
11854 Mnode * mp;
11855 Mnode * nmp;
11856 int align64 = 0;
11858 if (ARM_DOUBLEWORD_ALIGN)
11859 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11860 if (mp->refcount > 0 && mp->fix_size >= 8)
11862 align64 = 1;
11863 break;
11866 if (dump_file)
11867 fprintf (dump_file,
11868 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11869 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11871 scan = emit_label_after (gen_label_rtx (), scan);
11872 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11873 scan = emit_label_after (minipool_vector_label, scan);
11875 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11877 if (mp->refcount > 0)
11879 if (dump_file)
11881 fprintf (dump_file,
11882 ";; Offset %u, min %ld, max %ld ",
11883 (unsigned) mp->offset, (unsigned long) mp->min_address,
11884 (unsigned long) mp->max_address);
11885 arm_print_value (dump_file, mp->value);
11886 fputc ('\n', dump_file);
11889 switch (mp->fix_size)
11891 #ifdef HAVE_consttable_1
11892 case 1:
11893 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11894 break;
11896 #endif
11897 #ifdef HAVE_consttable_2
11898 case 2:
11899 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11900 break;
11902 #endif
11903 #ifdef HAVE_consttable_4
11904 case 4:
11905 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11906 break;
11908 #endif
11909 #ifdef HAVE_consttable_8
11910 case 8:
11911 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11912 break;
11914 #endif
11915 #ifdef HAVE_consttable_16
11916 case 16:
11917 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11918 break;
11920 #endif
11921 default:
11922 gcc_unreachable ();
11926 nmp = mp->next;
11927 free (mp);
11930 minipool_vector_head = minipool_vector_tail = NULL;
11931 scan = emit_insn_after (gen_consttable_end (), scan);
11932 scan = emit_barrier_after (scan);
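/* For illustration only (label name and constants invented), the
   sequence emitted above corresponds to assembly of roughly this shape:

	@ 4-byte alignment directive, or 8-byte when ALIGN64 is set
   .L42:				@ minipool_vector_label
	.word	0x12345678		@ a consttable_4 entry
	.word	0xdeadbeef		@ another referenced entry

   followed by the consttable_end marker and a barrier.  Any branch
   around the pool is emitted separately by create_fix_barrier.  */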
11935 /* Return the cost of forcibly inserting a barrier after INSN. */
11936 static int
11937 arm_barrier_cost (rtx insn)
11939 /* Basing the location of the pool on the loop depth is preferable,
11940 but at the moment, the basic block information seems to be
11941 corrupt by this stage of the compilation. */
11942 int base_cost = 50;
11943 rtx next = next_nonnote_insn (insn);
11945 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11946 base_cost -= 20;
11948 switch (GET_CODE (insn))
11950 case CODE_LABEL:
11951 /* It will always be better to place the table before the label, rather
11952 than after it. */
11953 return 50;
11955 case INSN:
11956 case CALL_INSN:
11957 return base_cost;
11959 case JUMP_INSN:
11960 return base_cost - 10;
11962 default:
11963 return base_cost + 10;
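/* Rough examples of the costs above (lower is better when
   create_fix_barrier compares candidate locations): a CODE_LABEL always
   scores 50; an ordinary INSN or CALL_INSN scores 50, or 30 when the
   following insn is a label; a JUMP_INSN scores 40, or 20 when followed
   by a label, making the point just after such a jump the preferred
   place to drop a pool.  */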
11967 /* Find the best place in the insn stream in the range
11968 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11969 Create the barrier by inserting a jump and add a new fix entry for
11970 it. */
11971 static Mfix *
11972 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11974 HOST_WIDE_INT count = 0;
11975 rtx barrier;
11976 rtx from = fix->insn;
11977 /* The instruction after which we will insert the jump. */
11978 rtx selected = NULL;
11979 int selected_cost;
11980 /* The address at which the jump instruction will be placed. */
11981 HOST_WIDE_INT selected_address;
11982 Mfix * new_fix;
11983 HOST_WIDE_INT max_count = max_address - fix->address;
11984 rtx label = gen_label_rtx ();
11986 selected_cost = arm_barrier_cost (from);
11987 selected_address = fix->address;
11989 while (from && count < max_count)
11991 rtx tmp;
11992 int new_cost;
11994 /* This code shouldn't have been called if there was a natural barrier
11995 within range. */
11996 gcc_assert (GET_CODE (from) != BARRIER);
11998 /* Count the length of this insn. */
11999 count += get_attr_length (from);
12001 /* If there is a jump table, add its length. */
12002 tmp = is_jump_table (from);
12003 if (tmp != NULL)
12005 count += get_jump_table_size (tmp);
12007 /* Jump tables aren't in a basic block, so base the cost on
12008 the dispatch insn. If we select this location, we will
12009 still put the pool after the table. */
12010 new_cost = arm_barrier_cost (from);
12012 if (count < max_count
12013 && (!selected || new_cost <= selected_cost))
12015 selected = tmp;
12016 selected_cost = new_cost;
12017 selected_address = fix->address + count;
12020 /* Continue after the dispatch table. */
12021 from = NEXT_INSN (tmp);
12022 continue;
12025 new_cost = arm_barrier_cost (from);
12027 if (count < max_count
12028 && (!selected || new_cost <= selected_cost))
12030 selected = from;
12031 selected_cost = new_cost;
12032 selected_address = fix->address + count;
12035 from = NEXT_INSN (from);
12038 /* Make sure that we found a place to insert the jump. */
12039 gcc_assert (selected);
12041 /* Make sure we do not split a call and its corresponding
12042 CALL_ARG_LOCATION note. */
12043 if (CALL_P (selected))
12045 rtx next = NEXT_INSN (selected);
12046 if (next && NOTE_P (next)
12047 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
12048 selected = next;
12051 /* Create a new JUMP_INSN that branches around a barrier. */
12052 from = emit_jump_insn_after (gen_jump (label), selected);
12053 JUMP_LABEL (from) = label;
12054 barrier = emit_barrier_after (from);
12055 emit_label_after (label, barrier);
12057 /* Create a minipool barrier entry for the new barrier. */
12058 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12059 new_fix->insn = barrier;
12060 new_fix->address = selected_address;
12061 new_fix->next = fix->next;
12062 fix->next = new_fix;
12064 return new_fix;
12067 /* Record that there is a natural barrier in the insn stream at
12068 ADDRESS. */
12069 static void
12070 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
12072 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12074 fix->insn = insn;
12075 fix->address = address;
12077 fix->next = NULL;
12078 if (minipool_fix_head != NULL)
12079 minipool_fix_tail->next = fix;
12080 else
12081 minipool_fix_head = fix;
12083 minipool_fix_tail = fix;
12086 /* Record INSN, which will need fixing up to load a value from the
12087 minipool. ADDRESS is the offset of the insn since the start of the
12088 function; LOC is a pointer to the part of the insn which requires
12089 fixing; VALUE is the constant that must be loaded, which is of type
12090 MODE. */
12091 static void
12092 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
12093 enum machine_mode mode, rtx value)
12095 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12097 fix->insn = insn;
12098 fix->address = address;
12099 fix->loc = loc;
12100 fix->mode = mode;
12101 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12102 fix->value = value;
12103 fix->forwards = get_attr_pool_range (insn);
12104 fix->backwards = get_attr_neg_pool_range (insn);
12105 fix->minipool = NULL;
12107 /* If an insn doesn't have a range defined for it, then it isn't
12108 expecting to be reworked by this code. Better to stop now than
12109 to generate duff assembly code. */
12110 gcc_assert (fix->forwards || fix->backwards);
12112 /* If an entry requires 8-byte alignment then assume all constant pools
12113 require 4 bytes of padding. Trying to do this later on a per-pool
12114 basis is awkward because existing pool entries have to be modified. */
12115 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12116 minipool_pad = 4;
12118 if (dump_file)
12120 fprintf (dump_file,
12121 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12122 GET_MODE_NAME (mode),
12123 INSN_UID (insn), (unsigned long) address,
12124 -1 * (long)fix->backwards, (long)fix->forwards);
12125 arm_print_value (dump_file, fix->value);
12126 fprintf (dump_file, "\n");
12129 /* Add it to the chain of fixes. */
12130 fix->next = NULL;
12132 if (minipool_fix_head != NULL)
12133 minipool_fix_tail->next = fix;
12134 else
12135 minipool_fix_head = fix;
12137 minipool_fix_tail = fix;
12140 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12141 Returns the number of insns needed, or 99 if we don't know how to
12142 do it. */
12144 arm_const_double_inline_cost (rtx val)
12146 rtx lowpart, highpart;
12147 enum machine_mode mode;
12149 mode = GET_MODE (val);
12151 if (mode == VOIDmode)
12152 mode = DImode;
12154 gcc_assert (GET_MODE_SIZE (mode) == 8);
12156 lowpart = gen_lowpart (SImode, val);
12157 highpart = gen_highpart_mode (SImode, mode, val);
12159 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12160 gcc_assert (GET_CODE (highpart) == CONST_INT);
12162 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12163 NULL_RTX, NULL_RTX, 0, 0)
12164 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12165 NULL_RTX, NULL_RTX, 0, 0));
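/* A quick illustration (value invented): for the DImode constant
   0x0000000100000001 both halves are 1, each of which arm_gen_constant
   can load with a single MOV, so the cost returned is 2.  A constant
   whose halves need several data-processing insns each costs
   correspondingly more, up to the point where a minipool load wins.  */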
12168 /* Return true if it is worthwhile to split a 64-bit constant into two
12169 32-bit operations. This is the case if optimizing for size, or
12170 if we have load delay slots, or if one 32-bit part can be done with
12171 a single data operation. */
12172 bool
12173 arm_const_double_by_parts (rtx val)
12175 enum machine_mode mode = GET_MODE (val);
12176 rtx part;
12178 if (optimize_size || arm_ld_sched)
12179 return true;
12181 if (mode == VOIDmode)
12182 mode = DImode;
12184 part = gen_highpart_mode (SImode, mode, val);
12186 gcc_assert (GET_CODE (part) == CONST_INT);
12188 if (const_ok_for_arm (INTVAL (part))
12189 || const_ok_for_arm (~INTVAL (part)))
12190 return true;
12192 part = gen_lowpart (SImode, val);
12194 gcc_assert (GET_CODE (part) == CONST_INT);
12196 if (const_ok_for_arm (INTVAL (part))
12197 || const_ok_for_arm (~INTVAL (part)))
12198 return true;
12200 return false;
12203 /* Return true if it is possible to inline both the high and low parts
12204 of a 64-bit constant into 32-bit data processing instructions. */
12205 bool
12206 arm_const_double_by_immediates (rtx val)
12208 enum machine_mode mode = GET_MODE (val);
12209 rtx part;
12211 if (mode == VOIDmode)
12212 mode = DImode;
12214 part = gen_highpart_mode (SImode, mode, val);
12216 gcc_assert (GET_CODE (part) == CONST_INT);
12218 if (!const_ok_for_arm (INTVAL (part)))
12219 return false;
12221 part = gen_lowpart (SImode, val);
12223 gcc_assert (GET_CODE (part) == CONST_INT);
12225 if (!const_ok_for_arm (INTVAL (part)))
12226 return false;
12228 return true;
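/* For example (values invented): 0x000000ff00000003 splits into the
   halves 0xff and 0x3, both valid rotated 8-bit immediates, so this
   returns true; a constant whose high half is 0x12345678 returns false,
   because that value cannot be encoded as an ARM immediate.  */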
12231 /* Scan INSN and note any of its operands that need fixing.
12232 If DO_PUSHES is false we do not actually push any of the fixups
12233 needed. The function returns TRUE if any fixups were needed/pushed.
12234 This is used by arm_memory_load_p() which needs to know about loads
12235 of constants that will be converted into minipool loads. */
12236 static bool
12237 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12239 bool result = false;
12240 int opno;
12242 extract_insn (insn);
12244 if (!constrain_operands (1))
12245 fatal_insn_not_found (insn);
12247 if (recog_data.n_alternatives == 0)
12248 return false;
12250 /* Fill in recog_op_alt with information about the constraints of
12251 this insn. */
12252 preprocess_constraints ();
12254 for (opno = 0; opno < recog_data.n_operands; opno++)
12256 /* Things we need to fix can only occur in inputs. */
12257 if (recog_data.operand_type[opno] != OP_IN)
12258 continue;
12260 /* If this alternative is a memory reference, then any mention
12261 of constants in this alternative is really to fool reload
12262 into allowing us to accept one there. We need to fix them up
12263 now so that we output the right code. */
12264 if (recog_op_alt[opno][which_alternative].memory_ok)
12266 rtx op = recog_data.operand[opno];
12268 if (CONSTANT_P (op))
12270 if (do_pushes)
12271 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12272 recog_data.operand_mode[opno], op);
12273 result = true;
12275 else if (GET_CODE (op) == MEM
12276 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12277 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12279 if (do_pushes)
12281 rtx cop = avoid_constant_pool_reference (op);
12283 /* Casting the address of something to a mode narrower
12284 than a word can cause avoid_constant_pool_reference()
12285 to return the pool reference itself. That's no good to
12286 us here. Let's just hope that we can use the
12287 constant pool value directly. */
12288 if (op == cop)
12289 cop = get_pool_constant (XEXP (op, 0));
12291 push_minipool_fix (insn, address,
12292 recog_data.operand_loc[opno],
12293 recog_data.operand_mode[opno], cop);
12296 result = true;
12301 return result;
12304 /* Convert instructions to their cc-clobbering variant if possible, since
12305 that allows us to use smaller encodings. */
12307 static void
12308 thumb2_reorg (void)
12310 basic_block bb;
12311 regset_head live;
12313 INIT_REG_SET (&live);
12315 /* We are freeing block_for_insn in the toplev to keep compatibility
12316 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12317 compute_bb_for_insn ();
12318 df_analyze ();
12320 FOR_EACH_BB (bb)
12322 rtx insn;
12324 COPY_REG_SET (&live, DF_LR_OUT (bb));
12325 df_simulate_initialize_backwards (bb, &live);
12326 FOR_BB_INSNS_REVERSE (bb, insn)
12328 if (NONJUMP_INSN_P (insn)
12329 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12331 rtx pat = PATTERN (insn);
12332 if (GET_CODE (pat) == SET
12333 && low_register_operand (XEXP (pat, 0), SImode)
12334 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12335 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12336 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12338 rtx dst = XEXP (pat, 0);
12339 rtx src = XEXP (pat, 1);
12340 rtx op0 = XEXP (src, 0);
12341 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12342 ? XEXP (src, 1) : NULL);
12344 if (rtx_equal_p (dst, op0)
12345 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12347 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12348 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12349 rtvec vec = gen_rtvec (2, pat, clobber);
12351 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12352 INSN_CODE (insn) = -1;
12354 /* We can also handle a commutative operation where the
12355 second operand matches the destination. */
12356 else if (op1 && rtx_equal_p (dst, op1))
12358 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12359 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12360 rtvec vec;
12362 src = copy_rtx (src);
12363 XEXP (src, 0) = op1;
12364 XEXP (src, 1) = op0;
12365 pat = gen_rtx_SET (VOIDmode, dst, src);
12366 vec = gen_rtvec (2, pat, clobber);
12367 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12368 INSN_CODE (insn) = -1;
12373 if (NONDEBUG_INSN_P (insn))
12374 df_simulate_one_insn_backwards (bb, insn, &live);
12378 CLEAR_REG_SET (&live);
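/* As an illustration of the rewrite above: when the condition codes are
   dead, an insn such as

     (set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))

   is wrapped as

     (parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))
                (clobber (reg:CC CC_REGNUM))])

   which allows the flag-setting pattern to match and a 16-bit ADDS
   encoding to be chosen instead of the 32-bit ADD.  */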
12381 /* GCC puts the pool in the wrong place for ARM, since we can only
12382 load addresses a limited distance around the pc. We do some
12383 special munging to move the constant pool values to the correct
12384 point in the code. */
12385 static void
12386 arm_reorg (void)
12388 rtx insn;
12389 HOST_WIDE_INT address = 0;
12390 Mfix * fix;
12392 if (TARGET_THUMB2)
12393 thumb2_reorg ();
12395 minipool_fix_head = minipool_fix_tail = NULL;
12397 /* The first insn must always be a note, or the code below won't
12398 scan it properly. */
12399 insn = get_insns ();
12400 gcc_assert (GET_CODE (insn) == NOTE);
12401 minipool_pad = 0;
12403 /* Scan all the insns and record the operands that will need fixing. */
12404 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12406 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12407 && (arm_cirrus_insn_p (insn)
12408 || GET_CODE (insn) == JUMP_INSN
12409 || arm_memory_load_p (insn)))
12410 cirrus_reorg (insn);
12412 if (GET_CODE (insn) == BARRIER)
12413 push_minipool_barrier (insn, address);
12414 else if (INSN_P (insn))
12416 rtx table;
12418 note_invalid_constants (insn, address, true);
12419 address += get_attr_length (insn);
12421 /* If the insn is a vector jump, add the size of the table
12422 and skip the table. */
12423 if ((table = is_jump_table (insn)) != NULL)
12425 address += get_jump_table_size (table);
12426 insn = table;
12431 fix = minipool_fix_head;
12433 /* Now scan the fixups and perform the required changes. */
12434 while (fix)
12436 Mfix * ftmp;
12437 Mfix * fdel;
12438 Mfix * last_added_fix;
12439 Mfix * last_barrier = NULL;
12440 Mfix * this_fix;
12442 /* Skip any further barriers before the next fix. */
12443 while (fix && GET_CODE (fix->insn) == BARRIER)
12444 fix = fix->next;
12446 /* No more fixes. */
12447 if (fix == NULL)
12448 break;
12450 last_added_fix = NULL;
12452 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12454 if (GET_CODE (ftmp->insn) == BARRIER)
12456 if (ftmp->address >= minipool_vector_head->max_address)
12457 break;
12459 last_barrier = ftmp;
12461 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12462 break;
12464 last_added_fix = ftmp; /* Keep track of the last fix added. */
12467 /* If we found a barrier, drop back to that; any fixes that we
12468 could have reached but come after the barrier will now go in
12469 the next mini-pool. */
12470 if (last_barrier != NULL)
12472 /* Reduce the refcount for those fixes that won't go into this
12473 pool after all. */
12474 for (fdel = last_barrier->next;
12475 fdel && fdel != ftmp;
12476 fdel = fdel->next)
12478 fdel->minipool->refcount--;
12479 fdel->minipool = NULL;
12482 ftmp = last_barrier;
12484 else
12486 /* ftmp is the first fix that we can't fit into this pool and
12487 there are no natural barriers that we could use. Insert a
12488 new barrier in the code somewhere between the previous
12489 fix and this one, and arrange to jump around it. */
12490 HOST_WIDE_INT max_address;
12492 /* The last item on the list of fixes must be a barrier, so
12493 we can never run off the end of the list of fixes without
12494 last_barrier being set. */
12495 gcc_assert (ftmp);
12497 max_address = minipool_vector_head->max_address;
12498 /* Check that there isn't another fix that is in range that
12499 we couldn't fit into this pool because the pool was
12500 already too large: we need to put the pool before such an
12501 instruction. The pool itself may come just after the
12502 fix because create_fix_barrier also allows space for a
12503 jump instruction. */
12504 if (ftmp->address < max_address)
12505 max_address = ftmp->address + 1;
12507 last_barrier = create_fix_barrier (last_added_fix, max_address);
12510 assign_minipool_offsets (last_barrier);
12512 while (ftmp)
12514 if (GET_CODE (ftmp->insn) != BARRIER
12515 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12516 == NULL))
12517 break;
12519 ftmp = ftmp->next;
12522 /* Scan over the fixes we have identified for this pool, fixing them
12523 up and adding the constants to the pool itself. */
12524 for (this_fix = fix; this_fix && ftmp != this_fix;
12525 this_fix = this_fix->next)
12526 if (GET_CODE (this_fix->insn) != BARRIER)
12528 rtx addr
12529 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12530 minipool_vector_label),
12531 this_fix->minipool->offset);
12532 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12535 dump_minipool (last_barrier->insn);
12536 fix = ftmp;
12539 /* From now on we must synthesize any constants that we can't handle
12540 directly. This can happen if the RTL gets split during final
12541 instruction generation. */
12542 after_arm_reorg = 1;
12544 /* Free the minipool memory. */
12545 obstack_free (&minipool_obstack, minipool_startobj);
12548 /* Routines to output assembly language. */
12550 /* If the rtx is the correct value then return the string of the number.
12551 In this way we can ensure that valid double constants are generated even
12552 when cross compiling. */
12553 const char *
12554 fp_immediate_constant (rtx x)
12556 REAL_VALUE_TYPE r;
12557 int i;
12559 if (!fp_consts_inited)
12560 init_fp_table ();
12562 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12563 for (i = 0; i < 8; i++)
12564 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12565 return strings_fp[i];
12567 gcc_unreachable ();
12570 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12571 static const char *
12572 fp_const_from_val (REAL_VALUE_TYPE *r)
12574 int i;
12576 if (!fp_consts_inited)
12577 init_fp_table ();
12579 for (i = 0; i < 8; i++)
12580 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12581 return strings_fp[i];
12583 gcc_unreachable ();
12586 /* Output the operands of a LDM/STM instruction to STREAM.
12587 MASK is the ARM register set mask of which only bits 0-15 are important.
12588 REG is the base register, either the frame pointer or the stack pointer,
12589 INSTR is the possibly suffixed load or store instruction.
12590 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12592 static void
12593 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12594 unsigned long mask, int rfe)
12596 unsigned i;
12597 bool not_first = FALSE;
12599 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12600 fputc ('\t', stream);
12601 asm_fprintf (stream, instr, reg);
12602 fputc ('{', stream);
12604 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12605 if (mask & (1 << i))
12607 if (not_first)
12608 fprintf (stream, ", ");
12610 asm_fprintf (stream, "%r", i);
12611 not_first = TRUE;
12614 if (rfe)
12615 fprintf (stream, "}^\n");
12616 else
12617 fprintf (stream, "}\n");
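/* As a concrete but invented example: with a MASK selecting r4, r5 and
   lr, an INSTR string that expands to "ldmfd\tsp!, " and RFE zero, the
   routine prints

	ldmfd	sp!, {r4, r5, lr}

   With RFE nonzero the closing brace becomes "}^", requesting the
   SPSR-to-CPSR copy when the PC is in the register list.  */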
12621 /* Output a FLDMD instruction to STREAM.
12622 BASE is the register containing the address.
12623 REG and COUNT specify the register range.
12624 Extra registers may be added to avoid hardware bugs.
12626 We output FLDMD even for ARMv5 VFP implementations. Although
12627 FLDMD is technically not supported until ARMv6, it is believed
12628 that all VFP implementations support its use in this context. */
12630 static void
12631 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12633 int i;
12635 /* Workaround ARM10 VFPr1 bug. */
12636 if (count == 2 && !arm_arch6)
12638 if (reg == 15)
12639 reg--;
12640 count++;
12643 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12644 load into multiple parts if we have to handle more than 16 registers. */
12645 if (count > 16)
12647 vfp_output_fldmd (stream, base, reg, 16);
12648 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12649 return;
12652 fputc ('\t', stream);
12653 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12655 for (i = reg; i < reg + count; i++)
12657 if (i > reg)
12658 fputs (", ", stream);
12659 asm_fprintf (stream, "d%d", i);
12661 fputs ("}\n", stream);
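/* For example (numbers invented), BASE = SP_REGNUM, REG = 8 and
   COUNT = 3 produce

	fldmfdd	sp!, {d8, d9, d10}

   and on a pre-ARMv6 core the VFPr1 workaround above widens a
   two-register pop to three registers.  */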
12666 /* Output the assembly for a store multiple. */
12668 const char *
12669 vfp_output_fstmd (rtx * operands)
12671 char pattern[100];
12672 int p;
12673 int base;
12674 int i;
12676 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12677 p = strlen (pattern);
12679 gcc_assert (GET_CODE (operands[1]) == REG);
12681 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12682 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12684 p += sprintf (&pattern[p], ", d%d", base + i);
12686 strcpy (&pattern[p], "}");
12688 output_asm_insn (pattern, operands);
12689 return "";
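/* For example (register numbers invented), storing d8-d10 builds the
   pattern "fstmfdd\t%m0!, {%P1, d9, d10}", which might print as

	fstmfdd	sp!, {d8, d9, d10}

   once output_asm_insn has substituted %m0 and %P1.  */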
12693 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
12694 number of bytes pushed. */
12696 static int
12697 vfp_emit_fstmd (int base_reg, int count)
12699 rtx par;
12700 rtx dwarf;
12701 rtx tmp, reg;
12702 int i;
12704 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12705 register pairs are stored by a store multiple insn. We avoid this
12706 by pushing an extra pair. */
12707 if (count == 2 && !arm_arch6)
12709 if (base_reg == LAST_VFP_REGNUM - 3)
12710 base_reg -= 2;
12711 count++;
12714 /* FSTMD may not store more than 16 doubleword registers at once. Split
12715 larger stores into multiple parts (up to a maximum of two, in
12716 practice). */
12717 if (count > 16)
12719 int saved;
12720 /* NOTE: base_reg is an internal register number, so each D register
12721 counts as 2. */
12722 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12723 saved += vfp_emit_fstmd (base_reg, 16);
12724 return saved;
12727 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12728 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12730 reg = gen_rtx_REG (DFmode, base_reg);
12731 base_reg += 2;
12733 XVECEXP (par, 0, 0)
12734 = gen_rtx_SET (VOIDmode,
12735 gen_frame_mem
12736 (BLKmode,
12737 gen_rtx_PRE_MODIFY (Pmode,
12738 stack_pointer_rtx,
12739 plus_constant
12740 (stack_pointer_rtx,
12741 - (count * 8)))
12743 gen_rtx_UNSPEC (BLKmode,
12744 gen_rtvec (1, reg),
12745 UNSPEC_PUSH_MULT));
12747 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12748 plus_constant (stack_pointer_rtx, -(count * 8)));
12749 RTX_FRAME_RELATED_P (tmp) = 1;
12750 XVECEXP (dwarf, 0, 0) = tmp;
12752 tmp = gen_rtx_SET (VOIDmode,
12753 gen_frame_mem (DFmode, stack_pointer_rtx),
12754 reg);
12755 RTX_FRAME_RELATED_P (tmp) = 1;
12756 XVECEXP (dwarf, 0, 1) = tmp;
12758 for (i = 1; i < count; i++)
12760 reg = gen_rtx_REG (DFmode, base_reg);
12761 base_reg += 2;
12762 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12764 tmp = gen_rtx_SET (VOIDmode,
12765 gen_frame_mem (DFmode,
12766 plus_constant (stack_pointer_rtx,
12767 i * 8)),
12768 reg);
12769 RTX_FRAME_RELATED_P (tmp) = 1;
12770 XVECEXP (dwarf, 0, i + 1) = tmp;
12773 par = emit_insn (par);
12774 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12775 RTX_FRAME_RELATED_P (par) = 1;
12777 return count * 8;
12780 /* Emit a call instruction with pattern PAT. ADDR is the address of
12781 the call target. */
12783 void
12784 arm_emit_call_insn (rtx pat, rtx addr)
12786 rtx insn;
12788 insn = emit_call_insn (pat);
12790 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12791 If the call might use such an entry, add a use of the PIC register
12792 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12793 if (TARGET_VXWORKS_RTP
12794 && flag_pic
12795 && GET_CODE (addr) == SYMBOL_REF
12796 && (SYMBOL_REF_DECL (addr)
12797 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12798 : !SYMBOL_REF_LOCAL_P (addr)))
12800 require_pic_register ();
12801 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12805 /* Output a 'call' insn. */
12806 const char *
12807 output_call (rtx *operands)
12809 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12811 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12812 if (REGNO (operands[0]) == LR_REGNUM)
12814 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12815 output_asm_insn ("mov%?\t%0, %|lr", operands);
12818 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12820 if (TARGET_INTERWORK || arm_arch4t)
12821 output_asm_insn ("bx%?\t%0", operands);
12822 else
12823 output_asm_insn ("mov%?\t%|pc, %0", operands);
12825 return "";
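/* An illustrative expansion (register invented) for a call through r3 on
   a core with interworking or ARMv4T support:

	mov	lr, pc
	bx	r3

   On older cores the final instruction is "mov pc, r3" instead, and a
   call through lr first copies the target into ip.  */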
12828 /* Output a 'call' insn that is a reference in memory. This is
12829 disabled for ARMv5 and we prefer a blx instead because otherwise
12830 there's a significant performance overhead. */
12831 const char *
12832 output_call_mem (rtx *operands)
12834 gcc_assert (!arm_arch5);
12835 if (TARGET_INTERWORK)
12837 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12838 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12839 output_asm_insn ("bx%?\t%|ip", operands);
12841 else if (regno_use_in (LR_REGNUM, operands[0]))
12843 /* LR is used in the memory address. We load the address in the
12844 first instruction. It's safe to use IP as the target of the
12845 load since the call will kill it anyway. */
12846 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12847 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12848 if (arm_arch4t)
12849 output_asm_insn ("bx%?\t%|ip", operands);
12850 else
12851 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12853 else
12855 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12856 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12859 return "";
12863 /* Output a move from arm registers to an fpa register.
12864 OPERANDS[0] is an fpa register.
12865 OPERANDS[1] is the first register of an arm register pair. */
12866 const char *
12867 output_mov_long_double_fpa_from_arm (rtx *operands)
12869 int arm_reg0 = REGNO (operands[1]);
12870 rtx ops[3];
12872 gcc_assert (arm_reg0 != IP_REGNUM);
12874 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12875 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12876 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12878 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12879 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12881 return "";
12884 /* Output a move from an fpa register to arm registers.
12885 OPERANDS[0] is the first register of an arm register pair.
12886 OPERANDS[1] is an fpa register. */
12887 const char *
12888 output_mov_long_double_arm_from_fpa (rtx *operands)
12890 int arm_reg0 = REGNO (operands[0]);
12891 rtx ops[3];
12893 gcc_assert (arm_reg0 != IP_REGNUM);
12895 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12896 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12897 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12899 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12900 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12901 return "";
12904 /* Output a move from arm registers to arm registers of a long double.
12905 OPERANDS[0] is the destination.
12906 OPERANDS[1] is the source. */
12907 const char *
12908 output_mov_long_double_arm_from_arm (rtx *operands)
12910 /* We have to be careful here because the two might overlap. */
12911 int dest_start = REGNO (operands[0]);
12912 int src_start = REGNO (operands[1]);
12913 rtx ops[2];
12914 int i;
12916 if (dest_start < src_start)
12918 for (i = 0; i < 3; i++)
12920 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12921 ops[1] = gen_rtx_REG (SImode, src_start + i);
12922 output_asm_insn ("mov%?\t%0, %1", ops);
12925 else
12927 for (i = 2; i >= 0; i--)
12929 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12930 ops[1] = gen_rtx_REG (SImode, src_start + i);
12931 output_asm_insn ("mov%?\t%0, %1", ops);
12935 return "";
12938 void
12939 arm_emit_movpair (rtx dest, rtx src)
12941 /* If the src is an immediate, simplify it. */
12942 if (CONST_INT_P (src))
12944 HOST_WIDE_INT val = INTVAL (src);
12945 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12946 if ((val >> 16) & 0x0000ffff)
12947 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12948 GEN_INT (16)),
12949 GEN_INT ((val >> 16) & 0x0000ffff));
12950 return;
12952 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12953 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
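/* For example, moving the constant 0x12345678 first sets the low
   half-word (0x5678, a MOVW) and then uses the ZERO_EXTRACT set above
   for the high half-word (0x1234, a MOVT).  A constant such as 0x1234
   needs only the first insn because its top sixteen bits are zero; for
   symbolic operands the HIGH/LO_SUM pair is emitted instead.  */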
12956 /* Output a move from arm registers to an fpa register.
12957 OPERANDS[0] is an fpa register.
12958 OPERANDS[1] is the first register of an arm register pair. */
12959 const char *
12960 output_mov_double_fpa_from_arm (rtx *operands)
12962 int arm_reg0 = REGNO (operands[1]);
12963 rtx ops[2];
12965 gcc_assert (arm_reg0 != IP_REGNUM);
12967 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12968 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12969 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12970 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12971 return "";
12974 /* Output a move from an fpa register to arm registers.
12975 OPERANDS[0] is the first register of an arm register pair.
12976 OPERANDS[1] is an fpa register. */
12977 const char *
12978 output_mov_double_arm_from_fpa (rtx *operands)
12980 int arm_reg0 = REGNO (operands[0]);
12981 rtx ops[2];
12983 gcc_assert (arm_reg0 != IP_REGNUM);
12985 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12986 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12987 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12988 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12989 return "";
12992 /* Output a move between double words. It must be REG<-MEM
12993 or MEM<-REG. */
12994 const char *
12995 output_move_double (rtx *operands)
12997 enum rtx_code code0 = GET_CODE (operands[0]);
12998 enum rtx_code code1 = GET_CODE (operands[1]);
12999 rtx otherops[3];
13001 if (code0 == REG)
13003 unsigned int reg0 = REGNO (operands[0]);
13005 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13007 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13009 switch (GET_CODE (XEXP (operands[1], 0)))
13011 case REG:
13012 if (TARGET_LDRD
13013 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
13014 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13015 else
13016 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13017 break;
13019 case PRE_INC:
13020 gcc_assert (TARGET_LDRD);
13021 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13022 break;
13024 case PRE_DEC:
13025 if (TARGET_LDRD)
13026 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13027 else
13028 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13029 break;
13031 case POST_INC:
13032 if (TARGET_LDRD)
13033 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
13034 else
13035 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
13036 break;
13038 case POST_DEC:
13039 gcc_assert (TARGET_LDRD);
13040 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
13041 break;
13043 case PRE_MODIFY:
13044 case POST_MODIFY:
13045 /* Autoincrement addressing modes should never have overlapping
13046 base and destination registers, and overlapping index registers
13047 are already prohibited, so this doesn't need to worry about
13048 fix_cm3_ldrd. */
13049 otherops[0] = operands[0];
13050 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
13051 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
13053 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
13055 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
13057 /* Registers overlap so split out the increment. */
13058 output_asm_insn ("add%?\t%1, %1, %2", otherops);
13059 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
13061 else
13063 /* Use a single insn if we can.
13064 FIXME: IWMMXT allows offsets larger than ldrd can
13065 handle, fix these up with a pair of ldr. */
13066 if (TARGET_THUMB2
13067 || GET_CODE (otherops[2]) != CONST_INT
13068 || (INTVAL (otherops[2]) > -256
13069 && INTVAL (otherops[2]) < 256))
13070 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
13071 else
13073 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
13074 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13078 else
13080 /* Use a single insn if we can.
13081 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13082 fix these up with a pair of ldr. */
13083 if (TARGET_THUMB2
13084 || GET_CODE (otherops[2]) != CONST_INT
13085 || (INTVAL (otherops[2]) > -256
13086 && INTVAL (otherops[2]) < 256))
13087 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
13088 else
13090 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13091 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
13094 break;
13096 case LABEL_REF:
13097 case CONST:
13098 /* We might be able to use ldrd %0, %1 here. However the range is
13099 different to ldr/adr, and it is broken on some ARMv7-M
13100 implementations. */
13101 /* Use the second register of the pair to avoid problematic
13102 overlap. */
13103 otherops[1] = operands[1];
13104 output_asm_insn ("adr%?\t%0, %1", otherops);
13105 operands[1] = otherops[0];
13106 if (TARGET_LDRD)
13107 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13108 else
13109 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
13110 break;
13112 /* ??? This needs checking for thumb2. */
13113 default:
13114 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
13115 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
13117 otherops[0] = operands[0];
13118 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
13119 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
13121 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
13123 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13125 switch ((int) INTVAL (otherops[2]))
13127 case -8:
13128 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13129 return "";
13130 case -4:
13131 if (TARGET_THUMB2)
13132 break;
13133 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13134 return "";
13135 case 4:
13136 if (TARGET_THUMB2)
13137 break;
13138 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13139 return "";
13142 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
13143 operands[1] = otherops[0];
13144 if (TARGET_LDRD
13145 && (GET_CODE (otherops[2]) == REG
13146 || TARGET_THUMB2
13147 || (GET_CODE (otherops[2]) == CONST_INT
13148 && INTVAL (otherops[2]) > -256
13149 && INTVAL (otherops[2]) < 256)))
13151 if (reg_overlap_mentioned_p (operands[0],
13152 otherops[2]))
13154 rtx tmp;
13155 /* Swap base and index registers over to
13156 avoid a conflict. */
13157 tmp = otherops[1];
13158 otherops[1] = otherops[2];
13159 otherops[2] = tmp;
13161 /* If both registers conflict, it will usually
13162 have been fixed by a splitter. */
13163 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13164 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13166 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13167 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13169 else
13171 otherops[0] = operands[0];
13172 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13174 return "";
13177 if (GET_CODE (otherops[2]) == CONST_INT)
13179 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13180 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13181 else
13182 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13184 else
13185 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13187 else
13188 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13190 if (TARGET_LDRD)
13191 return "ldr%(d%)\t%0, [%1]";
13193 return "ldm%(ia%)\t%1, %M0";
13195 else
13197 otherops[1] = adjust_address (operands[1], SImode, 4);
13198 /* Take care of overlapping base/data reg. */
13199 if (reg_mentioned_p (operands[0], operands[1]))
13201 output_asm_insn ("ldr%?\t%0, %1", otherops);
13202 output_asm_insn ("ldr%?\t%0, %1", operands);
13204 else
13206 output_asm_insn ("ldr%?\t%0, %1", operands);
13207 output_asm_insn ("ldr%?\t%0, %1", otherops);
13212 else
13214 /* Constraints should ensure this. */
13215 gcc_assert (code0 == MEM && code1 == REG);
13216 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13218 switch (GET_CODE (XEXP (operands[0], 0)))
13220 case REG:
13221 if (TARGET_LDRD)
13222 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13223 else
13224 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13225 break;
13227 case PRE_INC:
13228 gcc_assert (TARGET_LDRD);
13229 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13230 break;
13232 case PRE_DEC:
13233 if (TARGET_LDRD)
13234 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13235 else
13236 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13237 break;
13239 case POST_INC:
13240 if (TARGET_LDRD)
13241 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13242 else
13243 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13244 break;
13246 case POST_DEC:
13247 gcc_assert (TARGET_LDRD);
13248 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13249 break;
13251 case PRE_MODIFY:
13252 case POST_MODIFY:
13253 otherops[0] = operands[1];
13254 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13255 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13257 /* IWMMXT allows offsets larger than ldrd can handle,
13258 fix these up with a pair of ldr. */
13259 if (!TARGET_THUMB2
13260 && GET_CODE (otherops[2]) == CONST_INT
13261 && (INTVAL(otherops[2]) <= -256
13262 || INTVAL(otherops[2]) >= 256))
13264 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13266 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13267 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13269 else
13271 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13272 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13275 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13276 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13277 else
13278 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13279 break;
13281 case PLUS:
13282 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13283 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13285 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13287 case -8:
13288 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13289 return "";
13291 case -4:
13292 if (TARGET_THUMB2)
13293 break;
13294 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13295 return "";
13297 case 4:
13298 if (TARGET_THUMB2)
13299 break;
13300 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13301 return "";
13304 if (TARGET_LDRD
13305 && (GET_CODE (otherops[2]) == REG
13306 || TARGET_THUMB2
13307 || (GET_CODE (otherops[2]) == CONST_INT
13308 && INTVAL (otherops[2]) > -256
13309 && INTVAL (otherops[2]) < 256)))
13311 otherops[0] = operands[1];
13312 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13313 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13314 return "";
13316 /* Fall through */
13318 default:
13319 otherops[0] = adjust_address (operands[0], SImode, 4);
13320 otherops[1] = operands[1];
13321 output_asm_insn ("str%?\t%1, %0", operands);
13322 output_asm_insn ("str%?\t%H1, %0", otherops);
13326 return "";
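/* Two illustrative expansions of the simplest case above, a load from a
   plain register address (register numbers invented):

	ldrd	r0, [r2]		@ when TARGET_LDRD is available
	ldmia	r2, {r0, r1}		@ otherwise

   The pre/post increment, PRE_MODIFY and PLUS cases follow the same
   shape but add the appropriate writeback or offset syntax.  */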
13329 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13330 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13332 const char *
13333 output_move_quad (rtx *operands)
13335 if (REG_P (operands[0]))
13337 /* Load, or reg->reg move. */
13339 if (MEM_P (operands[1]))
13341 switch (GET_CODE (XEXP (operands[1], 0)))
13343 case REG:
13344 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13345 break;
13347 case LABEL_REF:
13348 case CONST:
13349 output_asm_insn ("adr%?\t%0, %1", operands);
13350 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13351 break;
13353 default:
13354 gcc_unreachable ();
13357 else
13359 rtx ops[2];
13360 int dest, src, i;
13362 gcc_assert (REG_P (operands[1]));
13364 dest = REGNO (operands[0]);
13365 src = REGNO (operands[1]);
13367 /* This seems pretty dumb, but hopefully GCC won't try to do it
13368 very often. */
13369 if (dest < src)
13370 for (i = 0; i < 4; i++)
13372 ops[0] = gen_rtx_REG (SImode, dest + i);
13373 ops[1] = gen_rtx_REG (SImode, src + i);
13374 output_asm_insn ("mov%?\t%0, %1", ops);
13376 else
13377 for (i = 3; i >= 0; i--)
13379 ops[0] = gen_rtx_REG (SImode, dest + i);
13380 ops[1] = gen_rtx_REG (SImode, src + i);
13381 output_asm_insn ("mov%?\t%0, %1", ops);
13385 else
13387 gcc_assert (MEM_P (operands[0]));
13388 gcc_assert (REG_P (operands[1]));
13389 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13391 switch (GET_CODE (XEXP (operands[0], 0)))
13393 case REG:
13394 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13395 break;
13397 default:
13398 gcc_unreachable ();
13402 return "";
13405 /* Output a VFP load or store instruction. */
13407 const char *
13408 output_move_vfp (rtx *operands)
13410 rtx reg, mem, addr, ops[2];
13411 int load = REG_P (operands[0]);
13412 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13413 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13414 const char *templ;
13415 char buff[50];
13416 enum machine_mode mode;
13418 reg = operands[!load];
13419 mem = operands[load];
13421 mode = GET_MODE (reg);
13423 gcc_assert (REG_P (reg));
13424 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13425 gcc_assert (mode == SFmode
13426 || mode == DFmode
13427 || mode == SImode
13428 || mode == DImode
13429 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13430 gcc_assert (MEM_P (mem));
13432 addr = XEXP (mem, 0);
13434 switch (GET_CODE (addr))
13436 case PRE_DEC:
13437 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13438 ops[0] = XEXP (addr, 0);
13439 ops[1] = reg;
13440 break;
13442 case POST_INC:
13443 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13444 ops[0] = XEXP (addr, 0);
13445 ops[1] = reg;
13446 break;
13448 default:
13449 templ = "f%s%c%%?\t%%%s0, %%1%s";
13450 ops[0] = reg;
13451 ops[1] = mem;
13452 break;
13455 sprintf (buff, templ,
13456 load ? "ld" : "st",
13457 dp ? 'd' : 's',
13458 dp ? "P" : "",
13459 integer_p ? "\t%@ int" : "");
13460 output_asm_insn (buff, ops);
13462 return "";
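/* For instance, a DFmode load with a simple base-plus-offset address
   takes the default case above and becomes the template
   "fldd%?\t%P0, %1", which might print as (operands invented)

	fldd	d8, [r0, #8]

   while an SFmode store becomes "fsts%?\t%0, %1".  The "%@ int" suffix
   is only an assembly comment marking integer values held in VFP
   registers.  */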
13465 /* Output a Neon quad-word load or store, or a load or store for
13466 larger structure modes.
13468 WARNING: The ordering of elements is weird in big-endian mode,
13469 because we use VSTM, as required by the EABI. GCC RTL defines
13470 element ordering based on in-memory order. This can differ
13471 from the architectural ordering of elements within a NEON register.
13472 The intrinsics defined in arm_neon.h use the NEON register element
13473 ordering, not the GCC RTL element ordering.
13475 For example, the in-memory ordering of a big-endian quadword
13476 vector with 16-bit elements when stored from register pair {d0,d1}
13477 will be (lowest address first, d0[N] is NEON register element N):
13479 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13481 When necessary, quadword registers (dN, dN+1) are moved to ARM
13482 registers from rN in the order:
13484 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13486 So that STM/LDM can be used on vectors in ARM registers, and the
13487 same memory layout will result as if VSTM/VLDM were used. */
13489 const char *
13490 output_move_neon (rtx *operands)
13492 rtx reg, mem, addr, ops[2];
13493 int regno, load = REG_P (operands[0]);
13494 const char *templ;
13495 char buff[50];
13496 enum machine_mode mode;
13498 reg = operands[!load];
13499 mem = operands[load];
13501 mode = GET_MODE (reg);
13503 gcc_assert (REG_P (reg));
13504 regno = REGNO (reg);
13505 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13506 || NEON_REGNO_OK_FOR_QUAD (regno));
13507 gcc_assert (VALID_NEON_DREG_MODE (mode)
13508 || VALID_NEON_QREG_MODE (mode)
13509 || VALID_NEON_STRUCT_MODE (mode));
13510 gcc_assert (MEM_P (mem));
13512 addr = XEXP (mem, 0);
13514 /* Strip off const from addresses like (const (plus (...))). */
13515 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13516 addr = XEXP (addr, 0);
13518 switch (GET_CODE (addr))
13520 case POST_INC:
13521 templ = "v%smia%%?\t%%0!, %%h1";
13522 ops[0] = XEXP (addr, 0);
13523 ops[1] = reg;
13524 break;
13526 case PRE_DEC:
13527 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13528 templ = "v%smdb%%?\t%%0!, %%h1";
13529 ops[0] = XEXP (addr, 0);
13530 ops[1] = reg;
13531 break;
13533 case POST_MODIFY:
13534 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13535 gcc_unreachable ();
13537 case LABEL_REF:
13538 case PLUS:
13540 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13541 int i;
13542 int overlap = -1;
13543 for (i = 0; i < nregs; i++)
13545 /* We're only using DImode here because it's a convenient size. */
13546 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13547 ops[1] = adjust_address (mem, DImode, 8 * i);
13548 if (reg_overlap_mentioned_p (ops[0], mem))
13550 gcc_assert (overlap == -1);
13551 overlap = i;
13553 else
13555 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13556 output_asm_insn (buff, ops);
13559 if (overlap != -1)
13561 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13562 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13563 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13564 output_asm_insn (buff, ops);
13567 return "";
13570 default:
13571 templ = "v%smia%%?\t%%m0, %%h1";
13572 ops[0] = mem;
13573 ops[1] = reg;
13576 sprintf (buff, templ, load ? "ld" : "st");
13577 output_asm_insn (buff, ops);
13579 return "";
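/* The fall-through case above is the common one: for a load it becomes
   the template "vldmia%?\t%m0, %h1", where %m0 is the base register of
   the memory operand and %h1 expands to the list of D registers holding
   the value.  The LABEL_REF/PLUS case instead emits one vldr/vstr per D
   register, taking care to load any register that overlaps the address
   last.  */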
13582 /* Compute and return the length of neon_mov<mode>, where <mode> is
13583 one of VSTRUCT modes: EI, OI, CI or XI. */
13585 arm_attr_length_move_neon (rtx insn)
13587 rtx reg, mem, addr;
13588 int load;
13589 enum machine_mode mode;
13591 extract_insn_cached (insn);
13593 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13595 mode = GET_MODE (recog_data.operand[0]);
13596 switch (mode)
13598 case EImode:
13599 case OImode:
13600 return 8;
13601 case CImode:
13602 return 12;
13603 case XImode:
13604 return 16;
13605 default:
13606 gcc_unreachable ();
13610 load = REG_P (recog_data.operand[0]);
13611 reg = recog_data.operand[!load];
13612 mem = recog_data.operand[load];
13614 gcc_assert (MEM_P (mem));
13616 mode = GET_MODE (reg);
13617 addr = XEXP (mem, 0);
13619 /* Strip off const from addresses like (const (plus (...))). */
13620 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13621 addr = XEXP (addr, 0);
13623 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13625 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13626 return insns * 4;
13628 else
13629 return 4;
13632 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13633 return zero. */
13636 arm_address_offset_is_imm (rtx insn)
13638 rtx mem, addr;
13640 extract_insn_cached (insn);
13642 if (REG_P (recog_data.operand[0]))
13643 return 0;
13645 mem = recog_data.operand[0];
13647 gcc_assert (MEM_P (mem));
13649 addr = XEXP (mem, 0);
13651 if (GET_CODE (addr) == REG
13652 || (GET_CODE (addr) == PLUS
13653 && GET_CODE (XEXP (addr, 0)) == REG
13654 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13655 return 1;
13656 else
13657 return 0;
13660 /* Output an ADD r, s, #n where n may be too big for one instruction.
13661 If adding zero and the destination register is the same as the source, output nothing. */
13662 const char *
13663 output_add_immediate (rtx *operands)
13665 HOST_WIDE_INT n = INTVAL (operands[2]);
13667 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13669 if (n < 0)
13670 output_multi_immediate (operands,
13671 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13672 -n);
13673 else
13674 output_multi_immediate (operands,
13675 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13679 return "";
13682 /* Output a multiple immediate operation.
13683 OPERANDS is the vector of operands referred to in the output patterns.
13684 INSTR1 is the output pattern to use for the first constant.
13685 INSTR2 is the output pattern to use for subsequent constants.
13686 IMMED_OP is the index of the constant slot in OPERANDS.
13687 N is the constant value. */
13688 static const char *
13689 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13690 int immed_op, HOST_WIDE_INT n)
13692 #if HOST_BITS_PER_WIDE_INT > 32
13693 n &= 0xffffffff;
13694 #endif
13696 if (n == 0)
13698 /* Quick and easy output. */
13699 operands[immed_op] = const0_rtx;
13700 output_asm_insn (instr1, operands);
13702 else
13704 int i;
13705 const char * instr = instr1;
13707 /* Note that n is never zero here (which would give no output). */
13708 for (i = 0; i < 32; i += 2)
13710 if (n & (3 << i))
13712 operands[immed_op] = GEN_INT (n & (255 << i));
13713 output_asm_insn (instr, operands);
13714 instr = instr2;
13715 i += 6;
13720 return "";
13723 /* Return the name of a shifter operation. */
13724 static const char *
13725 arm_shift_nmem(enum rtx_code code)
13727 switch (code)
13729 case ASHIFT:
13730 return ARM_LSL_NAME;
13732 case ASHIFTRT:
13733 return "asr";
13735 case LSHIFTRT:
13736 return "lsr";
13738 case ROTATERT:
13739 return "ror";
13741 default:
13742 abort();
13746 /* Return the appropriate ARM instruction for the operation code.
13747 The returned result should not be overwritten. OP is the rtx of the
13748 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13749 was shifted. */
13750 const char *
13751 arithmetic_instr (rtx op, int shift_first_arg)
13753 switch (GET_CODE (op))
13755 case PLUS:
13756 return "add";
13758 case MINUS:
13759 return shift_first_arg ? "rsb" : "sub";
13761 case IOR:
13762 return "orr";
13764 case XOR:
13765 return "eor";
13767 case AND:
13768 return "and";
13770 case ASHIFT:
13771 case ASHIFTRT:
13772 case LSHIFTRT:
13773 case ROTATERT:
13774 return arm_shift_nmem(GET_CODE(op));
13776 default:
13777 gcc_unreachable ();
13781 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13782 for the operation code. The returned result should not be overwritten.
13783 OP is the rtx of the shift.
13784 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
13785 constant shift amount otherwise. */
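/* For example, a (mult x 8) operand comes back as ARM_LSL_NAME with
   *AMOUNTP set to 3, and a (rotate x 8) as "ror" with *AMOUNTP set to 24
   (illustrative; see the body below for the >= 32 special cases).  */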
13786 static const char *
13787 shift_op (rtx op, HOST_WIDE_INT *amountp)
13789 const char * mnem;
13790 enum rtx_code code = GET_CODE (op);
13792 switch (GET_CODE (XEXP (op, 1)))
13794 case REG:
13795 case SUBREG:
13796 *amountp = -1;
13797 break;
13799 case CONST_INT:
13800 *amountp = INTVAL (XEXP (op, 1));
13801 break;
13803 default:
13804 gcc_unreachable ();
13807 switch (code)
13809 case ROTATE:
13810 gcc_assert (*amountp != -1);
13811 *amountp = 32 - *amountp;
13812 code = ROTATERT;
13814 /* Fall through. */
13816 case ASHIFT:
13817 case ASHIFTRT:
13818 case LSHIFTRT:
13819 case ROTATERT:
13820 mnem = arm_shift_nmem(code);
13821 break;
13823 case MULT:
13824 /* We never have to worry about the amount being other than a
13825 power of 2, since this case can never be reloaded from a reg. */
13826 gcc_assert (*amountp != -1);
13827 *amountp = int_log2 (*amountp);
13828 return ARM_LSL_NAME;
13830 default:
13831 gcc_unreachable ();
13834 if (*amountp != -1)
13836 /* This is not 100% correct, but follows from the desire to merge
13837 multiplication by a power of 2 with the recognizer for a
13838 shift. >=32 is not a valid shift for "lsl", so we must try and
13839 output a shift that produces the correct arithmetical result.
13840 Using lsr #32 is identical except for the fact that the carry bit
13841 is not set correctly if we set the flags; but we never use the
13842 carry bit from such an operation, so we can ignore that. */
13843 if (code == ROTATERT)
13844 /* Rotate is just modulo 32. */
13845 *amountp &= 31;
13846 else if (*amountp != (*amountp & 31))
13848 if (code == ASHIFT)
13849 mnem = "lsr";
13850 *amountp = 32;
13853 /* Shifts of 0 are no-ops. */
13854 if (*amountp == 0)
13855 return NULL;
13858 return mnem;
13861 /* Obtain the shift count corresponding to POWER, which must be a power of two. */
13863 static HOST_WIDE_INT
13864 int_log2 (HOST_WIDE_INT power)
13866 HOST_WIDE_INT shift = 0;
13868 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13870 gcc_assert (shift <= 31);
13871 shift++;
13874 return shift;
13877 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13878 because /bin/as is horribly restrictive. The judgement about
13879 whether or not each character is 'printable' (and can be output as
13880 is) or not (and must be printed with an octal escape) must be made
13881 with reference to the *host* character set -- the situation is
13882 similar to that discussed in the comments above pp_c_char in
13883 c-pretty-print.c. */
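/* For instance, the input bytes 'a', '"', '\a' come out as
	.ascii	"a\"\007"
   and a fresh .ascii directive is started whenever MAX_ASCII_LEN
   characters have been written on the current line (illustrative).  */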
13885 #define MAX_ASCII_LEN 51
13887 void
13888 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13890 int i;
13891 int len_so_far = 0;
13893 fputs ("\t.ascii\t\"", stream);
13895 for (i = 0; i < len; i++)
13897 int c = p[i];
13899 if (len_so_far >= MAX_ASCII_LEN)
13901 fputs ("\"\n\t.ascii\t\"", stream);
13902 len_so_far = 0;
13905 if (ISPRINT (c))
13907 if (c == '\\' || c == '\"')
13909 putc ('\\', stream);
13910 len_so_far++;
13912 putc (c, stream);
13913 len_so_far++;
13915 else
13917 fprintf (stream, "\\%03o", c);
13918 len_so_far += 4;
13922 fputs ("\"\n", stream);
13925 /* Compute the register save mask for registers 0 through 12
13926 inclusive. This code is used by arm_compute_save_reg_mask. */
13928 static unsigned long
13929 arm_compute_save_reg0_reg12_mask (void)
13931 unsigned long func_type = arm_current_func_type ();
13932 unsigned long save_reg_mask = 0;
13933 unsigned int reg;
13935 if (IS_INTERRUPT (func_type))
13937 unsigned int max_reg;
13938 /* Interrupt functions must not corrupt any registers,
13939 even call clobbered ones. If this is a leaf function
13940 we can just examine the registers used by the RTL, but
13941 otherwise we have to assume that whatever function is
13942 called might clobber anything, and so we have to save
13943 all the call-clobbered registers as well. */
13944 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13945 /* FIQ handlers have registers r8 - r12 banked, so
13946 we only need to check r0 - r7.  Normal ISRs only
13947 bank r14 and r15, so we must check up to r12.
13948 r13 is the stack pointer which is always preserved,
13949 so we do not need to consider it here. */
13950 max_reg = 7;
13951 else
13952 max_reg = 12;
13954 for (reg = 0; reg <= max_reg; reg++)
13955 if (df_regs_ever_live_p (reg)
13956 || (! current_function_is_leaf && call_used_regs[reg]))
13957 save_reg_mask |= (1 << reg);
13959 /* Also save the pic base register if necessary. */
13960 if (flag_pic
13961 && !TARGET_SINGLE_PIC_BASE
13962 && arm_pic_register != INVALID_REGNUM
13963 && crtl->uses_pic_offset_table)
13964 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13966 else if (IS_VOLATILE(func_type))
13968 /* For noreturn functions we historically omitted register saves
13969 altogether. However this really messes up debugging. As a
13970 compromise save just the frame pointers. Combined with the link
13971 register saved elsewhere this should be sufficient to get
13972 a backtrace. */
13973 if (frame_pointer_needed)
13974 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13975 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13976 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13977 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13978 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13980 else
13982 /* In the normal case we only need to save those registers
13983 which are call saved and which are used by this function. */
13984 for (reg = 0; reg <= 11; reg++)
13985 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13986 save_reg_mask |= (1 << reg);
13988 /* Handle the frame pointer as a special case. */
13989 if (frame_pointer_needed)
13990 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13992 /* If we aren't loading the PIC register,
13993 don't stack it even though it may be live. */
13994 if (flag_pic
13995 && !TARGET_SINGLE_PIC_BASE
13996 && arm_pic_register != INVALID_REGNUM
13997 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13998 || crtl->uses_pic_offset_table))
13999 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14001 /* The prologue will copy SP into R0, so save it. */
14002 if (IS_STACKALIGN (func_type))
14003 save_reg_mask |= 1;
14006 /* Save registers so the exception handler can modify them. */
14007 if (crtl->calls_eh_return)
14009 unsigned int i;
14011 for (i = 0; ; i++)
14013 reg = EH_RETURN_DATA_REGNO (i);
14014 if (reg == INVALID_REGNUM)
14015 break;
14016 save_reg_mask |= 1 << reg;
14020 return save_reg_mask;
14024 /* Compute the number of bytes used to store the static chain register on the
14025 stack, above the stack frame. We need to know this accurately to get the
14026 alignment of the rest of the stack frame correct. */
14028 static int arm_compute_static_chain_stack_bytes (void)
14030 unsigned long func_type = arm_current_func_type ();
14031 int static_chain_stack_bytes = 0;
14033 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
14034 IS_NESTED (func_type) &&
14035 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
14036 static_chain_stack_bytes = 4;
14038 return static_chain_stack_bytes;
14042 /* Compute a bit mask of which registers need to be
14043 saved on the stack for the current function.
14044 This is used by arm_get_frame_offsets, which may add extra registers. */
14046 static unsigned long
14047 arm_compute_save_reg_mask (void)
14049 unsigned int save_reg_mask = 0;
14050 unsigned long func_type = arm_current_func_type ();
14051 unsigned int reg;
14053 if (IS_NAKED (func_type))
14054 /* This should never really happen. */
14055 return 0;
14057 /* If we are creating a stack frame, then we must save the frame pointer,
14058 IP (which will hold the old stack pointer), LR and the PC. */
14059 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14060 save_reg_mask |=
14061 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
14062 | (1 << IP_REGNUM)
14063 | (1 << LR_REGNUM)
14064 | (1 << PC_REGNUM);
14066 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
14068 /* Decide if we need to save the link register.
14069 Interrupt routines have their own banked link register,
14070 so they never need to save it.
14071 Otherwise if we do not use the link register we do not need to save
14072 it. If we are pushing other registers onto the stack however, we
14073 can save an instruction in the epilogue by pushing the link register
14074 now and then popping it back into the PC. This incurs extra memory
14075 accesses though, so we only do it when optimizing for size, and only
14076 if we know that we will not need a fancy return sequence. */
14077 if (df_regs_ever_live_p (LR_REGNUM)
14078 || (save_reg_mask
14079 && optimize_size
14080 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14081 && !crtl->calls_eh_return))
14082 save_reg_mask |= 1 << LR_REGNUM;
14084 if (cfun->machine->lr_save_eliminated)
14085 save_reg_mask &= ~ (1 << LR_REGNUM);
14087 if (TARGET_REALLY_IWMMXT
14088 && ((bit_count (save_reg_mask)
14089 + ARM_NUM_INTS (crtl->args.pretend_args_size +
14090 arm_compute_static_chain_stack_bytes())
14091 ) % 2) != 0)
14093 /* The total number of registers that are going to be pushed
14094 onto the stack is odd. We need to ensure that the stack
14095 is 64-bit aligned before we start to save iWMMXt registers,
14096 and also before we start to create locals. (A local variable
14097 might be a double or long long which we will load/store using
14098 an iWMMXt instruction). Therefore we need to push another
14099 ARM register, so that the stack will be 64-bit aligned. We
14100 try to avoid using the arg registers (r0 - r3) as they might be
14101 used to pass values in a tail call. */
14102 for (reg = 4; reg <= 12; reg++)
14103 if ((save_reg_mask & (1 << reg)) == 0)
14104 break;
14106 if (reg <= 12)
14107 save_reg_mask |= (1 << reg);
14108 else
14110 cfun->machine->sibcall_blocked = 1;
14111 save_reg_mask |= (1 << 3);
14115 /* We may need to push an additional register for use initializing the
14116 PIC base register. */
14117 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
14118 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
14120 reg = thumb_find_work_register (1 << 4);
14121 if (!call_used_regs[reg])
14122 save_reg_mask |= (1 << reg);
14125 return save_reg_mask;
14129 /* Compute a bit mask of which registers need to be
14130 saved on the stack for the current function. */
14131 static unsigned long
14132 thumb1_compute_save_reg_mask (void)
14134 unsigned long mask;
14135 unsigned reg;
14137 mask = 0;
14138 for (reg = 0; reg < 12; reg ++)
14139 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14140 mask |= 1 << reg;
14142 if (flag_pic
14143 && !TARGET_SINGLE_PIC_BASE
14144 && arm_pic_register != INVALID_REGNUM
14145 && crtl->uses_pic_offset_table)
14146 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14148 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14149 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14150 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14152 /* LR will also be pushed if any lo regs are pushed. */
14153 if (mask & 0xff || thumb_force_lr_save ())
14154 mask |= (1 << LR_REGNUM);
14156 /* Make sure we have a low work register if we need one.
14157 We will need one if we are going to push a high register,
14158 but we are not currently intending to push a low register. */
14159 if ((mask & 0xff) == 0
14160 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14162 /* Use thumb_find_work_register to choose which register
14163 we will use. If the register is live then we will
14164 have to push it. Use LAST_LO_REGNUM as our fallback
14165 choice for the register to select. */
14166 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14167 /* Make sure the register returned by thumb_find_work_register is
14168 not part of the return value. */
14169 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14170 reg = LAST_LO_REGNUM;
14172 if (! call_used_regs[reg])
14173 mask |= 1 << reg;
14176 /* The 504 below is 8 bytes less than 512 because there are two possible
14177 alignment words. We can't tell here if they will be present or not so we
14178 have to play it safe and assume that they are. */
14179 if ((CALLER_INTERWORKING_SLOT_SIZE +
14180 ROUND_UP_WORD (get_frame_size ()) +
14181 crtl->outgoing_args_size) >= 504)
14183 /* This is the same as the code in thumb1_expand_prologue() which
14184 determines which register to use for stack decrement. */
14185 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14186 if (mask & (1 << reg))
14187 break;
14189 if (reg > LAST_LO_REGNUM)
14191 /* Make sure we have a register available for stack decrement. */
14192 mask |= 1 << LAST_LO_REGNUM;
14196 return mask;
14200 /* Return the number of bytes required to save VFP registers. */
14201 static int
14202 arm_get_vfp_saved_size (void)
14204 unsigned int regno;
14205 int count;
14206 int saved;
14208 saved = 0;
14209 /* Space for saved VFP registers. */
14210 if (TARGET_HARD_FLOAT && TARGET_VFP)
14212 count = 0;
14213 for (regno = FIRST_VFP_REGNUM;
14214 regno < LAST_VFP_REGNUM;
14215 regno += 2)
14217 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14218 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14220 if (count > 0)
14222 /* Workaround ARM10 VFPr1 bug. */
14223 if (count == 2 && !arm_arch6)
14224 count++;
14225 saved += count * 8;
14227 count = 0;
14229 else
14230 count++;
14232 if (count > 0)
14234 if (count == 2 && !arm_arch6)
14235 count++;
14236 saved += count * 8;
14239 return saved;
14243 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14244 everything bar the final return instruction. */
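/* For a typical function that has pushed registers, the sequence emitted
   here is roughly
	ldmfd	sp!, {r4, r5, fp, pc}
   i.e. the saved LR slot is loaded straight into the PC so that no
   separate return instruction is needed (illustrative register list).  */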
14245 const char *
14246 output_return_instruction (rtx operand, int really_return, int reverse)
14248 char conditional[10];
14249 char instr[100];
14250 unsigned reg;
14251 unsigned long live_regs_mask;
14252 unsigned long func_type;
14253 arm_stack_offsets *offsets;
14255 func_type = arm_current_func_type ();
14257 if (IS_NAKED (func_type))
14258 return "";
14260 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14262 /* If this function was declared non-returning, and we have
14263 found a tail call, then we have to trust that the called
14264 function won't return. */
14265 if (really_return)
14267 rtx ops[2];
14269 /* Otherwise, trap an attempted return by aborting. */
14270 ops[0] = operand;
14271 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14272 : "abort");
14273 assemble_external_libcall (ops[1]);
14274 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14277 return "";
14280 gcc_assert (!cfun->calls_alloca || really_return);
14282 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14284 cfun->machine->return_used_this_function = 1;
14286 offsets = arm_get_frame_offsets ();
14287 live_regs_mask = offsets->saved_regs_mask;
14289 if (live_regs_mask)
14291 const char * return_reg;
14293 /* If we do not have any special requirements for function exit
14294 (e.g. interworking) then we can load the return address
14295 directly into the PC. Otherwise we must load it into LR. */
14296 if (really_return
14297 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14298 return_reg = reg_names[PC_REGNUM];
14299 else
14300 return_reg = reg_names[LR_REGNUM];
14302 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14304 /* There are three possible reasons for the IP register
14305 being saved. 1) a stack frame was created, in which case
14306 IP contains the old stack pointer, or 2) an ISR routine
14307 corrupted it, or 3) it was saved to align the stack on
14308 iWMMXt. In case 1, restore IP into SP, otherwise just
14309 restore IP. */
14310 if (frame_pointer_needed)
14312 live_regs_mask &= ~ (1 << IP_REGNUM);
14313 live_regs_mask |= (1 << SP_REGNUM);
14315 else
14316 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14319 /* On some ARM architectures it is faster to use LDR rather than
14320 LDM to load a single register. On other architectures, the
14321 cost is the same. In 26 bit mode, or for exception handlers,
14322 we have to use LDM to load the PC so that the CPSR is also
14323 restored. */
14324 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14325 if (live_regs_mask == (1U << reg))
14326 break;
14328 if (reg <= LAST_ARM_REGNUM
14329 && (reg != LR_REGNUM
14330 || ! really_return
14331 || ! IS_INTERRUPT (func_type)))
14333 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14334 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14336 else
14338 char *p;
14339 int first = 1;
14341 /* Generate the load multiple instruction to restore the
14342 registers. Note we can get here, even if
14343 frame_pointer_needed is true, but only if sp already
14344 points to the base of the saved core registers. */
14345 if (live_regs_mask & (1 << SP_REGNUM))
14347 unsigned HOST_WIDE_INT stack_adjust;
14349 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14350 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14352 if (stack_adjust && arm_arch5 && TARGET_ARM)
14353 if (TARGET_UNIFIED_ASM)
14354 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14355 else
14356 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14357 else
14359 /* If we can't use ldmib (SA110 bug),
14360 then try to pop r3 instead. */
14361 if (stack_adjust)
14362 live_regs_mask |= 1 << 3;
14364 if (TARGET_UNIFIED_ASM)
14365 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14366 else
14367 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14370 else
14371 if (TARGET_UNIFIED_ASM)
14372 sprintf (instr, "pop%s\t{", conditional);
14373 else
14374 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14376 p = instr + strlen (instr);
14378 for (reg = 0; reg <= SP_REGNUM; reg++)
14379 if (live_regs_mask & (1 << reg))
14381 int l = strlen (reg_names[reg]);
14383 if (first)
14384 first = 0;
14385 else
14387 memcpy (p, ", ", 2);
14388 p += 2;
14391 memcpy (p, "%|", 2);
14392 memcpy (p + 2, reg_names[reg], l);
14393 p += l + 2;
14396 if (live_regs_mask & (1 << LR_REGNUM))
14398 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14399 /* If returning from an interrupt, restore the CPSR. */
14400 if (IS_INTERRUPT (func_type))
14401 strcat (p, "^");
14403 else
14404 strcpy (p, "}");
14407 output_asm_insn (instr, & operand);
14409 /* See if we need to generate an extra instruction to
14410 perform the actual function return. */
14411 if (really_return
14412 && func_type != ARM_FT_INTERWORKED
14413 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14415 /* The return has already been handled
14416 by loading the LR into the PC. */
14417 really_return = 0;
14421 if (really_return)
14423 switch ((int) ARM_FUNC_TYPE (func_type))
14425 case ARM_FT_ISR:
14426 case ARM_FT_FIQ:
14427 /* ??? This is wrong for unified assembly syntax. */
14428 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14429 break;
14431 case ARM_FT_INTERWORKED:
14432 sprintf (instr, "bx%s\t%%|lr", conditional);
14433 break;
14435 case ARM_FT_EXCEPTION:
14436 /* ??? This is wrong for unified assembly syntax. */
14437 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14438 break;
14440 default:
14441 /* Use bx if it's available. */
14442 if (arm_arch5 || arm_arch4t)
14443 sprintf (instr, "bx%s\t%%|lr", conditional);
14444 else
14445 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14446 break;
14449 output_asm_insn (instr, & operand);
14452 return "";
14455 /* Write the function name into the code section, directly preceding
14456 the function prologue.
14458 Code will be output similar to this:
14460 .ascii "arm_poke_function_name", 0
14461 .align
14463 .word 0xff000000 + (t1 - t0)
14464 arm_poke_function_name
14465 mov ip, sp
14466 stmfd sp!, {fp, ip, lr, pc}
14467 sub fp, ip, #4
14469 When performing a stack backtrace, code can inspect the value
14470 of 'pc' stored at 'fp' + 0. If the trace function then looks
14471 at location pc - 12 and the top 8 bits are set, then we know
14472 that there is a function name embedded immediately preceding this
14473 location, whose length is ((pc[-3]) & 0x00ffffff).
14475 We assume that pc is declared as a pointer to an unsigned long.
14477 It is of no benefit to output the function name if we are assembling
14478 a leaf function. These function types will not contain a stack
14479 backtrace structure, therefore it is not possible to determine the
14480 function name. */
14481 void
14482 arm_poke_function_name (FILE *stream, const char *name)
14484 unsigned long alignlength;
14485 unsigned long length;
14486 rtx x;
14488 length = strlen (name) + 1;
14489 alignlength = ROUND_UP_WORD (length);
14491 ASM_OUTPUT_ASCII (stream, name, length);
14492 ASM_OUTPUT_ALIGN (stream, 2);
14493 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14494 assemble_aligned_integer (UNITS_PER_WORD, x);
14497 /* Place some comments into the assembler stream
14498 describing the current function. */
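/* The output is a block of assembler comments, e.g. (illustrative values):
	@ args = 0, pretend = 0, frame = 8
	@ frame_needed = 1, uses_anonymous_args = 0  */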
14499 static void
14500 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14502 unsigned long func_type;
14504 if (TARGET_THUMB1)
14506 thumb1_output_function_prologue (f, frame_size);
14507 return;
14510 /* Sanity check. */
14511 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14513 func_type = arm_current_func_type ();
14515 switch ((int) ARM_FUNC_TYPE (func_type))
14517 default:
14518 case ARM_FT_NORMAL:
14519 break;
14520 case ARM_FT_INTERWORKED:
14521 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14522 break;
14523 case ARM_FT_ISR:
14524 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14525 break;
14526 case ARM_FT_FIQ:
14527 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14528 break;
14529 case ARM_FT_EXCEPTION:
14530 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14531 break;
14534 if (IS_NAKED (func_type))
14535 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14537 if (IS_VOLATILE (func_type))
14538 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14540 if (IS_NESTED (func_type))
14541 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14542 if (IS_STACKALIGN (func_type))
14543 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14545 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14546 crtl->args.size,
14547 crtl->args.pretend_args_size, frame_size);
14549 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14550 frame_pointer_needed,
14551 cfun->machine->uses_anonymous_args);
14553 if (cfun->machine->lr_save_eliminated)
14554 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14556 if (crtl->calls_eh_return)
14557 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
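/* Output the epilogue for an ARM or Thumb-2 function as assembler text.
   SIBLING is the sibling-call insn when this epilogue is emitted before
   a tail call, or NULL for a normal return.  */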
14561 const char *
14562 arm_output_epilogue (rtx sibling)
14564 int reg;
14565 unsigned long saved_regs_mask;
14566 unsigned long func_type;
14567 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14568 frame that is $fp + 4 for a non-variadic function. */
14569 int floats_offset = 0;
14570 rtx operands[3];
14571 FILE * f = asm_out_file;
14572 unsigned int lrm_count = 0;
14573 int really_return = (sibling == NULL);
14574 int start_reg;
14575 arm_stack_offsets *offsets;
14577 /* If we have already generated the return instruction
14578 then it is futile to generate anything else. */
14579 if (use_return_insn (FALSE, sibling) &&
14580 (cfun->machine->return_used_this_function != 0))
14581 return "";
14583 func_type = arm_current_func_type ();
14585 if (IS_NAKED (func_type))
14586 /* Naked functions don't have epilogues. */
14587 return "";
14589 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14591 rtx op;
14593 /* A volatile function should never return. Call abort. */
14594 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14595 assemble_external_libcall (op);
14596 output_asm_insn ("bl\t%a0", &op);
14598 return "";
14601 /* If we are throwing an exception, then we really must be doing a
14602 return, so we can't tail-call. */
14603 gcc_assert (!crtl->calls_eh_return || really_return);
14605 offsets = arm_get_frame_offsets ();
14606 saved_regs_mask = offsets->saved_regs_mask;
14608 if (TARGET_IWMMXT)
14609 lrm_count = bit_count (saved_regs_mask);
14611 floats_offset = offsets->saved_args;
14612 /* Compute how far away the floats will be. */
14613 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14614 if (saved_regs_mask & (1 << reg))
14615 floats_offset += 4;
14617 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14619 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14620 int vfp_offset = offsets->frame;
14622 if (TARGET_FPA_EMU2)
14624 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14625 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14627 floats_offset += 12;
14628 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14629 reg, FP_REGNUM, floats_offset - vfp_offset);
14632 else
14634 start_reg = LAST_FPA_REGNUM;
14636 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14638 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14640 floats_offset += 12;
14642 /* We can't unstack more than four registers at once. */
14643 if (start_reg - reg == 3)
14645 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14646 reg, FP_REGNUM, floats_offset - vfp_offset);
14647 start_reg = reg - 1;
14650 else
14652 if (reg != start_reg)
14653 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14654 reg + 1, start_reg - reg,
14655 FP_REGNUM, floats_offset - vfp_offset);
14656 start_reg = reg - 1;
14660 /* Just in case the last register checked also needs unstacking. */
14661 if (reg != start_reg)
14662 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14663 reg + 1, start_reg - reg,
14664 FP_REGNUM, floats_offset - vfp_offset);
14667 if (TARGET_HARD_FLOAT && TARGET_VFP)
14669 int saved_size;
14671 /* The fldmd insns do not have base+offset addressing
14672 modes, so we use IP to hold the address. */
14673 saved_size = arm_get_vfp_saved_size ();
14675 if (saved_size > 0)
14677 floats_offset += saved_size;
14678 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14679 FP_REGNUM, floats_offset - vfp_offset);
14681 start_reg = FIRST_VFP_REGNUM;
14682 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14684 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14685 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14687 if (start_reg != reg)
14688 vfp_output_fldmd (f, IP_REGNUM,
14689 (start_reg - FIRST_VFP_REGNUM) / 2,
14690 (reg - start_reg) / 2);
14691 start_reg = reg + 2;
14694 if (start_reg != reg)
14695 vfp_output_fldmd (f, IP_REGNUM,
14696 (start_reg - FIRST_VFP_REGNUM) / 2,
14697 (reg - start_reg) / 2);
14700 if (TARGET_IWMMXT)
14702 /* The frame pointer is guaranteed not to be double-word aligned.
14703 This is because it is set to (old_stack_pointer - 4) and the
14704 old_stack_pointer was double word aligned. Thus the offset to
14705 the iWMMXt registers to be loaded must also not be a multiple of
14706 eight, so that the resultant address *is* double-word aligned.
14707 We can ignore floats_offset since that was already included in
14708 the live_regs_mask. */
14709 lrm_count += (lrm_count % 2 ? 2 : 1);
14711 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14712 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14714 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14715 reg, FP_REGNUM, lrm_count * 4);
14716 lrm_count += 2;
14720 /* saved_regs_mask should contain the IP, which at the time of stack
14721 frame generation actually contains the old stack pointer. So a
14722 quick way to unwind the stack is just pop the IP register directly
14723 into the stack pointer. */
14724 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14725 saved_regs_mask &= ~ (1 << IP_REGNUM);
14726 saved_regs_mask |= (1 << SP_REGNUM);
14728 /* There are two registers left in saved_regs_mask - LR and PC. We
14729 only need to restore the LR register (the return address), but to
14730 save time we can load it directly into the PC, unless we need a
14731 special function exit sequence, or we are not really returning. */
14732 if (really_return
14733 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14734 && !crtl->calls_eh_return)
14735 /* Delete the LR from the register mask, so that the LR on
14736 the stack is loaded into the PC in the register mask. */
14737 saved_regs_mask &= ~ (1 << LR_REGNUM);
14738 else
14739 saved_regs_mask &= ~ (1 << PC_REGNUM);
14741 /* We must use SP as the base register, because SP is one of the
14742 registers being restored. If an interrupt or page fault
14743 happens in the ldm instruction, the SP might or might not
14744 have been restored. That would be bad, as then SP will no
14745 longer indicate the safe area of stack, and we can get stack
14746 corruption. Using SP as the base register means that it will
14747 be reset correctly to the original value, should an interrupt
14748 occur. If the stack pointer already points at the right
14749 place, then omit the subtraction. */
14750 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14751 || cfun->calls_alloca)
14752 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14753 4 * bit_count (saved_regs_mask));
14754 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14756 if (IS_INTERRUPT (func_type))
14757 /* Interrupt handlers will have pushed the
14758 IP onto the stack, so restore it now. */
14759 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14761 else
14763 /* This branch is executed for ARM mode (non-apcs frames) and
14764 Thumb-2 mode. Frame layout is essentially the same for those
14765 cases, except that in ARM mode frame pointer points to the
14766 first saved register, while in Thumb-2 mode the frame pointer points
14767 to the last saved register.
14769 It is possible to make frame pointer point to last saved
14770 register in both cases, and remove some conditionals below.
14771 That means that fp setup in prologue would be just "mov fp, sp"
14772 and sp restore in epilogue would be just "mov sp, fp", whereas
14773 now we have to use add/sub in those cases. However, the value
14774 of that would be marginal, as both mov and add/sub are 32-bit
14775 in ARM mode, and it would require extra conditionals
14776 in arm_expand_prologue to distinguish the ARM-apcs-frame case
14777 (where frame pointer is required to point at first register)
14778 and ARM-non-apcs-frame. Therefore, such change is postponed
14779 until real need arise. */
14780 unsigned HOST_WIDE_INT amount;
14781 int rfe;
14782 /* Restore stack pointer if necessary. */
14783 if (TARGET_ARM && frame_pointer_needed)
14785 operands[0] = stack_pointer_rtx;
14786 operands[1] = hard_frame_pointer_rtx;
14788 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14789 output_add_immediate (operands);
14791 else
14793 if (frame_pointer_needed)
14795 /* For Thumb-2 restore sp from the frame pointer.
14796 Operand restrictions mean we have to increment FP, then copy
14797 to SP. */
14798 amount = offsets->locals_base - offsets->saved_regs;
14799 operands[0] = hard_frame_pointer_rtx;
14801 else
14803 unsigned long count;
14804 operands[0] = stack_pointer_rtx;
14805 amount = offsets->outgoing_args - offsets->saved_regs;
14806 /* pop call clobbered registers if it avoids a
14807 separate stack adjustment. */
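/* That is, instead of emitting "add sp, sp, #AMOUNT" and then a pop,
   fold the adjustment into the pop itself by also popping AMOUNT/4 of
   the argument registers r0-r3 that hold no part of the return value;
   the extra values popped are simply discarded.  */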
14808 count = offsets->saved_regs - offsets->saved_args;
14809 if (optimize_size
14810 && count != 0
14811 && !crtl->calls_eh_return
14812 && bit_count(saved_regs_mask) * 4 == count
14813 && !IS_INTERRUPT (func_type)
14814 && !crtl->tail_call_emit)
14816 unsigned long mask;
14817 /* Preserve return values, of any size. */
14818 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
14819 mask ^= 0xf;
14820 mask &= ~saved_regs_mask;
14821 reg = 0;
14822 while (bit_count (mask) * 4 > amount)
14824 while ((mask & (1 << reg)) == 0)
14825 reg++;
14826 mask &= ~(1 << reg);
14828 if (bit_count (mask) * 4 == amount) {
14829 amount = 0;
14830 saved_regs_mask |= mask;
14835 if (amount)
14837 operands[1] = operands[0];
14838 operands[2] = GEN_INT (amount);
14839 output_add_immediate (operands);
14841 if (frame_pointer_needed)
14842 asm_fprintf (f, "\tmov\t%r, %r\n",
14843 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14846 if (TARGET_FPA_EMU2)
14848 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14849 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14850 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14851 reg, SP_REGNUM);
14853 else
14855 start_reg = FIRST_FPA_REGNUM;
14857 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14859 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14861 if (reg - start_reg == 3)
14863 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14864 start_reg, SP_REGNUM);
14865 start_reg = reg + 1;
14868 else
14870 if (reg != start_reg)
14871 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14872 start_reg, reg - start_reg,
14873 SP_REGNUM);
14875 start_reg = reg + 1;
14879 /* Just in case the last register checked also needs unstacking. */
14880 if (reg != start_reg)
14881 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14882 start_reg, reg - start_reg, SP_REGNUM);
14885 if (TARGET_HARD_FLOAT && TARGET_VFP)
14887 int end_reg = LAST_VFP_REGNUM + 1;
14889 /* Scan the registers in reverse order. We need to match
14890 any groupings made in the prologue and generate matching
14891 pop operations. */
14892 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14894 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14895 && (!df_regs_ever_live_p (reg + 1)
14896 || call_used_regs[reg + 1]))
14898 if (end_reg > reg + 2)
14899 vfp_output_fldmd (f, SP_REGNUM,
14900 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14901 (end_reg - (reg + 2)) / 2);
14902 end_reg = reg;
14905 if (end_reg > reg + 2)
14906 vfp_output_fldmd (f, SP_REGNUM, 0,
14907 (end_reg - (reg + 2)) / 2);
14910 if (TARGET_IWMMXT)
14911 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14912 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14913 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14915 /* If we can, restore the LR into the PC. */
14916 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14917 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14918 && !IS_STACKALIGN (func_type)
14919 && really_return
14920 && crtl->args.pretend_args_size == 0
14921 && saved_regs_mask & (1 << LR_REGNUM)
14922 && !crtl->calls_eh_return)
14924 saved_regs_mask &= ~ (1 << LR_REGNUM);
14925 saved_regs_mask |= (1 << PC_REGNUM);
14926 rfe = IS_INTERRUPT (func_type);
14928 else
14929 rfe = 0;
14931 /* Load the registers off the stack. If we only have one register
14932 to load, use the LDR instruction - it is faster. For Thumb-2,
14933 always use pop and the assembler will pick the best instruction. */
14934 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14935 && !IS_INTERRUPT(func_type))
14937 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14939 else if (saved_regs_mask)
14941 if (saved_regs_mask & (1 << SP_REGNUM))
14942 /* Note - write back to the stack register is not enabled
14943 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14944 in the list of registers and if we add writeback the
14945 instruction becomes UNPREDICTABLE. */
14946 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14947 rfe);
14948 else if (TARGET_ARM)
14949 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14950 rfe);
14951 else
14952 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14955 if (crtl->args.pretend_args_size)
14957 /* Unwind the pre-pushed regs. */
14958 operands[0] = operands[1] = stack_pointer_rtx;
14959 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14960 output_add_immediate (operands);
14964 /* We may have already restored PC directly from the stack. */
14965 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14966 return "";
14968 /* Stack adjustment for exception handler. */
14969 if (crtl->calls_eh_return)
14970 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14971 ARM_EH_STACKADJ_REGNUM);
14973 /* Generate the return instruction. */
14974 switch ((int) ARM_FUNC_TYPE (func_type))
14976 case ARM_FT_ISR:
14977 case ARM_FT_FIQ:
14978 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14979 break;
14981 case ARM_FT_EXCEPTION:
14982 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14983 break;
14985 case ARM_FT_INTERWORKED:
14986 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14987 break;
14989 default:
14990 if (IS_STACKALIGN (func_type))
14992 /* See comment in arm_expand_prologue. */
14993 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14995 if (arm_arch5 || arm_arch4t)
14996 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14997 else
14998 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14999 break;
15002 return "";
15005 static void
15006 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15007 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15009 arm_stack_offsets *offsets;
15011 if (TARGET_THUMB1)
15013 int regno;
15015 /* Emit any call-via-reg trampolines that are needed for v4t support
15016 of call_reg and call_value_reg type insns. */
15017 for (regno = 0; regno < LR_REGNUM; regno++)
15019 rtx label = cfun->machine->call_via[regno];
15021 if (label != NULL)
15023 switch_to_section (function_section (current_function_decl));
15024 targetm.asm_out.internal_label (asm_out_file, "L",
15025 CODE_LABEL_NUMBER (label));
15026 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15030 /* ??? Probably not safe to set this here, since it assumes that a
15031 function will be emitted as assembly immediately after we generate
15032 RTL for it. This does not happen for inline functions. */
15033 cfun->machine->return_used_this_function = 0;
15035 else /* TARGET_32BIT */
15037 /* We need to take into account any stack-frame rounding. */
15038 offsets = arm_get_frame_offsets ();
15040 gcc_assert (!use_return_insn (FALSE, NULL)
15041 || (cfun->machine->return_used_this_function != 0)
15042 || offsets->saved_regs == offsets->outgoing_args
15043 || frame_pointer_needed);
15045 /* Reset the ARM-specific per-function variables. */
15046 after_arm_reorg = 0;
15050 /* Generate and emit an insn that we will recognize as a push_multi.
15051 Unfortunately, since this insn does not reflect very well the actual
15052 semantics of the operation, we need to annotate the insn for the benefit
15053 of DWARF2 frame unwind information. */
15054 static rtx
15055 emit_multi_reg_push (unsigned long mask)
15057 int num_regs = 0;
15058 int num_dwarf_regs;
15059 int i, j;
15060 rtx par;
15061 rtx dwarf;
15062 int dwarf_par_index;
15063 rtx tmp, reg;
15065 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15066 if (mask & (1 << i))
15067 num_regs++;
15069 gcc_assert (num_regs && num_regs <= 16);
15071 /* We don't record the PC in the dwarf frame information. */
15072 num_dwarf_regs = num_regs;
15073 if (mask & (1 << PC_REGNUM))
15074 num_dwarf_regs--;
15076 /* For the body of the insn we are going to generate an UNSPEC in
15077 parallel with several USEs. This allows the insn to be recognized
15078 by the push_multi pattern in the arm.md file.
15080 The body of the insn looks something like this:
15082 (parallel [
15083 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15084 (const_int:SI <num>)))
15085 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15086 (use (reg:SI XX))
15087 (use (reg:SI YY))
15091 For the frame note however, we try to be more explicit and actually
15092 show each register being stored into the stack frame, plus a (single)
15093 decrement of the stack pointer. We do it this way in order to be
15094 friendly to the stack unwinding code, which only wants to see a single
15095 stack decrement per instruction. The RTL we generate for the note looks
15096 something like this:
15098 (sequence [
15099 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15100 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15101 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15102 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15106 FIXME:: In an ideal world the PRE_MODIFY would not exist and
15107 instead we'd have a parallel expression detailing all
15108 the stores to the various memory addresses so that debug
15109 information is more up-to-date. Remember however while writing
15110 this to take care of the constraints with the push instruction.
15112 Note also that this has to be taken care of for the VFP registers.
15114 For more see PR43399. */
15116 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15117 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15118 dwarf_par_index = 1;
15120 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15122 if (mask & (1 << i))
15124 reg = gen_rtx_REG (SImode, i);
15126 XVECEXP (par, 0, 0)
15127 = gen_rtx_SET (VOIDmode,
15128 gen_frame_mem
15129 (BLKmode,
15130 gen_rtx_PRE_MODIFY (Pmode,
15131 stack_pointer_rtx,
15132 plus_constant
15133 (stack_pointer_rtx,
15134 -4 * num_regs))
15136 gen_rtx_UNSPEC (BLKmode,
15137 gen_rtvec (1, reg),
15138 UNSPEC_PUSH_MULT));
15140 if (i != PC_REGNUM)
15142 tmp = gen_rtx_SET (VOIDmode,
15143 gen_frame_mem (SImode, stack_pointer_rtx),
15144 reg);
15145 RTX_FRAME_RELATED_P (tmp) = 1;
15146 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15147 dwarf_par_index++;
15150 break;
15154 for (j = 1, i++; j < num_regs; i++)
15156 if (mask & (1 << i))
15158 reg = gen_rtx_REG (SImode, i);
15160 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15162 if (i != PC_REGNUM)
15165 = gen_rtx_SET (VOIDmode,
15166 gen_frame_mem
15167 (SImode,
15168 plus_constant (stack_pointer_rtx,
15169 4 * j)),
15170 reg);
15171 RTX_FRAME_RELATED_P (tmp) = 1;
15172 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15175 j++;
15179 par = emit_insn (par);
15181 tmp = gen_rtx_SET (VOIDmode,
15182 stack_pointer_rtx,
15183 plus_constant (stack_pointer_rtx, -4 * num_regs));
15184 RTX_FRAME_RELATED_P (tmp) = 1;
15185 XVECEXP (dwarf, 0, 0) = tmp;
15187 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15189 return par;
15192 /* Calculate the size of the return value that is passed in registers. */
15193 static unsigned
15194 arm_size_return_regs (void)
15196 enum machine_mode mode;
15198 if (crtl->return_rtx != 0)
15199 mode = GET_MODE (crtl->return_rtx);
15200 else
15201 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15203 return GET_MODE_SIZE (mode);
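/* Emit an FPA SFM (store multiple floating point) push of COUNT XFmode
   registers starting at BASE_REG, pre-decrementing the stack pointer by
   12 * COUNT, and attach a REG_FRAME_RELATED_EXPR note describing the
   individual stores for the DWARF unwinder.  */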
15206 static rtx
15207 emit_sfm (int base_reg, int count)
15209 rtx par;
15210 rtx dwarf;
15211 rtx tmp, reg;
15212 int i;
15214 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15215 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15217 reg = gen_rtx_REG (XFmode, base_reg++);
15219 XVECEXP (par, 0, 0)
15220 = gen_rtx_SET (VOIDmode,
15221 gen_frame_mem
15222 (BLKmode,
15223 gen_rtx_PRE_MODIFY (Pmode,
15224 stack_pointer_rtx,
15225 plus_constant
15226 (stack_pointer_rtx,
15227 -12 * count))
15229 gen_rtx_UNSPEC (BLKmode,
15230 gen_rtvec (1, reg),
15231 UNSPEC_PUSH_MULT));
15232 tmp = gen_rtx_SET (VOIDmode,
15233 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15234 RTX_FRAME_RELATED_P (tmp) = 1;
15235 XVECEXP (dwarf, 0, 1) = tmp;
15237 for (i = 1; i < count; i++)
15239 reg = gen_rtx_REG (XFmode, base_reg++);
15240 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15242 tmp = gen_rtx_SET (VOIDmode,
15243 gen_frame_mem (XFmode,
15244 plus_constant (stack_pointer_rtx,
15245 i * 12)),
15246 reg);
15247 RTX_FRAME_RELATED_P (tmp) = 1;
15248 XVECEXP (dwarf, 0, i + 1) = tmp;
15251 tmp = gen_rtx_SET (VOIDmode,
15252 stack_pointer_rtx,
15253 plus_constant (stack_pointer_rtx, -12 * count));
15255 RTX_FRAME_RELATED_P (tmp) = 1;
15256 XVECEXP (dwarf, 0, 0) = tmp;
15258 par = emit_insn (par);
15259 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15261 return par;
15265 /* Return true if the current function needs to save/restore LR. */
15267 static bool
15268 thumb_force_lr_save (void)
15270 return !cfun->machine->lr_save_eliminated
15271 && (!leaf_function_p ()
15272 || thumb_far_jump_used_p ()
15273 || df_regs_ever_live_p (LR_REGNUM));
15277 /* Return true if r3 is used by any of the tail call insns in the
15278 current function. */
15280 static bool
15281 any_sibcall_uses_r3 (void)
15283 edge_iterator ei;
15284 edge e;
15286 if (!crtl->tail_call_emit)
15287 return false;
15288 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15289 if (e->flags & EDGE_SIBCALL)
15291 rtx call = BB_END (e->src);
15292 if (!CALL_P (call))
15293 call = prev_nonnote_nondebug_insn (call);
15294 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15295 if (find_regno_fusage (call, USE, 3))
15296 return true;
15298 return false;
15302 /* Compute the distance from register FROM to register TO.
15303 These can be the arg pointer (26), the soft frame pointer (25),
15304 the stack pointer (13) or the hard frame pointer (11).
15305 In thumb mode r7 is used as the soft frame pointer, if needed.
15306 Typical stack layout looks like this:
15308 old stack pointer -> | |
15309 ----
15310 | | \
15311 | | saved arguments for
15312 | | vararg functions
15313 | | /
15315 hard FP & arg pointer -> | | \
15316 | | stack
15317 | | frame
15318 | | /
15320 | | \
15321 | | call saved
15322 | | registers
15323 soft frame pointer -> | | /
15325 | | \
15326 | | local
15327 | | variables
15328 locals base pointer -> | | /
15330 | | \
15331 | | outgoing
15332 | | arguments
15333 current stack pointer -> | | /
15336 For a given function some or all of these stack components
15337 may not be needed, giving rise to the possibility of
15338 eliminating some of the registers.
15340 The values returned by this function must reflect the behavior
15341 of arm_expand_prologue() and arm_compute_save_reg_mask().
15343 The sign of the number returned reflects the direction of stack
15344 growth, so the values are positive for all eliminations except
15345 from the soft frame pointer to the hard frame pointer.
15347 SFP may point just inside the local variables block to ensure correct
15348 alignment. */
15351 /* Calculate stack offsets. These are used to calculate register elimination
15352 offsets and in prologue/epilogue code. Also calculates which registers
15353 should be saved. */
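/* Roughly speaking, each field of the returned structure records the
   distance in bytes from the old (incoming) stack pointer down to the
   matching boundary in the layout pictured above: saved_args, frame,
   saved_regs, soft_frame, locals_base and outgoing_args, in that order.  */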
15355 static arm_stack_offsets *
15356 arm_get_frame_offsets (void)
15358 struct arm_stack_offsets *offsets;
15359 unsigned long func_type;
15360 int leaf;
15361 int saved;
15362 int core_saved;
15363 HOST_WIDE_INT frame_size;
15364 int i;
15366 offsets = &cfun->machine->stack_offsets;
15368 /* We need to know if we are a leaf function. Unfortunately, it
15369 is possible to be called after start_sequence has been called,
15370 which causes get_insns to return the insns for the sequence,
15371 not the function, which will cause leaf_function_p to return
15372 the incorrect result. However, we only need
15374 to know about leaf functions once reload has completed, and the
15375 frame size cannot be changed after that time, so we can safely
15376 use the cached value. */
15378 if (reload_completed)
15379 return offsets;
15381 /* Initially this is the size of the local variables. It will be translated
15382 into an offset once we have determined the size of preceding data. */
15383 frame_size = ROUND_UP_WORD (get_frame_size ());
15385 leaf = leaf_function_p ();
15387 /* Space for variadic functions. */
15388 offsets->saved_args = crtl->args.pretend_args_size;
15390 /* In Thumb mode this is incorrect, but never used. */
15391 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15392 arm_compute_static_chain_stack_bytes();
15394 if (TARGET_32BIT)
15396 unsigned int regno;
15398 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15399 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15400 saved = core_saved;
15402 /* We know that SP will be doubleword aligned on entry, and we must
15403 preserve that condition at any subroutine call. We also require the
15404 soft frame pointer to be doubleword aligned. */
15406 if (TARGET_REALLY_IWMMXT)
15408 /* Check for the call-saved iWMMXt registers. */
15409 for (regno = FIRST_IWMMXT_REGNUM;
15410 regno <= LAST_IWMMXT_REGNUM;
15411 regno++)
15412 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15413 saved += 8;
15416 func_type = arm_current_func_type ();
15417 if (! IS_VOLATILE (func_type))
15419 /* Space for saved FPA registers. */
15420 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15421 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15422 saved += 12;
15424 /* Space for saved VFP registers. */
15425 if (TARGET_HARD_FLOAT && TARGET_VFP)
15426 saved += arm_get_vfp_saved_size ();
15429 else /* TARGET_THUMB1 */
15431 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15432 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15433 saved = core_saved;
15434 if (TARGET_BACKTRACE)
15435 saved += 16;
15438 /* Saved registers include the stack frame. */
15439 offsets->saved_regs = offsets->saved_args + saved +
15440 arm_compute_static_chain_stack_bytes();
15441 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15442 /* A leaf function does not need any stack alignment if it has nothing
15443 on the stack. */
15444 if (leaf && frame_size == 0
15445 /* However if it calls alloca(), we have a dynamically allocated
15446 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
15447 && ! cfun->calls_alloca)
15449 offsets->outgoing_args = offsets->soft_frame;
15450 offsets->locals_base = offsets->soft_frame;
15451 return offsets;
15454 /* Ensure SFP has the correct alignment. */
15455 if (ARM_DOUBLEWORD_ALIGN
15456 && (offsets->soft_frame & 7))
15458 offsets->soft_frame += 4;
15459 /* Try to align stack by pushing an extra reg. Don't bother doing this
15460 when there is a stack frame as the alignment will be rolled into
15461 the normal stack adjustment. */
15462 if (frame_size + crtl->outgoing_args_size == 0)
15464 int reg = -1;
15466 /* If it is safe to use r3, then do so. This sometimes
15467 generates better code on Thumb-2 by avoiding the need to
15468 use 32-bit push/pop instructions. */
15469 if (! any_sibcall_uses_r3 ()
15470 && arm_size_return_regs () <= 12
15471 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15473 reg = 3;
15475 else
15476 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15478 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15480 reg = i;
15481 break;
15485 if (reg != -1)
15487 offsets->saved_regs += 4;
15488 offsets->saved_regs_mask |= (1 << reg);
15493 offsets->locals_base = offsets->soft_frame + frame_size;
15494 offsets->outgoing_args = (offsets->locals_base
15495 + crtl->outgoing_args_size);
15497 if (ARM_DOUBLEWORD_ALIGN)
15499 /* Ensure SP remains doubleword aligned. */
15500 if (offsets->outgoing_args & 7)
15501 offsets->outgoing_args += 4;
15502 gcc_assert (!(offsets->outgoing_args & 7));
15505 return offsets;
15509 /* Calculate the relative offsets for the different stack pointers. Positive
15510 offsets are in the direction of stack growth. */
15512 HOST_WIDE_INT
15513 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15515 arm_stack_offsets *offsets;
15517 offsets = arm_get_frame_offsets ();
15519 /* OK, now we have enough information to compute the distances.
15520 There must be an entry in these switch tables for each pair
15521 of registers in ELIMINABLE_REGS, even if some of the entries
15522 seem to be redundant or useless. */
15523 switch (from)
15525 case ARG_POINTER_REGNUM:
15526 switch (to)
15528 case THUMB_HARD_FRAME_POINTER_REGNUM:
15529 return 0;
15531 case FRAME_POINTER_REGNUM:
15532 /* This is the reverse of the soft frame pointer
15533 to hard frame pointer elimination below. */
15534 return offsets->soft_frame - offsets->saved_args;
15536 case ARM_HARD_FRAME_POINTER_REGNUM:
15537 /* This is only non-zero in the case where the static chain register
15538 is stored above the frame. */
15539 return offsets->frame - offsets->saved_args - 4;
15541 case STACK_POINTER_REGNUM:
15542 /* If nothing has been pushed on the stack at all
15543 then this will return -4. This *is* correct! */
15544 return offsets->outgoing_args - (offsets->saved_args + 4);
15546 default:
15547 gcc_unreachable ();
15549 gcc_unreachable ();
15551 case FRAME_POINTER_REGNUM:
15552 switch (to)
15554 case THUMB_HARD_FRAME_POINTER_REGNUM:
15555 return 0;
15557 case ARM_HARD_FRAME_POINTER_REGNUM:
15558 /* The hard frame pointer points to the top entry in the
15559 stack frame. The soft frame pointer to the bottom entry
15560 in the stack frame. If there is no stack frame at all,
15561 then they are identical. */
15563 return offsets->frame - offsets->soft_frame;
15565 case STACK_POINTER_REGNUM:
15566 return offsets->outgoing_args - offsets->soft_frame;
15568 default:
15569 gcc_unreachable ();
15571 gcc_unreachable ();
15573 default:
15574 /* You cannot eliminate from the stack pointer.
15575 In theory you could eliminate from the hard frame
15576 pointer to the stack pointer, but this will never
15577 happen, since if a stack frame is not needed the
15578 hard frame pointer will never be used. */
15579 gcc_unreachable ();
15583 /* Given FROM and TO register numbers, say whether this elimination is
15584 allowed. Frame pointer elimination is automatically handled.
15586 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15587 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15588 pointer, we must eliminate FRAME_POINTER_REGNUM into
15589 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15590 ARG_POINTER_REGNUM. */
15592 bool
15593 arm_can_eliminate (const int from, const int to)
15595 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15596 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15597 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15598 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15599 true);
15602 /* Emit RTL to save coprocessor registers on function entry. Returns the
15603 number of bytes pushed. */
15605 static int
15606 arm_save_coproc_regs(void)
15608 int saved_size = 0;
15609 unsigned reg;
15610 unsigned start_reg;
15611 rtx insn;
15613 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15614 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15616 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15617 insn = gen_rtx_MEM (V2SImode, insn);
15618 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15619 RTX_FRAME_RELATED_P (insn) = 1;
15620 saved_size += 8;
15623 /* Save any floating point call-saved registers used by this
15624 function. */
15625 if (TARGET_FPA_EMU2)
15627 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15628 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15630 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15631 insn = gen_rtx_MEM (XFmode, insn);
15632 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15633 RTX_FRAME_RELATED_P (insn) = 1;
15634 saved_size += 12;
15637 else
15639 start_reg = LAST_FPA_REGNUM;
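/* SFM can store at most four FPA registers at a time, so emit a store
   each time a run of four consecutive live registers has been collected,
   mirroring the unstacking logic in arm_output_epilogue.  */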
15641 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15643 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15645 if (start_reg - reg == 3)
15647 insn = emit_sfm (reg, 4);
15648 RTX_FRAME_RELATED_P (insn) = 1;
15649 saved_size += 48;
15650 start_reg = reg - 1;
15653 else
15655 if (start_reg != reg)
15657 insn = emit_sfm (reg + 1, start_reg - reg);
15658 RTX_FRAME_RELATED_P (insn) = 1;
15659 saved_size += (start_reg - reg) * 12;
15661 start_reg = reg - 1;
15665 if (start_reg != reg)
15667 insn = emit_sfm (reg + 1, start_reg - reg);
15668 saved_size += (start_reg - reg) * 12;
15669 RTX_FRAME_RELATED_P (insn) = 1;
15672 if (TARGET_HARD_FLOAT && TARGET_VFP)
15674 start_reg = FIRST_VFP_REGNUM;
15676 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15678 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15679 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15681 if (start_reg != reg)
15682 saved_size += vfp_emit_fstmd (start_reg,
15683 (reg - start_reg) / 2);
15684 start_reg = reg + 2;
15687 if (start_reg != reg)
15688 saved_size += vfp_emit_fstmd (start_reg,
15689 (reg - start_reg) / 2);
15691 return saved_size;
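/* Illustrative sketch, added for exposition (not part of the upstream
   source): the VFP loop above walks S-register pairs and flushes each
   maximal run of registers that need saving with one store-multiple
   (vfp_emit_fstmd).  This standalone mirror only counts the D registers
   that would be stored; "needs_save" is a hypothetical predicate array
   standing in for df_regs_ever_live_p / call_used_regs.  */
static int
example_count_vfp_dregs_saved (const int *needs_save, int num_sregs)
{
  int stored = 0;   /* D registers written out so far.  */
  int start = 0;    /* Start (S-register index) of the current run.  */
  int reg;

  for (reg = 0; reg < num_sregs; reg += 2)
    if (!needs_save[reg] && !needs_save[reg + 1])
      {
        if (start != reg)
          stored += (reg - start) / 2;   /* Flush the run [start, reg).  */
        start = reg + 2;
      }
  if (start != reg)
    stored += (reg - start) / 2;         /* Flush any trailing run.  */

  return stored;
}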
15695 /* Set the Thumb frame pointer from the stack pointer. */
15697 static void
15698 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15700 HOST_WIDE_INT amount;
15701 rtx insn, dwarf;
15703 amount = offsets->outgoing_args - offsets->locals_base;
15704 if (amount < 1024)
15705 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15706 stack_pointer_rtx, GEN_INT (amount)));
15707 else
15709 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15710 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15711 expects the first two operands to be the same. */
15712 if (TARGET_THUMB2)
15714 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15715 stack_pointer_rtx,
15716 hard_frame_pointer_rtx));
15718 else
15720 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15721 hard_frame_pointer_rtx,
15722 stack_pointer_rtx));
15724 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15725 plus_constant (stack_pointer_rtx, amount));
15726 RTX_FRAME_RELATED_P (dwarf) = 1;
15727 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15730 RTX_FRAME_RELATED_P (insn) = 1;
15733 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15734 function. */
15735 void
15736 arm_expand_prologue (void)
15738 rtx amount;
15739 rtx insn;
15740 rtx ip_rtx;
15741 unsigned long live_regs_mask;
15742 unsigned long func_type;
15743 int fp_offset = 0;
15744 int saved_pretend_args = 0;
15745 int saved_regs = 0;
15746 unsigned HOST_WIDE_INT args_to_push;
15747 arm_stack_offsets *offsets;
15749 func_type = arm_current_func_type ();
15751 /* Naked functions don't have prologues. */
15752 if (IS_NAKED (func_type))
15753 return;
15755 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
15756 args_to_push = crtl->args.pretend_args_size;
15758 /* Compute which registers we will have to save onto the stack. */
15759 offsets = arm_get_frame_offsets ();
15760 live_regs_mask = offsets->saved_regs_mask;
15762 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15764 if (IS_STACKALIGN (func_type))
15766 rtx dwarf;
15767 rtx r0;
15768 rtx r1;
15769 /* Handle a word-aligned stack pointer. We generate the following:
15771 mov r0, sp
15772 bic r1, r0, #7
15773 mov sp, r1
15774 <save and restore r0 in normal prologue/epilogue>
15775 mov sp, r0
15776 bx lr
15778 The unwinder doesn't need to know about the stack realignment.
15779 Just tell it we saved SP in r0. */
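/* For example, if SP arrives as 0x7ffc0014, "bic r1, r0, #7" clears the
   low three bits and yields 0x7ffc0010, restoring 8-byte alignment.  */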
15780 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15782 r0 = gen_rtx_REG (SImode, 0);
15783 r1 = gen_rtx_REG (SImode, 1);
15784 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15785 compiler won't choke. */
15786 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15787 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15788 insn = gen_movsi (r0, stack_pointer_rtx);
15789 RTX_FRAME_RELATED_P (insn) = 1;
15790 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15791 emit_insn (insn);
15792 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15793 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15796 /* For APCS frames, if the IP register is clobbered
15797 when creating the frame, save that register in a special
15798 way. */
15799 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15801 if (IS_INTERRUPT (func_type))
15803 /* Interrupt functions must not corrupt any registers.
15804 Creating a frame pointer however, corrupts the IP
15805 register, so we must push it first. */
15806 insn = emit_multi_reg_push (1 << IP_REGNUM);
15808 /* Do not set RTX_FRAME_RELATED_P on this insn.
15809 The dwarf stack unwinding code only wants to see one
15810 stack decrement per function, and this is not it. If
15811 this instruction is labeled as being part of the frame
15812 creation sequence then dwarf2out_frame_debug_expr will
15813 die when it encounters the assignment of IP to FP
15814 later on, since the use of SP here establishes SP as
15815 the CFA register and not IP.
15817 Anyway this instruction is not really part of the stack
15818 frame creation although it is part of the prologue. */
15820 else if (IS_NESTED (func_type))
15822 /* The static chain register is the same as the IP register
15823 used as a scratch register during stack frame creation.
15824 To get around this we need to find somewhere to store IP
15825 whilst the frame is being created. We try the following
15826 places in order:
15828 1. The last argument register.
15829 2. A slot on the stack above the frame. (This only
15830 works if the function is not a varargs function).
15831 3. Register r3, after pushing the argument registers
15832 onto the stack.
15834 Note - we only need to tell the dwarf2 backend about the SP
15835 adjustment in the second variant; the static chain register
15836 doesn't need to be unwound, as it doesn't contain a value
15837 inherited from the caller. */
15839 if (df_regs_ever_live_p (3) == false)
15840 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15841 else if (args_to_push == 0)
15843 rtx dwarf;
15845 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15846 saved_regs += 4;
15848 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15849 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15850 fp_offset = 4;
15852 /* Just tell the dwarf backend that we adjusted SP. */
15853 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15854 plus_constant (stack_pointer_rtx,
15855 -fp_offset));
15856 RTX_FRAME_RELATED_P (insn) = 1;
15857 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15859 else
15861 /* Store the args on the stack. */
15862 if (cfun->machine->uses_anonymous_args)
15863 insn = emit_multi_reg_push
15864 ((0xf0 >> (args_to_push / 4)) & 0xf);
15865 else
15866 insn = emit_insn
15867 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15868 GEN_INT (- args_to_push)));
15870 RTX_FRAME_RELATED_P (insn) = 1;
15872 saved_pretend_args = 1;
15873 fp_offset = args_to_push;
15874 args_to_push = 0;
15876 /* Now reuse r3 to preserve IP. */
15877 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15881 insn = emit_set_insn (ip_rtx,
15882 plus_constant (stack_pointer_rtx, fp_offset));
15883 RTX_FRAME_RELATED_P (insn) = 1;
15886 if (args_to_push)
15888 /* Push the argument registers, or reserve space for them. */
15889 if (cfun->machine->uses_anonymous_args)
15890 insn = emit_multi_reg_push
15891 ((0xf0 >> (args_to_push / 4)) & 0xf);
15892 else
15893 insn = emit_insn
15894 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15895 GEN_INT (- args_to_push)));
15896 RTX_FRAME_RELATED_P (insn) = 1;
15899 /* If this is an interrupt service routine, and the link register
15900 is going to be pushed, and we're not generating an extra
15901 push of IP (needed when a frame is needed and the frame layout is APCS),
15902 then subtracting four from LR now means that the function return
15903 can be done with a single instruction. */
15904 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15905 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15906 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15907 && TARGET_ARM)
15909 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15911 emit_set_insn (lr, plus_constant (lr, -4));
15914 if (live_regs_mask)
15916 saved_regs += bit_count (live_regs_mask) * 4;
15917 if (optimize_size && !frame_pointer_needed
15918 && saved_regs == offsets->saved_regs - offsets->saved_args)
15920 /* If no coprocessor registers are being pushed and we don't have
15921 to worry about a frame pointer then push extra registers to
15922 create the stack frame. This is done in a way that does not
15923 alter the frame layout, so is independent of the epilogue. */
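/* For example, if the frame needs 8 more bytes and r0 and r1 are free,
   they are added to live_regs_mask so the existing push makes the room,
   instead of emitting a separate "sub sp, sp, #8".  */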
15924 int n;
15925 int frame;
15926 n = 0;
15927 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15928 n++;
15929 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15930 if (frame && n * 4 >= frame)
15932 n = frame / 4;
15933 live_regs_mask |= (1 << n) - 1;
15934 saved_regs += frame;
15937 insn = emit_multi_reg_push (live_regs_mask);
15938 RTX_FRAME_RELATED_P (insn) = 1;
15941 if (! IS_VOLATILE (func_type))
15942 saved_regs += arm_save_coproc_regs ();
15944 if (frame_pointer_needed && TARGET_ARM)
15946 /* Create the new frame pointer. */
15947 if (TARGET_APCS_FRAME)
15949 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15950 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15951 RTX_FRAME_RELATED_P (insn) = 1;
15953 if (IS_NESTED (func_type))
15955 /* Recover the static chain register. */
15956 if (!df_regs_ever_live_p (3)
15957 || saved_pretend_args)
15958 insn = gen_rtx_REG (SImode, 3);
15959 else /* if (crtl->args.pretend_args_size == 0) */
15961 insn = plus_constant (hard_frame_pointer_rtx, 4);
15962 insn = gen_frame_mem (SImode, insn);
15964 emit_set_insn (ip_rtx, insn);
15965 /* Add a USE to stop propagate_one_insn() from barfing. */
15966 emit_insn (gen_prologue_use (ip_rtx));
15969 else
15971 insn = GEN_INT (saved_regs - 4);
15972 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15973 stack_pointer_rtx, insn));
15974 RTX_FRAME_RELATED_P (insn) = 1;
15978 if (flag_stack_usage)
15979 current_function_static_stack_size
15980 = offsets->outgoing_args - offsets->saved_args;
15982 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15984 /* This add can produce multiple insns for a large constant, so we
15985 need to get tricky. */
15986 rtx last = get_last_insn ();
15988 amount = GEN_INT (offsets->saved_args + saved_regs
15989 - offsets->outgoing_args);
15991 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15992 amount));
15995 last = last ? NEXT_INSN (last) : get_insns ();
15996 RTX_FRAME_RELATED_P (last) = 1;
15998 while (last != insn);
16000 /* If the frame pointer is needed, emit a special barrier that
16001 will prevent the scheduler from moving stores to the frame
16002 before the stack adjustment. */
16003 if (frame_pointer_needed)
16004 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
16005 hard_frame_pointer_rtx));
16009 if (frame_pointer_needed && TARGET_THUMB2)
16010 thumb_set_frame_pointer (offsets);
16012 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16014 unsigned long mask;
16016 mask = live_regs_mask;
16017 mask &= THUMB2_WORK_REGS;
16018 if (!IS_NESTED (func_type))
16019 mask |= (1 << IP_REGNUM);
16020 arm_load_pic_register (mask);
16023 /* If we are profiling, make sure no instructions are scheduled before
16024 the call to mcount. Similarly if the user has requested no
16025 scheduling in the prologue. Similarly if we want non-call exceptions
16026 using the EABI unwinder, to prevent faulting instructions from being
16027 swapped with a stack adjustment. */
16028 if (crtl->profile || !TARGET_SCHED_PROLOG
16029 || (arm_except_unwind_info (&global_options) == UI_TARGET
16030 && cfun->can_throw_non_call_exceptions))
16031 emit_insn (gen_blockage ());
16033 /* If the link register is being kept alive, with the return address in it,
16034 then make sure that it does not get reused by the ce2 pass. */
16035 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16036 cfun->machine->lr_save_eliminated = 1;
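/* Illustrative sketch, added for exposition (not part of the upstream
   source): the prologue above twice builds the mask of argument registers
   to push for anonymous arguments as (0xf0 >> (args_to_push / 4)) & 0xf.
   This standalone helper shows the mapping: 4 bytes -> {r3}, 8 -> {r2,r3},
   12 -> {r1-r3}, 16 -> {r0-r3}.  */
static unsigned int
example_pretend_args_mask (unsigned int args_to_push_bytes)
{
  return (0xf0u >> (args_to_push_bytes / 4)) & 0xfu;
}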
16039 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16040 static void
16041 arm_print_condition (FILE *stream)
16043 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16045 /* Branch conversion is not implemented for Thumb-2. */
16046 if (TARGET_THUMB)
16048 output_operand_lossage ("predicated Thumb instruction");
16049 return;
16051 if (current_insn_predicate != NULL)
16053 output_operand_lossage
16054 ("predicated instruction in conditional sequence");
16055 return;
16058 fputs (arm_condition_codes[arm_current_cc], stream);
16060 else if (current_insn_predicate)
16062 enum arm_cond_code code;
16064 if (TARGET_THUMB1)
16066 output_operand_lossage ("predicated Thumb instruction");
16067 return;
16070 code = get_arm_condition_code (current_insn_predicate);
16071 fputs (arm_condition_codes[code], stream);
16076 /* If CODE is 'd', then X is a condition operand and the instruction
16077 should only be executed if the condition is true.
16078 If CODE is 'D', then X is a condition operand and the instruction
16079 should only be executed if the condition is false: however, if the mode
16080 of the comparison is CCFPEmode, then always execute the instruction -- we
16081 do this because in these circumstances !GE does not necessarily imply LT;
16082 in these cases the instruction pattern will take care to make sure that
16083 an instruction containing %d will follow, thereby undoing the effects of
16084 doing this instruction unconditionally.
16085 If CODE is 'N' then X is a floating point operand that must be negated
16086 before output.
16087 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16088 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
16089 static void
16090 arm_print_operand (FILE *stream, rtx x, int code)
16092 switch (code)
16094 case '@':
16095 fputs (ASM_COMMENT_START, stream);
16096 return;
16098 case '_':
16099 fputs (user_label_prefix, stream);
16100 return;
16102 case '|':
16103 fputs (REGISTER_PREFIX, stream);
16104 return;
16106 case '?':
16107 arm_print_condition (stream);
16108 return;
16110 case '(':
16111 /* Nothing in unified syntax, otherwise the current condition code. */
16112 if (!TARGET_UNIFIED_ASM)
16113 arm_print_condition (stream);
16114 break;
16116 case ')':
16117 /* The current condition code in unified syntax, otherwise nothing. */
16118 if (TARGET_UNIFIED_ASM)
16119 arm_print_condition (stream);
16120 break;
16122 case '.':
16123 /* The current condition code for a condition code setting instruction.
16124 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16125 if (TARGET_UNIFIED_ASM)
16127 fputc('s', stream);
16128 arm_print_condition (stream);
16130 else
16132 arm_print_condition (stream);
16133 fputc('s', stream);
16135 return;
16137 case '!':
16138 /* If the instruction is conditionally executed then print
16139 the current condition code, otherwise print 's'. */
16140 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16141 if (current_insn_predicate)
16142 arm_print_condition (stream);
16143 else
16144 fputc('s', stream);
16145 break;
16147 /* %# is a "break" sequence. It doesn't output anything, but is used to
16148 separate e.g. operand numbers from following text, if that text consists
16149 of further digits which we don't want to be part of the operand
16150 number. */
16151 case '#':
16152 return;
16154 case 'N':
16156 REAL_VALUE_TYPE r;
16157 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16158 r = real_value_negate (&r);
16159 fprintf (stream, "%s", fp_const_from_val (&r));
16161 return;
16163 /* An integer or symbol address without a preceding # sign. */
16164 case 'c':
16165 switch (GET_CODE (x))
16167 case CONST_INT:
16168 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16169 break;
16171 case SYMBOL_REF:
16172 output_addr_const (stream, x);
16173 break;
16175 default:
16176 gcc_unreachable ();
16178 return;
16180 case 'B':
16181 if (GET_CODE (x) == CONST_INT)
16183 HOST_WIDE_INT val;
16184 val = ARM_SIGN_EXTEND (~INTVAL (x));
16185 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16187 else
16189 putc ('~', stream);
16190 output_addr_const (stream, x);
16192 return;
16194 case 'L':
16195 /* The low 16 bits of an immediate constant. */
16196 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16197 return;
16199 case 'i':
16200 fprintf (stream, "%s", arithmetic_instr (x, 1));
16201 return;
16203 /* Truncate Cirrus shift counts. */
16204 case 's':
16205 if (GET_CODE (x) == CONST_INT)
16207 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16208 return;
16210 arm_print_operand (stream, x, 0);
16211 return;
16213 case 'I':
16214 fprintf (stream, "%s", arithmetic_instr (x, 0));
16215 return;
16217 case 'S':
16219 HOST_WIDE_INT val;
16220 const char *shift;
16222 if (!shift_operator (x, SImode))
16224 output_operand_lossage ("invalid shift operand");
16225 break;
16228 shift = shift_op (x, &val);
16230 if (shift)
16232 fprintf (stream, ", %s ", shift);
16233 if (val == -1)
16234 arm_print_operand (stream, XEXP (x, 1), 0);
16235 else
16236 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16239 return;
16241 /* An explanation of the 'Q', 'R' and 'H' register operands:
16243 In a pair of registers containing a DI or DF value the 'Q'
16244 operand returns the register number of the register containing
16245 the least significant part of the value. The 'R' operand returns
16246 the register number of the register containing the most
16247 significant part of the value.
16249 The 'H' operand returns the higher of the two register numbers.
16250 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16251 same as the 'Q' operand, since the most significant part of the
16252 value is held in the lower-numbered register. The reverse is true
16253 on systems where WORDS_BIG_ENDIAN is false.
16255 The purpose of these operands is to distinguish between cases
16256 where the endian-ness of the values is important (for example
16257 when they are added together), and cases where the endian-ness
16258 is irrelevant, but the order of register operations is important.
16259 For example when loading a value from memory into a register
16260 pair, the endian-ness does not matter. Provided that the value
16261 from the lower memory address is put into the lower numbered
16262 register, and the value from the higher address is put into the
16263 higher numbered register, the load will work regardless of whether
16264 the value being loaded is big-wordian or little-wordian. The
16265 order of the two register loads can matter however, if the address
16266 of the memory location is actually held in one of the registers
16267 being overwritten by the load.
16269 The 'Q' and 'R' constraints are also available for 64-bit
16270 constants. */
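/* For example, for a DImode value in {r0, r1} on a target where
   WORDS_BIG_ENDIAN is false, %Q prints r0 (least significant word),
   %R prints r1 (most significant word) and %H prints r1 (the higher
   register number).  */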
16271 case 'Q':
16272 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16274 rtx part = gen_lowpart (SImode, x);
16275 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16276 return;
16279 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16281 output_operand_lossage ("invalid operand for code '%c'", code);
16282 return;
16285 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16286 return;
16288 case 'R':
16289 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16291 enum machine_mode mode = GET_MODE (x);
16292 rtx part;
16294 if (mode == VOIDmode)
16295 mode = DImode;
16296 part = gen_highpart_mode (SImode, mode, x);
16297 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16298 return;
16301 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16303 output_operand_lossage ("invalid operand for code '%c'", code);
16304 return;
16307 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16308 return;
16310 case 'H':
16311 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16313 output_operand_lossage ("invalid operand for code '%c'", code);
16314 return;
16317 asm_fprintf (stream, "%r", REGNO (x) + 1);
16318 return;
16320 case 'J':
16321 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16323 output_operand_lossage ("invalid operand for code '%c'", code);
16324 return;
16327 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16328 return;
16330 case 'K':
16331 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16333 output_operand_lossage ("invalid operand for code '%c'", code);
16334 return;
16337 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16338 return;
16340 case 'm':
16341 asm_fprintf (stream, "%r",
16342 GET_CODE (XEXP (x, 0)) == REG
16343 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16344 return;
16346 case 'M':
16347 asm_fprintf (stream, "{%r-%r}",
16348 REGNO (x),
16349 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16350 return;
16352 /* Like 'M', but writing doubleword vector registers, for use by Neon
16353 insns. */
16354 case 'h':
16356 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16357 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16358 if (numregs == 1)
16359 asm_fprintf (stream, "{d%d}", regno);
16360 else
16361 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16363 return;
16365 case 'd':
16366 /* CONST_TRUE_RTX means always -- that's the default. */
16367 if (x == const_true_rtx)
16368 return;
16370 if (!COMPARISON_P (x))
16372 output_operand_lossage ("invalid operand for code '%c'", code);
16373 return;
16376 fputs (arm_condition_codes[get_arm_condition_code (x)],
16377 stream);
16378 return;
16380 case 'D':
16381 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16382 want to do that. */
16383 if (x == const_true_rtx)
16385 output_operand_lossage ("instruction never executed");
16386 return;
16388 if (!COMPARISON_P (x))
16390 output_operand_lossage ("invalid operand for code '%c'", code);
16391 return;
16394 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16395 (get_arm_condition_code (x))],
16396 stream);
16397 return;
16399 /* Cirrus registers can be accessed in a variety of ways:
16400 single floating point (f)
16401 double floating point (d)
16402 32bit integer (fx)
16403 64bit integer (dx). */
16404 case 'W': /* Cirrus register in F mode. */
16405 case 'X': /* Cirrus register in D mode. */
16406 case 'Y': /* Cirrus register in FX mode. */
16407 case 'Z': /* Cirrus register in DX mode. */
16408 gcc_assert (GET_CODE (x) == REG
16409 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16411 fprintf (stream, "mv%s%s",
16412 code == 'W' ? "f"
16413 : code == 'X' ? "d"
16414 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16416 return;
16418 /* Print a Cirrus register in the mode specified by the operand's mode. */
16419 case 'V':
16421 int mode = GET_MODE (x);
16423 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16425 output_operand_lossage ("invalid operand for code '%c'", code);
16426 return;
16429 fprintf (stream, "mv%s%s",
16430 mode == DFmode ? "d"
16431 : mode == SImode ? "fx"
16432 : mode == DImode ? "dx"
16433 : "f", reg_names[REGNO (x)] + 2);
16435 return;
16438 case 'U':
16439 if (GET_CODE (x) != REG
16440 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16441 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16442 /* Bad value for wCG register number. */
16444 output_operand_lossage ("invalid operand for code '%c'", code);
16445 return;
16448 else
16449 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16450 return;
16452 /* Print an iWMMXt control register name. */
16453 case 'w':
16454 if (GET_CODE (x) != CONST_INT
16455 || INTVAL (x) < 0
16456 || INTVAL (x) >= 16)
16457 /* Bad value for wC register number. */
16459 output_operand_lossage ("invalid operand for code '%c'", code);
16460 return;
16463 else
16465 static const char * wc_reg_names [16] =
16467 "wCID", "wCon", "wCSSF", "wCASF",
16468 "wC4", "wC5", "wC6", "wC7",
16469 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16470 "wC12", "wC13", "wC14", "wC15"
16473 fprintf (stream, wc_reg_names [INTVAL (x)]);
16475 return;
16477 /* Print the high single-precision register of a VFP double-precision
16478 register. */
16479 case 'p':
16481 int mode = GET_MODE (x);
16482 int regno;
16484 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16486 output_operand_lossage ("invalid operand for code '%c'", code);
16487 return;
16490 regno = REGNO (x);
16491 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16493 output_operand_lossage ("invalid operand for code '%c'", code);
16494 return;
16497 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16499 return;
16501 /* Print a VFP/Neon double precision or quad precision register name. */
16502 case 'P':
16503 case 'q':
16505 int mode = GET_MODE (x);
16506 int is_quad = (code == 'q');
16507 int regno;
16509 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16511 output_operand_lossage ("invalid operand for code '%c'", code);
16512 return;
16515 if (GET_CODE (x) != REG
16516 || !IS_VFP_REGNUM (REGNO (x)))
16518 output_operand_lossage ("invalid operand for code '%c'", code);
16519 return;
16522 regno = REGNO (x);
16523 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16524 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16526 output_operand_lossage ("invalid operand for code '%c'", code);
16527 return;
16530 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16531 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16533 return;
16535 /* These two codes print the low/high doubleword register of a Neon quad
16536 register, respectively. For pair-structure types, can also print
16537 low/high quadword registers. */
16538 case 'e':
16539 case 'f':
16541 int mode = GET_MODE (x);
16542 int regno;
16544 if ((GET_MODE_SIZE (mode) != 16
16545 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16547 output_operand_lossage ("invalid operand for code '%c'", code);
16548 return;
16551 regno = REGNO (x);
16552 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16554 output_operand_lossage ("invalid operand for code '%c'", code);
16555 return;
16558 if (GET_MODE_SIZE (mode) == 16)
16559 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16560 + (code == 'f' ? 1 : 0));
16561 else
16562 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16563 + (code == 'f' ? 1 : 0));
16565 return;
16567 /* Print a VFPv3 floating-point constant, represented as an integer
16568 index. */
16569 case 'G':
16571 int index = vfp3_const_double_index (x);
16572 gcc_assert (index != -1);
16573 fprintf (stream, "%d", index);
16575 return;
16577 /* Print bits representing opcode features for Neon.
16579 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16580 and polynomials as unsigned.
16582 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16584 Bit 2 is 1 for rounding functions, 0 otherwise. */
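/* For example, (bits & 3) == 1 (signed integer) makes %T print 's' while
   %F prints 'i'; (bits & 3) == 3 (float) prints 'f' for both; setting
   bit 2 as well makes %O print the 'r' of a rounding variant.  */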
16586 /* Identify the type as 's', 'u', 'p' or 'f'. */
16587 case 'T':
16589 HOST_WIDE_INT bits = INTVAL (x);
16590 fputc ("uspf"[bits & 3], stream);
16592 return;
16594 /* Likewise, but signed and unsigned integers are both 'i'. */
16595 case 'F':
16597 HOST_WIDE_INT bits = INTVAL (x);
16598 fputc ("iipf"[bits & 3], stream);
16600 return;
16602 /* As for 'T', but emit 'u' instead of 'p'. */
16603 case 't':
16605 HOST_WIDE_INT bits = INTVAL (x);
16606 fputc ("usuf"[bits & 3], stream);
16608 return;
16610 /* Bit 2: rounding (vs none). */
16611 case 'O':
16613 HOST_WIDE_INT bits = INTVAL (x);
16614 fputs ((bits & 4) != 0 ? "r" : "", stream);
16616 return;
16618 /* Memory operand for vld1/vst1 instruction. */
16619 case 'A':
16621 rtx addr;
16622 bool postinc = FALSE;
16623 unsigned align, modesize, align_bits;
16625 gcc_assert (GET_CODE (x) == MEM);
16626 addr = XEXP (x, 0);
16627 if (GET_CODE (addr) == POST_INC)
16629 postinc = 1;
16630 addr = XEXP (addr, 0);
16632 asm_fprintf (stream, "[%r", REGNO (addr));
16634 /* We know the alignment of this access, so we can emit a hint in the
16635 instruction (for some alignments) as an aid to the memory subsystem
16636 of the target. */
16637 align = MEM_ALIGN (x) >> 3;
16638 modesize = GET_MODE_SIZE (GET_MODE (x));
16640 /* Only certain alignment specifiers are supported by the hardware. */
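/* For example, a 16-byte (quad) access with 32-byte alignment gets ":256",
   an 8- or 16-byte access with 16-byte alignment gets ":128", any other
   8-byte-aligned access gets ":64", and everything else gets no hint.  */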
16641 if (modesize == 16 && (align % 32) == 0)
16642 align_bits = 256;
16643 else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
16644 align_bits = 128;
16645 else if ((align % 8) == 0)
16646 align_bits = 64;
16647 else
16648 align_bits = 0;
16650 if (align_bits != 0)
16651 asm_fprintf (stream, ":%d", align_bits);
16653 asm_fprintf (stream, "]");
16655 if (postinc)
16656 fputs("!", stream);
16658 return;
16660 case 'C':
16662 rtx addr;
16664 gcc_assert (GET_CODE (x) == MEM);
16665 addr = XEXP (x, 0);
16666 gcc_assert (GET_CODE (addr) == REG);
16667 asm_fprintf (stream, "[%r]", REGNO (addr));
16669 return;
16671 /* Translate an S register number into a D register number and element index. */
16672 case 'y':
16674 int mode = GET_MODE (x);
16675 int regno;
16677 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16679 output_operand_lossage ("invalid operand for code '%c'", code);
16680 return;
16683 regno = REGNO (x);
16684 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16686 output_operand_lossage ("invalid operand for code '%c'", code);
16687 return;
16690 regno = regno - FIRST_VFP_REGNUM;
16691 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16693 return;
16695 /* Register specifier for vld1.16/vst1.16. Translate the S register
16696 number into a D register number and element index. */
16697 case 'z':
16699 int mode = GET_MODE (x);
16700 int regno;
16702 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16704 output_operand_lossage ("invalid operand for code '%c'", code);
16705 return;
16708 regno = REGNO (x);
16709 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16711 output_operand_lossage ("invalid operand for code '%c'", code);
16712 return;
16715 regno = regno - FIRST_VFP_REGNUM;
16716 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16718 return;
16720 default:
16721 if (x == 0)
16723 output_operand_lossage ("missing operand");
16724 return;
16727 switch (GET_CODE (x))
16729 case REG:
16730 asm_fprintf (stream, "%r", REGNO (x));
16731 break;
16733 case MEM:
16734 output_memory_reference_mode = GET_MODE (x);
16735 output_address (XEXP (x, 0));
16736 break;
16738 case CONST_DOUBLE:
16739 if (TARGET_NEON)
16741 char fpstr[20];
16742 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16743 sizeof (fpstr), 0, 1);
16744 fprintf (stream, "#%s", fpstr);
16746 else
16747 fprintf (stream, "#%s", fp_immediate_constant (x));
16748 break;
16750 default:
16751 gcc_assert (GET_CODE (x) != NEG);
16752 fputc ('#', stream);
16753 if (GET_CODE (x) == HIGH)
16755 fputs (":lower16:", stream);
16756 x = XEXP (x, 0);
16759 output_addr_const (stream, x);
16760 break;
16765 /* Target hook for printing a memory address. */
16766 static void
16767 arm_print_operand_address (FILE *stream, rtx x)
16769 if (TARGET_32BIT)
16771 int is_minus = GET_CODE (x) == MINUS;
16773 if (GET_CODE (x) == REG)
16774 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16775 else if (GET_CODE (x) == PLUS || is_minus)
16777 rtx base = XEXP (x, 0);
16778 rtx index = XEXP (x, 1);
16779 HOST_WIDE_INT offset = 0;
16780 if (GET_CODE (base) != REG
16781 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16783 /* Ensure that BASE is a register. */
16784 /* (one of them must be). */
16786 /* Also ensure the SP is not used as an index register. */
16786 rtx temp = base;
16787 base = index;
16788 index = temp;
16790 switch (GET_CODE (index))
16792 case CONST_INT:
16793 offset = INTVAL (index);
16794 if (is_minus)
16795 offset = -offset;
16796 asm_fprintf (stream, "[%r, #%wd]",
16797 REGNO (base), offset);
16798 break;
16800 case REG:
16801 asm_fprintf (stream, "[%r, %s%r]",
16802 REGNO (base), is_minus ? "-" : "",
16803 REGNO (index));
16804 break;
16806 case MULT:
16807 case ASHIFTRT:
16808 case LSHIFTRT:
16809 case ASHIFT:
16810 case ROTATERT:
16812 asm_fprintf (stream, "[%r, %s%r",
16813 REGNO (base), is_minus ? "-" : "",
16814 REGNO (XEXP (index, 0)));
16815 arm_print_operand (stream, index, 'S');
16816 fputs ("]", stream);
16817 break;
16820 default:
16821 gcc_unreachable ();
16824 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16825 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16827 extern enum machine_mode output_memory_reference_mode;
16829 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16831 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16832 asm_fprintf (stream, "[%r, #%s%d]!",
16833 REGNO (XEXP (x, 0)),
16834 GET_CODE (x) == PRE_DEC ? "-" : "",
16835 GET_MODE_SIZE (output_memory_reference_mode));
16836 else
16837 asm_fprintf (stream, "[%r], #%s%d",
16838 REGNO (XEXP (x, 0)),
16839 GET_CODE (x) == POST_DEC ? "-" : "",
16840 GET_MODE_SIZE (output_memory_reference_mode));
16842 else if (GET_CODE (x) == PRE_MODIFY)
16844 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16845 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16846 asm_fprintf (stream, "#%wd]!",
16847 INTVAL (XEXP (XEXP (x, 1), 1)));
16848 else
16849 asm_fprintf (stream, "%r]!",
16850 REGNO (XEXP (XEXP (x, 1), 1)));
16852 else if (GET_CODE (x) == POST_MODIFY)
16854 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16855 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16856 asm_fprintf (stream, "#%wd",
16857 INTVAL (XEXP (XEXP (x, 1), 1)));
16858 else
16859 asm_fprintf (stream, "%r",
16860 REGNO (XEXP (XEXP (x, 1), 1)));
16862 else output_addr_const (stream, x);
16864 else
16866 if (GET_CODE (x) == REG)
16867 asm_fprintf (stream, "[%r]", REGNO (x));
16868 else if (GET_CODE (x) == POST_INC)
16869 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16870 else if (GET_CODE (x) == PLUS)
16872 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16873 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16874 asm_fprintf (stream, "[%r, #%wd]",
16875 REGNO (XEXP (x, 0)),
16876 INTVAL (XEXP (x, 1)));
16877 else
16878 asm_fprintf (stream, "[%r, %r]",
16879 REGNO (XEXP (x, 0)),
16880 REGNO (XEXP (x, 1)));
16882 else
16883 output_addr_const (stream, x);
16887 /* Target hook for indicating whether a punctuation character for
16888 TARGET_PRINT_OPERAND is valid. */
16889 static bool
16890 arm_print_operand_punct_valid_p (unsigned char code)
16892 return (code == '@' || code == '|' || code == '.'
16893 || code == '(' || code == ')' || code == '#'
16894 || (TARGET_32BIT && (code == '?'))
16895 || (TARGET_THUMB2 && (code == '!'))
16896 || (TARGET_THUMB && (code == '_')));
16899 /* Target hook for assembling integer objects. The ARM version needs to
16900 handle word-sized values specially. */
16901 static bool
16902 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16904 enum machine_mode mode;
16906 if (size == UNITS_PER_WORD && aligned_p)
16908 fputs ("\t.word\t", asm_out_file);
16909 output_addr_const (asm_out_file, x);
16911 /* Mark symbols as position independent. We only do this in the
16912 .text segment, not in the .data segment. */
16913 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16914 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16916 /* See legitimize_pic_address for an explanation of the
16917 TARGET_VXWORKS_RTP check. */
16918 if (TARGET_VXWORKS_RTP
16919 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16920 fputs ("(GOT)", asm_out_file);
16921 else
16922 fputs ("(GOTOFF)", asm_out_file);
16924 fputc ('\n', asm_out_file);
16925 return true;
16928 mode = GET_MODE (x);
16930 if (arm_vector_mode_supported_p (mode))
16932 int i, units;
16934 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16936 units = CONST_VECTOR_NUNITS (x);
16937 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16939 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16940 for (i = 0; i < units; i++)
16942 rtx elt = CONST_VECTOR_ELT (x, i);
16943 assemble_integer
16944 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16946 else
16947 for (i = 0; i < units; i++)
16949 rtx elt = CONST_VECTOR_ELT (x, i);
16950 REAL_VALUE_TYPE rval;
16952 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16954 assemble_real
16955 (rval, GET_MODE_INNER (mode),
16956 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16959 return true;
16962 return default_assemble_integer (x, size, aligned_p);
16965 static void
16966 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16968 section *s;
16970 if (!TARGET_AAPCS_BASED)
16972 (is_ctor ?
16973 default_named_section_asm_out_constructor
16974 : default_named_section_asm_out_destructor) (symbol, priority);
16975 return;
16978 /* Put these in the .init_array section, using a special relocation. */
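/* For example, a constructor with priority 101 is emitted into the section
   ".init_array.00101" and a destructor with the same priority into
   ".fini_array.00101"; default-priority entries use the plain ctors/dtors
   sections below.  */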
16979 if (priority != DEFAULT_INIT_PRIORITY)
16981 char buf[18];
16982 sprintf (buf, "%s.%.5u",
16983 is_ctor ? ".init_array" : ".fini_array",
16984 priority);
16985 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16987 else if (is_ctor)
16988 s = ctors_section;
16989 else
16990 s = dtors_section;
16992 switch_to_section (s);
16993 assemble_align (POINTER_SIZE);
16994 fputs ("\t.word\t", asm_out_file);
16995 output_addr_const (asm_out_file, symbol);
16996 fputs ("(target1)\n", asm_out_file);
16999 /* Add a function to the list of static constructors. */
17001 static void
17002 arm_elf_asm_constructor (rtx symbol, int priority)
17004 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
17007 /* Add a function to the list of static destructors. */
17009 static void
17010 arm_elf_asm_destructor (rtx symbol, int priority)
17012 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17015 /* A finite state machine takes care of noticing whether or not instructions
17016 can be conditionally executed, and thus decreases execution time and code
17017 size by deleting branch instructions. The fsm is controlled by
17018 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17020 /* The states of the fsm controlling condition codes are:
17021 0: normal, do nothing special
17022 1: make ASM_OUTPUT_OPCODE not output this instruction
17023 2: make ASM_OUTPUT_OPCODE not output this instruction
17024 3: make instructions conditional
17025 4: make instructions conditional
17027 State transitions (state->state by whom under condition):
17028 0 -> 1 final_prescan_insn if the `target' is a label
17029 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17030 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17031 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17032 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17033 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17034 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17035 (the target insn is arm_target_insn).
17037 If the jump clobbers the conditions then we use states 2 and 4.
17039 A similar thing can be done with conditional return insns.
17041 XXX In case the `target' is an unconditional branch, this conditionalising
17042 of the instructions always reduces code size, but not always execution
17043 time. But then, I want to reduce the code size to somewhere near what
17044 /bin/cc produces. */
17046 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17047 instructions. When a COND_EXEC instruction is seen the subsequent
17048 instructions are scanned so that multiple conditional instructions can be
17049 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17050 specify the length and true/false mask for the IT block. These will be
17051 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
17053 /* Returns the index of the ARM condition code string in
17054 `arm_condition_codes'. COMPARISON should be an rtx like
17055 `(eq (...) (...))'. */
17056 static enum arm_cond_code
17057 get_arm_condition_code (rtx comparison)
17059 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17060 enum arm_cond_code code;
17061 enum rtx_code comp_code = GET_CODE (comparison);
17063 if (GET_MODE_CLASS (mode) != MODE_CC)
17064 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17065 XEXP (comparison, 1));
17067 switch (mode)
17069 case CC_DNEmode: code = ARM_NE; goto dominance;
17070 case CC_DEQmode: code = ARM_EQ; goto dominance;
17071 case CC_DGEmode: code = ARM_GE; goto dominance;
17072 case CC_DGTmode: code = ARM_GT; goto dominance;
17073 case CC_DLEmode: code = ARM_LE; goto dominance;
17074 case CC_DLTmode: code = ARM_LT; goto dominance;
17075 case CC_DGEUmode: code = ARM_CS; goto dominance;
17076 case CC_DGTUmode: code = ARM_HI; goto dominance;
17077 case CC_DLEUmode: code = ARM_LS; goto dominance;
17078 case CC_DLTUmode: code = ARM_CC;
17080 dominance:
17081 gcc_assert (comp_code == EQ || comp_code == NE);
17083 if (comp_code == EQ)
17084 return ARM_INVERSE_CONDITION_CODE (code);
17085 return code;
17087 case CC_NOOVmode:
17088 switch (comp_code)
17090 case NE: return ARM_NE;
17091 case EQ: return ARM_EQ;
17092 case GE: return ARM_PL;
17093 case LT: return ARM_MI;
17094 default: gcc_unreachable ();
17097 case CC_Zmode:
17098 switch (comp_code)
17100 case NE: return ARM_NE;
17101 case EQ: return ARM_EQ;
17102 default: gcc_unreachable ();
17105 case CC_Nmode:
17106 switch (comp_code)
17108 case NE: return ARM_MI;
17109 case EQ: return ARM_PL;
17110 default: gcc_unreachable ();
17113 case CCFPEmode:
17114 case CCFPmode:
17115 /* These encodings assume that AC=1 in the FPA system control
17116 byte. This allows us to handle all cases except UNEQ and
17117 LTGT. */
17118 switch (comp_code)
17120 case GE: return ARM_GE;
17121 case GT: return ARM_GT;
17122 case LE: return ARM_LS;
17123 case LT: return ARM_MI;
17124 case NE: return ARM_NE;
17125 case EQ: return ARM_EQ;
17126 case ORDERED: return ARM_VC;
17127 case UNORDERED: return ARM_VS;
17128 case UNLT: return ARM_LT;
17129 case UNLE: return ARM_LE;
17130 case UNGT: return ARM_HI;
17131 case UNGE: return ARM_PL;
17132 /* UNEQ and LTGT do not have a representation. */
17133 case UNEQ: /* Fall through. */
17134 case LTGT: /* Fall through. */
17135 default: gcc_unreachable ();
17138 case CC_SWPmode:
17139 switch (comp_code)
17141 case NE: return ARM_NE;
17142 case EQ: return ARM_EQ;
17143 case GE: return ARM_LE;
17144 case GT: return ARM_LT;
17145 case LE: return ARM_GE;
17146 case LT: return ARM_GT;
17147 case GEU: return ARM_LS;
17148 case GTU: return ARM_CC;
17149 case LEU: return ARM_CS;
17150 case LTU: return ARM_HI;
17151 default: gcc_unreachable ();
17154 case CC_Cmode:
17155 switch (comp_code)
17157 case LTU: return ARM_CS;
17158 case GEU: return ARM_CC;
17159 default: gcc_unreachable ();
17162 case CC_CZmode:
17163 switch (comp_code)
17165 case NE: return ARM_NE;
17166 case EQ: return ARM_EQ;
17167 case GEU: return ARM_CS;
17168 case GTU: return ARM_HI;
17169 case LEU: return ARM_LS;
17170 case LTU: return ARM_CC;
17171 default: gcc_unreachable ();
17174 case CC_NCVmode:
17175 switch (comp_code)
17177 case GE: return ARM_GE;
17178 case LT: return ARM_LT;
17179 case GEU: return ARM_CS;
17180 case LTU: return ARM_CC;
17181 default: gcc_unreachable ();
17184 case CCmode:
17185 switch (comp_code)
17187 case NE: return ARM_NE;
17188 case EQ: return ARM_EQ;
17189 case GE: return ARM_GE;
17190 case GT: return ARM_GT;
17191 case LE: return ARM_LE;
17192 case LT: return ARM_LT;
17193 case GEU: return ARM_CS;
17194 case GTU: return ARM_HI;
17195 case LEU: return ARM_LS;
17196 case LTU: return ARM_CC;
17197 default: gcc_unreachable ();
17200 default: gcc_unreachable ();
17204 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17205 instructions. */
17206 void
17207 thumb2_final_prescan_insn (rtx insn)
17209 rtx first_insn = insn;
17210 rtx body = PATTERN (insn);
17211 rtx predicate;
17212 enum arm_cond_code code;
17213 int n;
17214 int mask;
17216 /* Remove the previous insn from the count of insns to be output. */
17217 if (arm_condexec_count)
17218 arm_condexec_count--;
17220 /* Nothing to do if we are already inside a conditional block. */
17221 if (arm_condexec_count)
17222 return;
17224 if (GET_CODE (body) != COND_EXEC)
17225 return;
17227 /* Conditional jumps are implemented directly. */
17228 if (GET_CODE (insn) == JUMP_INSN)
17229 return;
17231 predicate = COND_EXEC_TEST (body);
17232 arm_current_cc = get_arm_condition_code (predicate);
17234 n = get_attr_ce_count (insn);
17235 arm_condexec_count = 1;
17236 arm_condexec_mask = (1 << n) - 1;
17237 arm_condexec_masklen = n;
17238 /* See if subsequent instructions can be combined into the same block. */
17239 for (;;)
17241 insn = next_nonnote_insn (insn);
17243 /* Jumping into the middle of an IT block is illegal, so a label or
17244 barrier terminates the block. */
17245 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17246 break;
17248 body = PATTERN (insn);
17249 /* USE and CLOBBER aren't really insns, so just skip them. */
17250 if (GET_CODE (body) == USE
17251 || GET_CODE (body) == CLOBBER)
17252 continue;
17254 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17255 if (GET_CODE (body) != COND_EXEC)
17256 break;
17257 /* Allow up to 4 conditionally executed instructions in a block. */
17258 n = get_attr_ce_count (insn);
17259 if (arm_condexec_masklen + n > 4)
17260 break;
17262 predicate = COND_EXEC_TEST (body);
17263 code = get_arm_condition_code (predicate);
17264 mask = (1 << n) - 1;
17265 if (arm_current_cc == code)
17266 arm_condexec_mask |= (mask << arm_condexec_masklen);
17267 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17268 break;
17270 arm_condexec_count++;
17271 arm_condexec_masklen += n;
17273 /* A jump must be the last instruction in a conditional block. */
17274 if (GET_CODE(insn) == JUMP_INSN)
17275 break;
17277 /* Restore recog_data (getting the attributes of other insns can
17278 destroy this array, but final.c assumes that it remains intact
17279 across this call). */
17280 extract_constrain_insn_cached (first_insn);
17283 void
17284 arm_final_prescan_insn (rtx insn)
17286 /* BODY will hold the body of INSN. */
17287 rtx body = PATTERN (insn);
17289 /* This will be 1 if trying to repeat the trick, and things need to be
17290 reversed if it appears to fail. */
17291 int reverse = 0;
17293 /* If we start with a return insn, we only succeed if we find another one. */
17294 int seeking_return = 0;
17296 /* START_INSN will hold the insn from where we start looking. This is the
17297 first insn after the following code_label if REVERSE is true. */
17298 rtx start_insn = insn;
17300 /* If in state 4, check if the target branch is reached, in order to
17301 change back to state 0. */
17302 if (arm_ccfsm_state == 4)
17304 if (insn == arm_target_insn)
17306 arm_target_insn = NULL;
17307 arm_ccfsm_state = 0;
17309 return;
17312 /* If in state 3, it is possible to repeat the trick, if this insn is an
17313 unconditional branch to a label, and immediately following this branch
17314 is the previous target label which is only used once, and the label this
17315 branch jumps to is not too far off. */
17316 if (arm_ccfsm_state == 3)
17318 if (simplejump_p (insn))
17320 start_insn = next_nonnote_insn (start_insn);
17321 if (GET_CODE (start_insn) == BARRIER)
17323 /* XXX Isn't this always a barrier? */
17324 start_insn = next_nonnote_insn (start_insn);
17326 if (GET_CODE (start_insn) == CODE_LABEL
17327 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17328 && LABEL_NUSES (start_insn) == 1)
17329 reverse = TRUE;
17330 else
17331 return;
17333 else if (GET_CODE (body) == RETURN)
17335 start_insn = next_nonnote_insn (start_insn);
17336 if (GET_CODE (start_insn) == BARRIER)
17337 start_insn = next_nonnote_insn (start_insn);
17338 if (GET_CODE (start_insn) == CODE_LABEL
17339 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17340 && LABEL_NUSES (start_insn) == 1)
17342 reverse = TRUE;
17343 seeking_return = 1;
17345 else
17346 return;
17348 else
17349 return;
17352 gcc_assert (!arm_ccfsm_state || reverse);
17353 if (GET_CODE (insn) != JUMP_INSN)
17354 return;
17356 /* This jump might be paralleled with a clobber of the condition codes;
17357 the jump should always come first. */
17358 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17359 body = XVECEXP (body, 0, 0);
17361 if (reverse
17362 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17363 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17365 int insns_skipped;
17366 int fail = FALSE, succeed = FALSE;
17367 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17368 int then_not_else = TRUE;
17369 rtx this_insn = start_insn, label = 0;
17371 /* Register the insn jumped to. */
17372 if (reverse)
17374 if (!seeking_return)
17375 label = XEXP (SET_SRC (body), 0);
17377 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17378 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17379 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17381 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17382 then_not_else = FALSE;
17384 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17385 seeking_return = 1;
17386 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17388 seeking_return = 1;
17389 then_not_else = FALSE;
17391 else
17392 gcc_unreachable ();
17394 /* See how many insns this branch skips, and what kind of insns. If all
17395 insns are okay, and the label or unconditional branch to the same
17396 label is not too far away, succeed. */
17397 for (insns_skipped = 0;
17398 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17400 rtx scanbody;
17402 this_insn = next_nonnote_insn (this_insn);
17403 if (!this_insn)
17404 break;
17406 switch (GET_CODE (this_insn))
17408 case CODE_LABEL:
17409 /* Succeed if it is the target label, otherwise fail since
17410 control falls in from somewhere else. */
17411 if (this_insn == label)
17413 arm_ccfsm_state = 1;
17414 succeed = TRUE;
17416 else
17417 fail = TRUE;
17418 break;
17420 case BARRIER:
17421 /* Succeed if the following insn is the target label.
17422 Otherwise fail.
17423 If return insns are used then the last insn in a function
17424 will be a barrier. */
17425 this_insn = next_nonnote_insn (this_insn);
17426 if (this_insn && this_insn == label)
17428 arm_ccfsm_state = 1;
17429 succeed = TRUE;
17431 else
17432 fail = TRUE;
17433 break;
17435 case CALL_INSN:
17436 /* The AAPCS says that conditional calls should not be
17437 used since they make interworking inefficient (the
17438 linker can't transform BL<cond> into BLX). That's
17439 only a problem if the machine has BLX. */
17440 if (arm_arch5)
17442 fail = TRUE;
17443 break;
17446 /* Succeed if the following insn is the target label, or
17447 if the following two insns are a barrier and the
17448 target label. */
17449 this_insn = next_nonnote_insn (this_insn);
17450 if (this_insn && GET_CODE (this_insn) == BARRIER)
17451 this_insn = next_nonnote_insn (this_insn);
17453 if (this_insn && this_insn == label
17454 && insns_skipped < max_insns_skipped)
17456 arm_ccfsm_state = 1;
17457 succeed = TRUE;
17459 else
17460 fail = TRUE;
17461 break;
17463 case JUMP_INSN:
17464 /* If this is an unconditional branch to the same label, succeed.
17465 If it is to another label, do nothing. If it is conditional,
17466 fail. */
17467 /* XXX Probably, the tests for SET and the PC are
17468 unnecessary. */
17470 scanbody = PATTERN (this_insn);
17471 if (GET_CODE (scanbody) == SET
17472 && GET_CODE (SET_DEST (scanbody)) == PC)
17474 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17475 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17477 arm_ccfsm_state = 2;
17478 succeed = TRUE;
17480 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17481 fail = TRUE;
17483 /* Fail if a conditional return is undesirable (e.g. on a
17484 StrongARM), but still allow this if optimizing for size. */
17485 else if (GET_CODE (scanbody) == RETURN
17486 && !use_return_insn (TRUE, NULL)
17487 && !optimize_size)
17488 fail = TRUE;
17489 else if (GET_CODE (scanbody) == RETURN
17490 && seeking_return)
17492 arm_ccfsm_state = 2;
17493 succeed = TRUE;
17495 else if (GET_CODE (scanbody) == PARALLEL)
17497 switch (get_attr_conds (this_insn))
17499 case CONDS_NOCOND:
17500 break;
17501 default:
17502 fail = TRUE;
17503 break;
17506 else
17507 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17509 break;
17511 case INSN:
17512 /* Instructions using or affecting the condition codes make it
17513 fail. */
17514 scanbody = PATTERN (this_insn);
17515 if (!(GET_CODE (scanbody) == SET
17516 || GET_CODE (scanbody) == PARALLEL)
17517 || get_attr_conds (this_insn) != CONDS_NOCOND)
17518 fail = TRUE;
17520 /* A conditional Cirrus instruction must be followed by
17521 a non-Cirrus instruction. However, since we
17522 conditionalize instructions in this function, and since
17523 by the time we get here we can't add instructions
17524 (nops) because shorten_branches() has already been
17525 called, we simply disable conditionalizing Cirrus
17526 instructions to be safe. */
17527 if (GET_CODE (scanbody) != USE
17528 && GET_CODE (scanbody) != CLOBBER
17529 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17530 fail = TRUE;
17531 break;
17533 default:
17534 break;
17537 if (succeed)
17539 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17540 arm_target_label = CODE_LABEL_NUMBER (label);
17541 else
17543 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17545 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17547 this_insn = next_nonnote_insn (this_insn);
17548 gcc_assert (!this_insn
17549 || (GET_CODE (this_insn) != BARRIER
17550 && GET_CODE (this_insn) != CODE_LABEL));
17552 if (!this_insn)
17554 /* Oh, dear! We ran off the end... give up. */
17555 extract_constrain_insn_cached (insn);
17556 arm_ccfsm_state = 0;
17557 arm_target_insn = NULL;
17558 return;
17560 arm_target_insn = this_insn;
17563 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17564 what it was. */
17565 if (!reverse)
17566 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17568 if (reverse || then_not_else)
17569 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17572 /* Restore recog_data (getting the attributes of other insns can
17573 destroy this array, but final.c assumes that it remains intact
17574 across this call). */
17575 extract_constrain_insn_cached (insn);
17579 /* Output IT instructions. */
17580 void
17581 thumb2_asm_output_opcode (FILE * stream)
17583 char buff[5];
17584 int n;
17586 if (arm_condexec_mask)
17588 for (n = 0; n < arm_condexec_masklen; n++)
17589 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17590 buff[n] = 0;
17591 asm_fprintf(stream, "i%s\t%s\n\t", buff,
17592 arm_condition_codes[arm_current_cc]);
17593 arm_condexec_mask = 0;
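/* Illustrative sketch, added for exposition (not part of the upstream
   source): a standalone mirror of how the IT mnemonic above is built from
   arm_condexec_mask / arm_condexec_masklen.  For instance, masklen 3 with
   mask 0b101 (the first and third insns take the condition, the second its
   inverse) yields "itet".  BUF must hold at least masklen + 2 bytes.  */
static void
example_it_mnemonic (unsigned int mask, int masklen, char *buf)
{
  int n;

  buf[0] = 'i';
  for (n = 0; n < masklen; n++)
    buf[n + 1] = (mask & (1u << n)) ? 't' : 'e';
  buf[masklen + 1] = '\0';
}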
17597 /* Returns true if REGNO is a valid register
17598 for holding a quantity of type MODE. */
17599 int
17600 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17602 if (GET_MODE_CLASS (mode) == MODE_CC)
17603 return (regno == CC_REGNUM
17604 || (TARGET_HARD_FLOAT && TARGET_VFP
17605 && regno == VFPCC_REGNUM));
17607 if (TARGET_THUMB1)
17608 /* For the Thumb we only allow values bigger than SImode in
17609 registers 0 - 6, so that there is always a second low
17610 register available to hold the upper part of the value.
17611 We probably ought to ensure that the register is the
17612 start of an even numbered register pair. */
17613 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17615 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17616 && IS_CIRRUS_REGNUM (regno))
17617 /* We have outlawed SI values in Cirrus registers because they
17618 reside in the lower 32 bits, but SF values reside in the
17619 upper 32 bits. This causes gcc all sorts of grief. We can't
17620 even split the registers into pairs because Cirrus SI values
17621 get sign extended to 64bits-- aldyh. */
17622 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17624 if (TARGET_HARD_FLOAT && TARGET_VFP
17625 && IS_VFP_REGNUM (regno))
17627 if (mode == SFmode || mode == SImode)
17628 return VFP_REGNO_OK_FOR_SINGLE (regno);
17630 if (mode == DFmode)
17631 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17633 /* VFP registers can hold HFmode values, but there is no point in
17634 putting them there unless we have hardware conversion insns. */
17635 if (mode == HFmode)
17636 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17638 if (TARGET_NEON)
17639 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17640 || (VALID_NEON_QREG_MODE (mode)
17641 && NEON_REGNO_OK_FOR_QUAD (regno))
17642 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17643 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17644 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17645 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17646 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17648 return FALSE;
17651 if (TARGET_REALLY_IWMMXT)
17653 if (IS_IWMMXT_GR_REGNUM (regno))
17654 return mode == SImode;
17656 if (IS_IWMMXT_REGNUM (regno))
17657 return VALID_IWMMXT_REG_MODE (mode);
17660 /* We allow almost any value to be stored in the general registers.
17661 Restrict doubleword quantities to even register pairs so that we can
17662 use ldrd. Do not allow very large Neon structure opaque modes in
17663 general registers; they would use too many. */
17664 if (regno <= LAST_ARM_REGNUM)
17665 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17666 && ARM_NUM_REGS (mode) <= 4;
17668 if (regno == FRAME_POINTER_REGNUM
17669 || regno == ARG_POINTER_REGNUM)
17670 /* We only allow integers in the fake hard registers. */
17671 return GET_MODE_CLASS (mode) == MODE_INT;
17673 /* The only registers left are the FPA registers
17674 which we only allow to hold FP values. */
17675 return (TARGET_HARD_FLOAT && TARGET_FPA
17676 && GET_MODE_CLASS (mode) == MODE_FLOAT
17677 && regno >= FIRST_FPA_REGNUM
17678 && regno <= LAST_FPA_REGNUM);
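/* Illustrative results (not part of the original source), assuming an
   ARM-mode target with TARGET_LDRD enabled:

     arm_hard_regno_mode_ok (0, DImode)          -> true   (r0/r1 is an even pair)
     arm_hard_regno_mode_ok (1, DImode)          -> false  (odd base register)
     arm_hard_regno_mode_ok (CC_REGNUM, CCmode)  -> true   (condition-code register)  */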
17681 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17682 not used in ARM mode. */
17684 enum reg_class
17685 arm_regno_class (int regno)
17687 if (TARGET_THUMB1)
17689 if (regno == STACK_POINTER_REGNUM)
17690 return STACK_REG;
17691 if (regno == CC_REGNUM)
17692 return CC_REG;
17693 if (regno < 8)
17694 return LO_REGS;
17695 return HI_REGS;
17698 if (TARGET_THUMB2 && regno < 8)
17699 return LO_REGS;
17701 if ( regno <= LAST_ARM_REGNUM
17702 || regno == FRAME_POINTER_REGNUM
17703 || regno == ARG_POINTER_REGNUM)
17704 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17706 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17707 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17709 if (IS_CIRRUS_REGNUM (regno))
17710 return CIRRUS_REGS;
17712 if (IS_VFP_REGNUM (regno))
17714 if (regno <= D7_VFP_REGNUM)
17715 return VFP_D0_D7_REGS;
17716 else if (regno <= LAST_LO_VFP_REGNUM)
17717 return VFP_LO_REGS;
17718 else
17719 return VFP_HI_REGS;
17722 if (IS_IWMMXT_REGNUM (regno))
17723 return IWMMXT_REGS;
17725 if (IS_IWMMXT_GR_REGNUM (regno))
17726 return IWMMXT_GR_REGS;
17728 return FPA_REGS;
17731 /* Handle a special case when computing the offset
17732 of an argument from the frame pointer. */
17734 arm_debugger_arg_offset (int value, rtx addr)
17736 rtx insn;
17738 /* We are only interested if dbxout_parms() failed to compute the offset. */
17739 if (value != 0)
17740 return 0;
17742 /* We can only cope with the case where the address is held in a register. */
17743 if (GET_CODE (addr) != REG)
17744 return 0;
17746 /* If we are using the frame pointer to point at the argument, then
17747 an offset of 0 is correct. */
17748 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17749 return 0;
17751 /* If we are using the stack pointer to point at the
17752 argument, then an offset of 0 is correct. */
17753 /* ??? Check this is consistent with thumb2 frame layout. */
17754 if ((TARGET_THUMB || !frame_pointer_needed)
17755 && REGNO (addr) == SP_REGNUM)
17756 return 0;
17758 /* Oh dear. The argument is pointed to by a register rather
17759 than being held in a register, or being stored at a known
17760 offset from the frame pointer. Since GDB only understands
17761 those two kinds of argument we must translate the address
17762 held in the register into an offset from the frame pointer.
17763 We do this by searching through the insns for the function
17764 looking to see where this register gets its value. If the
17765 register is initialized from the frame pointer plus an offset
17766 then we are in luck and we can continue, otherwise we give up.
17768 This code is exercised by producing debugging information
17769 for a function with arguments like this:
17771 double func (double a, double b, int c, double d) {return d;}
17773 Without this code the stab for parameter 'd' will be set to
17774 an offset of 0 from the frame pointer, rather than 8. */
17776 /* The if() statement says:
17778 If the insn is a normal instruction
17779 and if the insn is setting the value in a register
17780 and if the register being set is the register holding the address of the argument
17781 and if the address is computed by an addition
17782 that involves adding to a register
17783 which is the frame pointer
17784 a constant integer
17786 then... */
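/* For instance (illustrative, not from the original source), an insn whose
   pattern is

     (set (reg:SI Rn)
	  (plus:SI (reg:SI HARD_FRAME_POINTER_REGNUM) (const_int 8)))

   where Rn is the register holding ADDR satisfies every test below, and
   the constant 8 becomes the offset that is reported.  */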
17788 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17790 if ( GET_CODE (insn) == INSN
17791 && GET_CODE (PATTERN (insn)) == SET
17792 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17793 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17794 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17795 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17796 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17799 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17801 break;
17805 if (value == 0)
17807 debug_rtx (addr);
17808 warning (0, "unable to compute real location of stacked parameter");
17809 value = 8; /* XXX magic hack */
17812 return value;
17815 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17816 do \
17818 if ((MASK) & insn_flags) \
17819 add_builtin_function ((NAME), (TYPE), (CODE), \
17820 BUILT_IN_MD, NULL, NULL_TREE); \
17822 while (0)
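/* Illustrative expansion (not part of the original source): a call such as

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
		   ARM_BUILTIN_WZERO);

   registers the builtin only when FL_IWMMXT is set in insn_flags, i.e. it
   behaves roughly like

     if (FL_IWMMXT & insn_flags)
       add_builtin_function ("__builtin_arm_wzero", di_ftype_void,
			     ARM_BUILTIN_WZERO, BUILT_IN_MD, NULL, NULL_TREE);  */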
17824 struct builtin_description
17826 const unsigned int mask;
17827 const enum insn_code icode;
17828 const char * const name;
17829 const enum arm_builtins code;
17830 const enum rtx_code comparison;
17831 const unsigned int flag;
17834 static const struct builtin_description bdesc_2arg[] =
17836 #define IWMMXT_BUILTIN(code, string, builtin) \
17837 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17838 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17840 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17841 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17842 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17843 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17844 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17845 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17846 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17847 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17848 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17849 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17850 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17851 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17852 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17853 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17854 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17855 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17856 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17857 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17858 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17859 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17860 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17861 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17862 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17863 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17864 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17865 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17866 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17867 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17868 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17869 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17870 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17871 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17872 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17873 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17874 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17875 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17876 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17877 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17878 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17879 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17880 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17881 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17882 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17883 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17884 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17885 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17886 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17887 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17888 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17889 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17890 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17891 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17892 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17893 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17894 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17895 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17896 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17897 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17899 #define IWMMXT_BUILTIN2(code, builtin) \
17900 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17902 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17903 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17904 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17905 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17906 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17907 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17908 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17909 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17910 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17911 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17912 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17913 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17914 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17915 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17916 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17917 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17918 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17919 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17920 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17921 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17922 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17923 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17924 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17925 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17926 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17927 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17928 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17929 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17930 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17931 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17932 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17933 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
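/* Illustrative expansion (not part of the original source): the first entry
   above, IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB), expands to

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   tying __builtin_arm_waddb to the addv8qi3 insn pattern.  The
   IWMMXT_BUILTIN2 entries carry a null name, so the registration loop
   below skips them.  */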
17936 static const struct builtin_description bdesc_1arg[] =
17938 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17939 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17940 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17941 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17942 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17943 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17944 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17945 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17946 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17947 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17948 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17949 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17950 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17951 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17952 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17953 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17954 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17955 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17958 /* Set up all the iWMMXt builtins. This is
17959 not called if TARGET_IWMMXT is zero. */
17961 static void
17962 arm_init_iwmmxt_builtins (void)
17964 const struct builtin_description * d;
17965 size_t i;
17966 tree endlink = void_list_node;
17968 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17969 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17970 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
17972 tree int_ftype_int
17973 = build_function_type (integer_type_node,
17974 tree_cons (NULL_TREE, integer_type_node, endlink));
17975 tree v8qi_ftype_v8qi_v8qi_int
17976 = build_function_type (V8QI_type_node,
17977 tree_cons (NULL_TREE, V8QI_type_node,
17978 tree_cons (NULL_TREE, V8QI_type_node,
17979 tree_cons (NULL_TREE,
17980 integer_type_node,
17981 endlink))));
17982 tree v4hi_ftype_v4hi_int
17983 = build_function_type (V4HI_type_node,
17984 tree_cons (NULL_TREE, V4HI_type_node,
17985 tree_cons (NULL_TREE, integer_type_node,
17986 endlink)));
17987 tree v2si_ftype_v2si_int
17988 = build_function_type (V2SI_type_node,
17989 tree_cons (NULL_TREE, V2SI_type_node,
17990 tree_cons (NULL_TREE, integer_type_node,
17991 endlink)));
17992 tree v2si_ftype_di_di
17993 = build_function_type (V2SI_type_node,
17994 tree_cons (NULL_TREE, long_long_integer_type_node,
17995 tree_cons (NULL_TREE, long_long_integer_type_node,
17996 endlink)));
17997 tree di_ftype_di_int
17998 = build_function_type (long_long_integer_type_node,
17999 tree_cons (NULL_TREE, long_long_integer_type_node,
18000 tree_cons (NULL_TREE, integer_type_node,
18001 endlink)));
18002 tree di_ftype_di_int_int
18003 = build_function_type (long_long_integer_type_node,
18004 tree_cons (NULL_TREE, long_long_integer_type_node,
18005 tree_cons (NULL_TREE, integer_type_node,
18006 tree_cons (NULL_TREE,
18007 integer_type_node,
18008 endlink))));
18009 tree int_ftype_v8qi
18010 = build_function_type (integer_type_node,
18011 tree_cons (NULL_TREE, V8QI_type_node,
18012 endlink));
18013 tree int_ftype_v4hi
18014 = build_function_type (integer_type_node,
18015 tree_cons (NULL_TREE, V4HI_type_node,
18016 endlink));
18017 tree int_ftype_v2si
18018 = build_function_type (integer_type_node,
18019 tree_cons (NULL_TREE, V2SI_type_node,
18020 endlink));
18021 tree int_ftype_v8qi_int
18022 = build_function_type (integer_type_node,
18023 tree_cons (NULL_TREE, V8QI_type_node,
18024 tree_cons (NULL_TREE, integer_type_node,
18025 endlink)));
18026 tree int_ftype_v4hi_int
18027 = build_function_type (integer_type_node,
18028 tree_cons (NULL_TREE, V4HI_type_node,
18029 tree_cons (NULL_TREE, integer_type_node,
18030 endlink)));
18031 tree int_ftype_v2si_int
18032 = build_function_type (integer_type_node,
18033 tree_cons (NULL_TREE, V2SI_type_node,
18034 tree_cons (NULL_TREE, integer_type_node,
18035 endlink)));
18036 tree v8qi_ftype_v8qi_int_int
18037 = build_function_type (V8QI_type_node,
18038 tree_cons (NULL_TREE, V8QI_type_node,
18039 tree_cons (NULL_TREE, integer_type_node,
18040 tree_cons (NULL_TREE,
18041 integer_type_node,
18042 endlink))));
18043 tree v4hi_ftype_v4hi_int_int
18044 = build_function_type (V4HI_type_node,
18045 tree_cons (NULL_TREE, V4HI_type_node,
18046 tree_cons (NULL_TREE, integer_type_node,
18047 tree_cons (NULL_TREE,
18048 integer_type_node,
18049 endlink))));
18050 tree v2si_ftype_v2si_int_int
18051 = build_function_type (V2SI_type_node,
18052 tree_cons (NULL_TREE, V2SI_type_node,
18053 tree_cons (NULL_TREE, integer_type_node,
18054 tree_cons (NULL_TREE,
18055 integer_type_node,
18056 endlink))));
18057 /* Miscellaneous. */
18058 tree v8qi_ftype_v4hi_v4hi
18059 = build_function_type (V8QI_type_node,
18060 tree_cons (NULL_TREE, V4HI_type_node,
18061 tree_cons (NULL_TREE, V4HI_type_node,
18062 endlink)));
18063 tree v4hi_ftype_v2si_v2si
18064 = build_function_type (V4HI_type_node,
18065 tree_cons (NULL_TREE, V2SI_type_node,
18066 tree_cons (NULL_TREE, V2SI_type_node,
18067 endlink)));
18068 tree v2si_ftype_v4hi_v4hi
18069 = build_function_type (V2SI_type_node,
18070 tree_cons (NULL_TREE, V4HI_type_node,
18071 tree_cons (NULL_TREE, V4HI_type_node,
18072 endlink)));
18073 tree v2si_ftype_v8qi_v8qi
18074 = build_function_type (V2SI_type_node,
18075 tree_cons (NULL_TREE, V8QI_type_node,
18076 tree_cons (NULL_TREE, V8QI_type_node,
18077 endlink)));
18078 tree v4hi_ftype_v4hi_di
18079 = build_function_type (V4HI_type_node,
18080 tree_cons (NULL_TREE, V4HI_type_node,
18081 tree_cons (NULL_TREE,
18082 long_long_integer_type_node,
18083 endlink)));
18084 tree v2si_ftype_v2si_di
18085 = build_function_type (V2SI_type_node,
18086 tree_cons (NULL_TREE, V2SI_type_node,
18087 tree_cons (NULL_TREE,
18088 long_long_integer_type_node,
18089 endlink)));
18090 tree void_ftype_int_int
18091 = build_function_type (void_type_node,
18092 tree_cons (NULL_TREE, integer_type_node,
18093 tree_cons (NULL_TREE, integer_type_node,
18094 endlink)));
18095 tree di_ftype_void
18096 = build_function_type (long_long_unsigned_type_node, endlink);
18097 tree di_ftype_v8qi
18098 = build_function_type (long_long_integer_type_node,
18099 tree_cons (NULL_TREE, V8QI_type_node,
18100 endlink));
18101 tree di_ftype_v4hi
18102 = build_function_type (long_long_integer_type_node,
18103 tree_cons (NULL_TREE, V4HI_type_node,
18104 endlink));
18105 tree di_ftype_v2si
18106 = build_function_type (long_long_integer_type_node,
18107 tree_cons (NULL_TREE, V2SI_type_node,
18108 endlink));
18109 tree v2si_ftype_v4hi
18110 = build_function_type (V2SI_type_node,
18111 tree_cons (NULL_TREE, V4HI_type_node,
18112 endlink));
18113 tree v4hi_ftype_v8qi
18114 = build_function_type (V4HI_type_node,
18115 tree_cons (NULL_TREE, V8QI_type_node,
18116 endlink));
18118 tree di_ftype_di_v4hi_v4hi
18119 = build_function_type (long_long_unsigned_type_node,
18120 tree_cons (NULL_TREE,
18121 long_long_unsigned_type_node,
18122 tree_cons (NULL_TREE, V4HI_type_node,
18123 tree_cons (NULL_TREE,
18124 V4HI_type_node,
18125 endlink))));
18127 tree di_ftype_v4hi_v4hi
18128 = build_function_type (long_long_unsigned_type_node,
18129 tree_cons (NULL_TREE, V4HI_type_node,
18130 tree_cons (NULL_TREE, V4HI_type_node,
18131 endlink)));
18133 /* Normal vector binops. */
18134 tree v8qi_ftype_v8qi_v8qi
18135 = build_function_type (V8QI_type_node,
18136 tree_cons (NULL_TREE, V8QI_type_node,
18137 tree_cons (NULL_TREE, V8QI_type_node,
18138 endlink)));
18139 tree v4hi_ftype_v4hi_v4hi
18140 = build_function_type (V4HI_type_node,
18141 tree_cons (NULL_TREE, V4HI_type_node,
18142 tree_cons (NULL_TREE, V4HI_type_node,
18143 endlink)));
18144 tree v2si_ftype_v2si_v2si
18145 = build_function_type (V2SI_type_node,
18146 tree_cons (NULL_TREE, V2SI_type_node,
18147 tree_cons (NULL_TREE, V2SI_type_node,
18148 endlink)));
18149 tree di_ftype_di_di
18150 = build_function_type (long_long_unsigned_type_node,
18151 tree_cons (NULL_TREE, long_long_unsigned_type_node,
18152 tree_cons (NULL_TREE,
18153 long_long_unsigned_type_node,
18154 endlink)));
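/* Reading guide (illustrative, not part of the original source): each of
   the *_ftype_* trees above encodes an ordinary C prototype, built from a
   tree_cons argument chain terminated by void_list_node (endlink), e.g.

     v8qi_ftype_v8qi_v8qi   ~   V8QI      f (V8QI, V8QI);
     di_ftype_di_int        ~   long long f (long long, int);
     int_ftype_v2si_int     ~   int       f (V2SI, int);  */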
18156 /* Add all builtins that are more or less simple operations on two
18157 operands. */
18158 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18160 /* Use one of the operands; the target can have a different mode for
18161 mask-generating compares. */
18162 enum machine_mode mode;
18163 tree type;
18165 if (d->name == 0)
18166 continue;
18168 mode = insn_data[d->icode].operand[1].mode;
18170 switch (mode)
18172 case V8QImode:
18173 type = v8qi_ftype_v8qi_v8qi;
18174 break;
18175 case V4HImode:
18176 type = v4hi_ftype_v4hi_v4hi;
18177 break;
18178 case V2SImode:
18179 type = v2si_ftype_v2si_v2si;
18180 break;
18181 case DImode:
18182 type = di_ftype_di_di;
18183 break;
18185 default:
18186 gcc_unreachable ();
18189 def_mbuiltin (d->mask, d->name, type, d->code);
18192 /* Add the remaining MMX insns with somewhat more complicated types. */
18193 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
18194 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
18195 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
18197 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
18198 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
18199 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
18200 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
18201 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
18202 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
18204 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
18205 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
18206 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
18207 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
18208 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
18209 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
18211 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
18212 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
18213 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
18214 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
18215 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
18216 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
18218 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
18219 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
18220 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
18221 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
18222 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
18223 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
18225 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
18227 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
18228 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
18229 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
18230 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
18232 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
18233 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
18234 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
18235 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
18236 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
18237 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
18238 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
18239 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
18240 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
18242 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
18243 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
18244 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
18246 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
18247 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
18248 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
18250 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
18251 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
18252 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
18253 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
18254 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
18255 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
18257 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
18258 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
18259 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
18260 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
18261 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
18262 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
18263 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
18264 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
18265 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
18266 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
18267 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
18268 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
18270 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
18271 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
18272 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
18273 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
18275 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
18276 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
18277 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
18278 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
18279 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
18280 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
18281 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
18284 static void
18285 arm_init_tls_builtins (void)
18287 tree ftype, decl;
18289 ftype = build_function_type (ptr_type_node, void_list_node);
18290 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
18291 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
18292 NULL, NULL_TREE);
18293 TREE_NOTHROW (decl) = 1;
18294 TREE_READONLY (decl) = 1;
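/* Example use from user code (illustrative, not part of this file):

     void *tp = __builtin_thread_pointer ();

   Marking the decl nothrow and readonly lets the middle end treat the
   builtin like a "const" function, so repeated calls can be combined.  */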
18297 enum neon_builtin_type_bits {
18298 T_V8QI = 0x0001,
18299 T_V4HI = 0x0002,
18300 T_V2SI = 0x0004,
18301 T_V2SF = 0x0008,
18302 T_DI = 0x0010,
18303 T_V16QI = 0x0020,
18304 T_V8HI = 0x0040,
18305 T_V4SI = 0x0080,
18306 T_V4SF = 0x0100,
18307 T_V2DI = 0x0200,
18308 T_TI = 0x0400,
18309 T_EI = 0x0800,
18310 T_OI = 0x1000
18313 #define v8qi_UP T_V8QI
18314 #define v4hi_UP T_V4HI
18315 #define v2si_UP T_V2SI
18316 #define v2sf_UP T_V2SF
18317 #define di_UP T_DI
18318 #define v16qi_UP T_V16QI
18319 #define v8hi_UP T_V8HI
18320 #define v4si_UP T_V4SI
18321 #define v4sf_UP T_V4SF
18322 #define v2di_UP T_V2DI
18323 #define ti_UP T_TI
18324 #define ei_UP T_EI
18325 #define oi_UP T_OI
18327 #define UP(X) X##_UP
18329 #define T_MAX 13
18331 typedef enum {
18332 NEON_BINOP,
18333 NEON_TERNOP,
18334 NEON_UNOP,
18335 NEON_GETLANE,
18336 NEON_SETLANE,
18337 NEON_CREATE,
18338 NEON_DUP,
18339 NEON_DUPLANE,
18340 NEON_COMBINE,
18341 NEON_SPLIT,
18342 NEON_LANEMUL,
18343 NEON_LANEMULL,
18344 NEON_LANEMULH,
18345 NEON_LANEMAC,
18346 NEON_SCALARMUL,
18347 NEON_SCALARMULL,
18348 NEON_SCALARMULH,
18349 NEON_SCALARMAC,
18350 NEON_CONVERT,
18351 NEON_FIXCONV,
18352 NEON_SELECT,
18353 NEON_RESULTPAIR,
18354 NEON_REINTERP,
18355 NEON_VTBL,
18356 NEON_VTBX,
18357 NEON_LOAD1,
18358 NEON_LOAD1LANE,
18359 NEON_STORE1,
18360 NEON_STORE1LANE,
18361 NEON_LOADSTRUCT,
18362 NEON_LOADSTRUCTLANE,
18363 NEON_STORESTRUCT,
18364 NEON_STORESTRUCTLANE,
18365 NEON_LOGICBINOP,
18366 NEON_SHIFTINSERT,
18367 NEON_SHIFTIMM,
18368 NEON_SHIFTACC
18369 } neon_itype;
18371 typedef struct {
18372 const char *name;
18373 const neon_itype itype;
18374 const int bits;
18375 const enum insn_code codes[T_MAX];
18376 const unsigned int num_vars;
18377 unsigned int base_fcode;
18378 } neon_builtin_datum;
18380 #define CF(N,X) CODE_FOR_neon_##N##X
18382 #define VAR1(T, N, A) \
18383 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
18384 #define VAR2(T, N, A, B) \
18385 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
18386 #define VAR3(T, N, A, B, C) \
18387 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
18388 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
18389 #define VAR4(T, N, A, B, C, D) \
18390 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
18391 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
18392 #define VAR5(T, N, A, B, C, D, E) \
18393 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
18394 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
18395 #define VAR6(T, N, A, B, C, D, E, F) \
18396 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
18397 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
18398 #define VAR7(T, N, A, B, C, D, E, F, G) \
18399 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
18400 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18401 CF (N, G) }, 7, 0
18402 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18403 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18404 | UP (H), \
18405 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18406 CF (N, G), CF (N, H) }, 8, 0
18407 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18408 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18409 | UP (H) | UP (I), \
18410 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18411 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
18412 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18413 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18414 | UP (H) | UP (I) | UP (J), \
18415 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18416 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
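/* Illustrative expansion (not part of the original source): a table entry
   such as { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) } expands to

     { "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
       { CODE_FOR_neon_vaddlv8qi, CODE_FOR_neon_vaddlv4hi,
	 CODE_FOR_neon_vaddlv2si }, 3, 0 },

   and arm_init_neon_builtins below turns it into the builtins
   __builtin_neon_vaddlv8qi, __builtin_neon_vaddlv4hi and
   __builtin_neon_vaddlv2si.  */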
18418 /* The mode entries in the following table correspond to the "key" type of the
18419 instruction variant, i.e. equivalent to that which would be specified after
18420 the assembler mnemonic, which usually refers to the last vector operand.
18421 (Signed/unsigned/polynomial types are not differentiated, though; they
18422 are all mapped onto the same mode for a given element size.) The modes
18423 listed per instruction should be the same as those defined for that
18424 instruction's pattern in neon.md.
18425 WARNING: Variants should be listed in the same increasing order as
18426 neon_builtin_type_bits. */
18428 static neon_builtin_datum neon_builtin_data[] =
18430 { VAR10 (BINOP, vadd,
18431 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18432 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
18433 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
18434 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18435 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18436 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
18437 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18438 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18439 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
18440 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18441 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
18442 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
18443 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
18444 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
18445 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
18446 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
18447 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
18448 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
18449 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
18450 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
18451 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
18452 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
18453 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18454 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18455 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18456 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
18457 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
18458 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
18459 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18460 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18461 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18462 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
18463 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18464 { VAR10 (BINOP, vsub,
18465 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18466 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
18467 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
18468 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18469 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18470 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
18471 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18472 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18473 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18474 { VAR2 (BINOP, vcage, v2sf, v4sf) },
18475 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
18476 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18477 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18478 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
18479 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18480 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
18481 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18482 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18483 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
18484 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18485 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18486 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
18487 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
18488 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
18489 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
18490 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18491 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18492 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18493 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18494 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18495 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18496 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18497 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18498 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
18499 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
18500 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
18501 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18502 /* FIXME: vget_lane supports more variants than this! */
18503 { VAR10 (GETLANE, vget_lane,
18504 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18505 { VAR10 (SETLANE, vset_lane,
18506 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18507 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
18508 { VAR10 (DUP, vdup_n,
18509 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18510 { VAR10 (DUPLANE, vdup_lane,
18511 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18512 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
18513 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
18514 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
18515 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
18516 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
18517 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
18518 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
18519 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18520 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18521 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
18522 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
18523 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18524 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
18525 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
18526 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18527 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18528 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
18529 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
18530 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18531 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
18532 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
18533 { VAR10 (BINOP, vext,
18534 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18535 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18536 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
18537 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
18538 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
18539 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
18540 { VAR10 (SELECT, vbsl,
18541 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18542 { VAR1 (VTBL, vtbl1, v8qi) },
18543 { VAR1 (VTBL, vtbl2, v8qi) },
18544 { VAR1 (VTBL, vtbl3, v8qi) },
18545 { VAR1 (VTBL, vtbl4, v8qi) },
18546 { VAR1 (VTBX, vtbx1, v8qi) },
18547 { VAR1 (VTBX, vtbx2, v8qi) },
18548 { VAR1 (VTBX, vtbx3, v8qi) },
18549 { VAR1 (VTBX, vtbx4, v8qi) },
18550 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18551 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18552 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18553 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
18554 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
18555 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
18556 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
18557 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
18558 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
18559 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
18560 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
18561 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
18562 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
18563 { VAR10 (LOAD1, vld1,
18564 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18565 { VAR10 (LOAD1LANE, vld1_lane,
18566 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18567 { VAR10 (LOAD1, vld1_dup,
18568 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18569 { VAR10 (STORE1, vst1,
18570 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18571 { VAR10 (STORE1LANE, vst1_lane,
18572 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18573 { VAR9 (LOADSTRUCT,
18574 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18575 { VAR7 (LOADSTRUCTLANE, vld2_lane,
18576 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18577 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
18578 { VAR9 (STORESTRUCT, vst2,
18579 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18580 { VAR7 (STORESTRUCTLANE, vst2_lane,
18581 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18582 { VAR9 (LOADSTRUCT,
18583 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18584 { VAR7 (LOADSTRUCTLANE, vld3_lane,
18585 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18586 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
18587 { VAR9 (STORESTRUCT, vst3,
18588 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18589 { VAR7 (STORESTRUCTLANE, vst3_lane,
18590 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18591 { VAR9 (LOADSTRUCT, vld4,
18592 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18593 { VAR7 (LOADSTRUCTLANE, vld4_lane,
18594 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18595 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
18596 { VAR9 (STORESTRUCT, vst4,
18597 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18598 { VAR7 (STORESTRUCTLANE, vst4_lane,
18599 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18600 { VAR10 (LOGICBINOP, vand,
18601 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18602 { VAR10 (LOGICBINOP, vorr,
18603 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18604 { VAR10 (BINOP, veor,
18605 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18606 { VAR10 (LOGICBINOP, vbic,
18607 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18608 { VAR10 (LOGICBINOP, vorn,
18609 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
18612 #undef CF
18613 #undef VAR1
18614 #undef VAR2
18615 #undef VAR3
18616 #undef VAR4
18617 #undef VAR5
18618 #undef VAR6
18619 #undef VAR7
18620 #undef VAR8
18621 #undef VAR9
18622 #undef VAR10
18624 static void
18625 arm_init_neon_builtins (void)
18627 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
18629 tree neon_intQI_type_node;
18630 tree neon_intHI_type_node;
18631 tree neon_polyQI_type_node;
18632 tree neon_polyHI_type_node;
18633 tree neon_intSI_type_node;
18634 tree neon_intDI_type_node;
18635 tree neon_float_type_node;
18637 tree intQI_pointer_node;
18638 tree intHI_pointer_node;
18639 tree intSI_pointer_node;
18640 tree intDI_pointer_node;
18641 tree float_pointer_node;
18643 tree const_intQI_node;
18644 tree const_intHI_node;
18645 tree const_intSI_node;
18646 tree const_intDI_node;
18647 tree const_float_node;
18649 tree const_intQI_pointer_node;
18650 tree const_intHI_pointer_node;
18651 tree const_intSI_pointer_node;
18652 tree const_intDI_pointer_node;
18653 tree const_float_pointer_node;
18655 tree V8QI_type_node;
18656 tree V4HI_type_node;
18657 tree V2SI_type_node;
18658 tree V2SF_type_node;
18659 tree V16QI_type_node;
18660 tree V8HI_type_node;
18661 tree V4SI_type_node;
18662 tree V4SF_type_node;
18663 tree V2DI_type_node;
18665 tree intUQI_type_node;
18666 tree intUHI_type_node;
18667 tree intUSI_type_node;
18668 tree intUDI_type_node;
18670 tree intEI_type_node;
18671 tree intOI_type_node;
18672 tree intCI_type_node;
18673 tree intXI_type_node;
18675 tree V8QI_pointer_node;
18676 tree V4HI_pointer_node;
18677 tree V2SI_pointer_node;
18678 tree V2SF_pointer_node;
18679 tree V16QI_pointer_node;
18680 tree V8HI_pointer_node;
18681 tree V4SI_pointer_node;
18682 tree V4SF_pointer_node;
18683 tree V2DI_pointer_node;
18685 tree void_ftype_pv8qi_v8qi_v8qi;
18686 tree void_ftype_pv4hi_v4hi_v4hi;
18687 tree void_ftype_pv2si_v2si_v2si;
18688 tree void_ftype_pv2sf_v2sf_v2sf;
18689 tree void_ftype_pdi_di_di;
18690 tree void_ftype_pv16qi_v16qi_v16qi;
18691 tree void_ftype_pv8hi_v8hi_v8hi;
18692 tree void_ftype_pv4si_v4si_v4si;
18693 tree void_ftype_pv4sf_v4sf_v4sf;
18694 tree void_ftype_pv2di_v2di_v2di;
18696 tree reinterp_ftype_dreg[5][5];
18697 tree reinterp_ftype_qreg[5][5];
18698 tree dreg_types[5], qreg_types[5];
18700 /* Create distinguished type nodes for NEON vector element types,
18701 and pointers to values of such types, so we can detect them later. */
18702 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18703 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18704 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18705 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18706 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18707 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18708 neon_float_type_node = make_node (REAL_TYPE);
18709 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18710 layout_type (neon_float_type_node);
18712 /* Define typedefs which exactly correspond to the modes we are basing vector
18713 types on. If you change these names you'll need to change
18714 the table used by arm_mangle_type too. */
18715 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18716 "__builtin_neon_qi");
18717 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18718 "__builtin_neon_hi");
18719 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18720 "__builtin_neon_si");
18721 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18722 "__builtin_neon_sf");
18723 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18724 "__builtin_neon_di");
18725 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18726 "__builtin_neon_poly8");
18727 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18728 "__builtin_neon_poly16");
18730 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18731 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18732 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18733 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18734 float_pointer_node = build_pointer_type (neon_float_type_node);
18736 /* Next create constant-qualified versions of the above types. */
18737 const_intQI_node = build_qualified_type (neon_intQI_type_node,
18738 TYPE_QUAL_CONST);
18739 const_intHI_node = build_qualified_type (neon_intHI_type_node,
18740 TYPE_QUAL_CONST);
18741 const_intSI_node = build_qualified_type (neon_intSI_type_node,
18742 TYPE_QUAL_CONST);
18743 const_intDI_node = build_qualified_type (neon_intDI_type_node,
18744 TYPE_QUAL_CONST);
18745 const_float_node = build_qualified_type (neon_float_type_node,
18746 TYPE_QUAL_CONST);
18748 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18749 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18750 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18751 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18752 const_float_pointer_node = build_pointer_type (const_float_node);
18754 /* Now create vector types based on our NEON element types. */
18755 /* 64-bit vectors. */
18756 V8QI_type_node =
18757 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18758 V4HI_type_node =
18759 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18760 V2SI_type_node =
18761 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18762 V2SF_type_node =
18763 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
18764 /* 128-bit vectors. */
18765 V16QI_type_node =
18766 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
18767 V8HI_type_node =
18768 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
18769 V4SI_type_node =
18770 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
18771 V4SF_type_node =
18772 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
18773 V2DI_type_node =
18774 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18776 /* Unsigned integer types for various mode sizes. */
18777 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18778 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18779 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18780 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18782 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18783 "__builtin_neon_uqi");
18784 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18785 "__builtin_neon_uhi");
18786 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18787 "__builtin_neon_usi");
18788 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18789 "__builtin_neon_udi");
18791 /* Opaque integer types for structures of vectors. */
18792 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18793 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18794 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18795 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18797 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18798 "__builtin_neon_ti");
18799 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18800 "__builtin_neon_ei");
18801 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18802 "__builtin_neon_oi");
18803 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18804 "__builtin_neon_ci");
18805 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18806 "__builtin_neon_xi");
18808 /* Pointers to vector types. */
18809 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18810 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18811 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18812 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18813 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18814 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18815 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18816 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18817 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18819 /* Operations which return results as pairs. */
18820 void_ftype_pv8qi_v8qi_v8qi =
18821 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18822 V8QI_type_node, NULL);
18823 void_ftype_pv4hi_v4hi_v4hi =
18824 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18825 V4HI_type_node, NULL);
18826 void_ftype_pv2si_v2si_v2si =
18827 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18828 V2SI_type_node, NULL);
18829 void_ftype_pv2sf_v2sf_v2sf =
18830 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18831 V2SF_type_node, NULL);
18832 void_ftype_pdi_di_di =
18833 build_function_type_list (void_type_node, intDI_pointer_node,
18834 neon_intDI_type_node, neon_intDI_type_node, NULL);
18835 void_ftype_pv16qi_v16qi_v16qi =
18836 build_function_type_list (void_type_node, V16QI_pointer_node,
18837 V16QI_type_node, V16QI_type_node, NULL);
18838 void_ftype_pv8hi_v8hi_v8hi =
18839 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18840 V8HI_type_node, NULL);
18841 void_ftype_pv4si_v4si_v4si =
18842 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18843 V4SI_type_node, NULL);
18844 void_ftype_pv4sf_v4sf_v4sf =
18845 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18846 V4SF_type_node, NULL);
18847 void_ftype_pv2di_v2di_v2di =
18848 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18849 V2DI_type_node, NULL);
18851 dreg_types[0] = V8QI_type_node;
18852 dreg_types[1] = V4HI_type_node;
18853 dreg_types[2] = V2SI_type_node;
18854 dreg_types[3] = V2SF_type_node;
18855 dreg_types[4] = neon_intDI_type_node;
18857 qreg_types[0] = V16QI_type_node;
18858 qreg_types[1] = V8HI_type_node;
18859 qreg_types[2] = V4SI_type_node;
18860 qreg_types[3] = V4SF_type_node;
18861 qreg_types[4] = V2DI_type_node;
18863 for (i = 0; i < 5; i++)
18865 int j;
18866 for (j = 0; j < 5; j++)
18868 reinterp_ftype_dreg[i][j]
18869 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18870 reinterp_ftype_qreg[i][j]
18871 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
18875 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
18877 neon_builtin_datum *d = &neon_builtin_data[i];
18878 unsigned int j, codeidx = 0;
18880 d->base_fcode = fcode;
18882 for (j = 0; j < T_MAX; j++)
18884 const char* const modenames[] = {
18885 "v8qi", "v4hi", "v2si", "v2sf", "di",
18886 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
18888 char namebuf[60];
18889 tree ftype = NULL;
18890 enum insn_code icode;
18891 int is_load = 0, is_store = 0;
18893 if ((d->bits & (1 << j)) == 0)
18894 continue;
18896 icode = d->codes[codeidx++];
18898 switch (d->itype)
18900 case NEON_LOAD1:
18901 case NEON_LOAD1LANE:
18902 case NEON_LOADSTRUCT:
18903 case NEON_LOADSTRUCTLANE:
18904 is_load = 1;
18905 /* Fall through. */
18906 case NEON_STORE1:
18907 case NEON_STORE1LANE:
18908 case NEON_STORESTRUCT:
18909 case NEON_STORESTRUCTLANE:
18910 if (!is_load)
18911 is_store = 1;
18912 /* Fall through. */
18913 case NEON_UNOP:
18914 case NEON_BINOP:
18915 case NEON_LOGICBINOP:
18916 case NEON_SHIFTINSERT:
18917 case NEON_TERNOP:
18918 case NEON_GETLANE:
18919 case NEON_SETLANE:
18920 case NEON_CREATE:
18921 case NEON_DUP:
18922 case NEON_DUPLANE:
18923 case NEON_SHIFTIMM:
18924 case NEON_SHIFTACC:
18925 case NEON_COMBINE:
18926 case NEON_SPLIT:
18927 case NEON_CONVERT:
18928 case NEON_FIXCONV:
18929 case NEON_LANEMUL:
18930 case NEON_LANEMULL:
18931 case NEON_LANEMULH:
18932 case NEON_LANEMAC:
18933 case NEON_SCALARMUL:
18934 case NEON_SCALARMULL:
18935 case NEON_SCALARMULH:
18936 case NEON_SCALARMAC:
18937 case NEON_SELECT:
18938 case NEON_VTBL:
18939 case NEON_VTBX:
18941 int k;
18942 tree return_type = void_type_node, args = void_list_node;
18944 /* Build a function type directly from the insn_data for this
18945 builtin. The build_function_type() function takes care of
18946 removing duplicates for us. */
18947 for (k = insn_data[icode].n_generator_args - 1; k >= 0; k--)
18949 tree eltype;
18951 if (is_load && k == 1)
18953 /* Neon load patterns always have the memory operand
18954 (a SImode pointer) in the operand 1 position. We
18955 want a const pointer to the element type in that
18956 position. */
18957 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18959 switch (1 << j)
18961 case T_V8QI:
18962 case T_V16QI:
18963 eltype = const_intQI_pointer_node;
18964 break;
18966 case T_V4HI:
18967 case T_V8HI:
18968 eltype = const_intHI_pointer_node;
18969 break;
18971 case T_V2SI:
18972 case T_V4SI:
18973 eltype = const_intSI_pointer_node;
18974 break;
18976 case T_V2SF:
18977 case T_V4SF:
18978 eltype = const_float_pointer_node;
18979 break;
18981 case T_DI:
18982 case T_V2DI:
18983 eltype = const_intDI_pointer_node;
18984 break;
18986 default: gcc_unreachable ();
18989 else if (is_store && k == 0)
18991 /* Similarly, Neon store patterns use operand 0 as
18992 the memory location to store to (a SImode pointer).
18993 Use a pointer to the element type of the store in
18994 that position. */
18995 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18997 switch (1 << j)
18999 case T_V8QI:
19000 case T_V16QI:
19001 eltype = intQI_pointer_node;
19002 break;
19004 case T_V4HI:
19005 case T_V8HI:
19006 eltype = intHI_pointer_node;
19007 break;
19009 case T_V2SI:
19010 case T_V4SI:
19011 eltype = intSI_pointer_node;
19012 break;
19014 case T_V2SF:
19015 case T_V4SF:
19016 eltype = float_pointer_node;
19017 break;
19019 case T_DI:
19020 case T_V2DI:
19021 eltype = intDI_pointer_node;
19022 break;
19024 default: gcc_unreachable ();
19027 else
19029 switch (insn_data[icode].operand[k].mode)
19031 case VOIDmode: eltype = void_type_node; break;
19032 /* Scalars. */
19033 case QImode: eltype = neon_intQI_type_node; break;
19034 case HImode: eltype = neon_intHI_type_node; break;
19035 case SImode: eltype = neon_intSI_type_node; break;
19036 case SFmode: eltype = neon_float_type_node; break;
19037 case DImode: eltype = neon_intDI_type_node; break;
19038 case TImode: eltype = intTI_type_node; break;
19039 case EImode: eltype = intEI_type_node; break;
19040 case OImode: eltype = intOI_type_node; break;
19041 case CImode: eltype = intCI_type_node; break;
19042 case XImode: eltype = intXI_type_node; break;
19043 /* 64-bit vectors. */
19044 case V8QImode: eltype = V8QI_type_node; break;
19045 case V4HImode: eltype = V4HI_type_node; break;
19046 case V2SImode: eltype = V2SI_type_node; break;
19047 case V2SFmode: eltype = V2SF_type_node; break;
19048 /* 128-bit vectors. */
19049 case V16QImode: eltype = V16QI_type_node; break;
19050 case V8HImode: eltype = V8HI_type_node; break;
19051 case V4SImode: eltype = V4SI_type_node; break;
19052 case V4SFmode: eltype = V4SF_type_node; break;
19053 case V2DImode: eltype = V2DI_type_node; break;
19054 default: gcc_unreachable ();
19058 if (k == 0 && !is_store)
19059 return_type = eltype;
19060 else
19061 args = tree_cons (NULL_TREE, eltype, args);
19064 ftype = build_function_type (return_type, args);
19066 break;
19068 case NEON_RESULTPAIR:
19070 switch (insn_data[icode].operand[1].mode)
19072 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19073 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19074 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19075 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19076 case DImode: ftype = void_ftype_pdi_di_di; break;
19077 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19078 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19079 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19080 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19081 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19082 default: gcc_unreachable ();
19085 break;
19087 case NEON_REINTERP:
19089 /* We iterate over 5 doubleword types, then 5 quadword
19090 types. */
19091 int rhs = j % 5;
19092 switch (insn_data[icode].operand[0].mode)
19094 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19095 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19096 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19097 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19098 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19099 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19100 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19101 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19102 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19103 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19104 default: gcc_unreachable ();
19107 break;
19109 default:
19110 gcc_unreachable ();
19113 gcc_assert (ftype != NULL);
19115 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
19117 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
19118 NULL_TREE);
19123 static void
19124 arm_init_fp16_builtins (void)
19126 tree fp16_type = make_node (REAL_TYPE);
19127 TYPE_PRECISION (fp16_type) = 16;
19128 layout_type (fp16_type);
19129 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
19132 static void
19133 arm_init_builtins (void)
19135 arm_init_tls_builtins ();
19137 if (TARGET_REALLY_IWMMXT)
19138 arm_init_iwmmxt_builtins ();
19140 if (TARGET_NEON)
19141 arm_init_neon_builtins ();
19143 if (arm_fp16_format)
19144 arm_init_fp16_builtins ();
19147 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19149 static const char *
19150 arm_invalid_parameter_type (const_tree t)
19152 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19153 return N_("function parameters cannot have __fp16 type");
19154 return NULL;
19157 /* Implement TARGET_INVALID_RETURN_TYPE. */
19159 static const char *
19160 arm_invalid_return_type (const_tree t)
19162 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19163 return N_("functions cannot return __fp16 type");
19164 return NULL;
19167 /* Implement TARGET_PROMOTED_TYPE. */
19169 static tree
19170 arm_promoted_type (const_tree t)
19172 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19173 return float_type_node;
19174 return NULL_TREE;
19177 /* Implement TARGET_CONVERT_TO_TYPE.
19178 Specifically, this hook implements the peculiarity of the ARM
19179 half-precision floating-point C semantics that requires conversions
19180 between __fp16 and double to go through an intermediate conversion to float. */
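/* Illustrative sketch (not part of the hook itself): given
   "__fp16 h; double d;", the conversion in "d = h" is expanded as
   "d = (double)(float)h", and "h = d" as "h = (__fp16)(float)d".  */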
19182 static tree
19183 arm_convert_to_type (tree type, tree expr)
19185 tree fromtype = TREE_TYPE (expr);
19186 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
19187 return NULL_TREE;
19188 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
19189 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
19190 return convert (type, convert (float_type_node, expr));
19191 return NULL_TREE;
19194 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19195 This simply adds HFmode as a supported mode; even though we don't
19196 implement arithmetic on this type directly, it's supported by
19197 optabs conversions, much the way the double-word arithmetic is
19198 special-cased in the default hook. */
19200 static bool
19201 arm_scalar_mode_supported_p (enum machine_mode mode)
19203 if (mode == HFmode)
19204 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
19205 else
19206 return default_scalar_mode_supported_p (mode);
19209 /* Errors in the source file can cause expand_expr to return const0_rtx
19210 where we expect a vector. To avoid crashing, use one of the vector
19211 clear instructions. */
19213 static rtx
19214 safe_vector_operand (rtx x, enum machine_mode mode)
19216 if (x != const0_rtx)
19217 return x;
19218 x = gen_reg_rtx (mode);
19220 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
19221 : gen_rtx_SUBREG (DImode, x, 0)));
19222 return x;
19225 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19227 static rtx
19228 arm_expand_binop_builtin (enum insn_code icode,
19229 tree exp, rtx target)
19231 rtx pat;
19232 tree arg0 = CALL_EXPR_ARG (exp, 0);
19233 tree arg1 = CALL_EXPR_ARG (exp, 1);
19234 rtx op0 = expand_normal (arg0);
19235 rtx op1 = expand_normal (arg1);
19236 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19237 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19238 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19240 if (VECTOR_MODE_P (mode0))
19241 op0 = safe_vector_operand (op0, mode0);
19242 if (VECTOR_MODE_P (mode1))
19243 op1 = safe_vector_operand (op1, mode1);
19245 if (! target
19246 || GET_MODE (target) != tmode
19247 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19248 target = gen_reg_rtx (tmode);
19250 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
19252 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19253 op0 = copy_to_mode_reg (mode0, op0);
19254 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19255 op1 = copy_to_mode_reg (mode1, op1);
19257 pat = GEN_FCN (icode) (target, op0, op1);
19258 if (! pat)
19259 return 0;
19260 emit_insn (pat);
19261 return target;
19264 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19266 static rtx
19267 arm_expand_unop_builtin (enum insn_code icode,
19268 tree exp, rtx target, int do_load)
19270 rtx pat;
19271 tree arg0 = CALL_EXPR_ARG (exp, 0);
19272 rtx op0 = expand_normal (arg0);
19273 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19274 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19276 if (! target
19277 || GET_MODE (target) != tmode
19278 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19279 target = gen_reg_rtx (tmode);
19280 if (do_load)
19281 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19282 else
19284 if (VECTOR_MODE_P (mode0))
19285 op0 = safe_vector_operand (op0, mode0);
19287 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19288 op0 = copy_to_mode_reg (mode0, op0);
19291 pat = GEN_FCN (icode) (target, op0);
19292 if (! pat)
19293 return 0;
19294 emit_insn (pat);
19295 return target;
19298 static int
19299 neon_builtin_compare (const void *a, const void *b)
19301 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
19302 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
19303 unsigned int soughtcode = key->base_fcode;
19305 if (soughtcode >= memb->base_fcode
19306 && soughtcode < memb->base_fcode + memb->num_vars)
19307 return 0;
19308 else if (soughtcode < memb->base_fcode)
19309 return -1;
19310 else
19311 return 1;
19314 static enum insn_code
19315 locate_neon_builtin_icode (int fcode, neon_itype *itype)
19317 neon_builtin_datum key
19318 = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
19319 neon_builtin_datum *found;
19320 int idx;
19322 key.base_fcode = fcode;
19323 found = (neon_builtin_datum *)
19324 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
19325 sizeof (neon_builtin_data[0]), neon_builtin_compare);
19326 gcc_assert (found);
19327 idx = fcode - (int) found->base_fcode;
19328 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
19330 if (itype)
19331 *itype = found->itype;
19333 return found->codes[idx];
19336 typedef enum {
19337 NEON_ARG_COPY_TO_REG,
19338 NEON_ARG_CONSTANT,
19339 NEON_ARG_STOP
19340 } builtin_arg;
19342 #define NEON_MAX_BUILTIN_ARGS 5
19344 /* Expand a Neon builtin. */
19345 static rtx
19346 arm_expand_neon_args (rtx target, int icode, int have_retval,
19347 tree exp, ...)
19349 va_list ap;
19350 rtx pat;
19351 tree arg[NEON_MAX_BUILTIN_ARGS];
19352 rtx op[NEON_MAX_BUILTIN_ARGS];
19353 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19354 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
19355 int argc = 0;
19357 if (have_retval
19358 && (!target
19359 || GET_MODE (target) != tmode
19360 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
19361 target = gen_reg_rtx (tmode);
19363 va_start (ap, exp);
19365 for (;;)
19367 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
19369 if (thisarg == NEON_ARG_STOP)
19370 break;
19371 else
19373 arg[argc] = CALL_EXPR_ARG (exp, argc);
19374 op[argc] = expand_normal (arg[argc]);
19375 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
19377 switch (thisarg)
19379 case NEON_ARG_COPY_TO_REG:
19380 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19381 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19382 (op[argc], mode[argc]))
19383 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
19384 break;
19386 case NEON_ARG_CONSTANT:
19387 /* FIXME: This error message is somewhat unhelpful. */
19388 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19389 (op[argc], mode[argc]))
19390 error ("argument must be a constant");
19391 break;
19393 case NEON_ARG_STOP:
19394 gcc_unreachable ();
19397 argc++;
19401 va_end (ap);
19403 if (have_retval)
19404 switch (argc)
19406 case 1:
19407 pat = GEN_FCN (icode) (target, op[0]);
19408 break;
19410 case 2:
19411 pat = GEN_FCN (icode) (target, op[0], op[1]);
19412 break;
19414 case 3:
19415 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
19416 break;
19418 case 4:
19419 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19420 break;
19422 case 5:
19423 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19424 break;
19426 default:
19427 gcc_unreachable ();
19429 else
19430 switch (argc)
19432 case 1:
19433 pat = GEN_FCN (icode) (op[0]);
19434 break;
19436 case 2:
19437 pat = GEN_FCN (icode) (op[0], op[1]);
19438 break;
19440 case 3:
19441 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19442 break;
19444 case 4:
19445 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19446 break;
19448 case 5:
19449 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19450 break;
19452 default:
19453 gcc_unreachable ();
19456 if (!pat)
19457 return 0;
19459 emit_insn (pat);
19461 return target;
19464 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19465 constants defined per-instruction or per instruction-variant. Instead, the
19466 required info is looked up in the table neon_builtin_data. */
19467 static rtx
19468 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
19470 neon_itype itype;
19471 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
19473 switch (itype)
19475 case NEON_UNOP:
19476 case NEON_CONVERT:
19477 case NEON_DUPLANE:
19478 return arm_expand_neon_args (target, icode, 1, exp,
19479 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19481 case NEON_BINOP:
19482 case NEON_SETLANE:
19483 case NEON_SCALARMUL:
19484 case NEON_SCALARMULL:
19485 case NEON_SCALARMULH:
19486 case NEON_SHIFTINSERT:
19487 case NEON_LOGICBINOP:
19488 return arm_expand_neon_args (target, icode, 1, exp,
19489 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19490 NEON_ARG_STOP);
19492 case NEON_TERNOP:
19493 return arm_expand_neon_args (target, icode, 1, exp,
19494 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19495 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19497 case NEON_GETLANE:
19498 case NEON_FIXCONV:
19499 case NEON_SHIFTIMM:
19500 return arm_expand_neon_args (target, icode, 1, exp,
19501 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19502 NEON_ARG_STOP);
19504 case NEON_CREATE:
19505 return arm_expand_neon_args (target, icode, 1, exp,
19506 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19508 case NEON_DUP:
19509 case NEON_SPLIT:
19510 case NEON_REINTERP:
19511 return arm_expand_neon_args (target, icode, 1, exp,
19512 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19514 case NEON_COMBINE:
19515 case NEON_VTBL:
19516 return arm_expand_neon_args (target, icode, 1, exp,
19517 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19519 case NEON_RESULTPAIR:
19520 return arm_expand_neon_args (target, icode, 0, exp,
19521 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19522 NEON_ARG_STOP);
19524 case NEON_LANEMUL:
19525 case NEON_LANEMULL:
19526 case NEON_LANEMULH:
19527 return arm_expand_neon_args (target, icode, 1, exp,
19528 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19529 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19531 case NEON_LANEMAC:
19532 return arm_expand_neon_args (target, icode, 1, exp,
19533 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19534 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19536 case NEON_SHIFTACC:
19537 return arm_expand_neon_args (target, icode, 1, exp,
19538 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19539 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19541 case NEON_SCALARMAC:
19542 return arm_expand_neon_args (target, icode, 1, exp,
19543 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19544 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19546 case NEON_SELECT:
19547 case NEON_VTBX:
19548 return arm_expand_neon_args (target, icode, 1, exp,
19549 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19550 NEON_ARG_STOP);
19552 case NEON_LOAD1:
19553 case NEON_LOADSTRUCT:
19554 return arm_expand_neon_args (target, icode, 1, exp,
19555 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19557 case NEON_LOAD1LANE:
19558 case NEON_LOADSTRUCTLANE:
19559 return arm_expand_neon_args (target, icode, 1, exp,
19560 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19561 NEON_ARG_STOP);
19563 case NEON_STORE1:
19564 case NEON_STORESTRUCT:
19565 return arm_expand_neon_args (target, icode, 0, exp,
19566 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19568 case NEON_STORE1LANE:
19569 case NEON_STORESTRUCTLANE:
19570 return arm_expand_neon_args (target, icode, 0, exp,
19571 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19572 NEON_ARG_STOP);
19575 gcc_unreachable ();
19578 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19579 void
19580 neon_reinterpret (rtx dest, rtx src)
19582 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
19585 /* Emit code to place a Neon pair result in memory locations (with equal
19586 registers). */
19587 void
19588 neon_emit_pair_result_insn (enum machine_mode mode,
19589 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19590 rtx op1, rtx op2)
19592 rtx mem = gen_rtx_MEM (mode, destaddr);
19593 rtx tmp1 = gen_reg_rtx (mode);
19594 rtx tmp2 = gen_reg_rtx (mode);
19596 emit_insn (intfn (tmp1, op1, tmp2, op2));
19598 emit_move_insn (mem, tmp1);
19599 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19600 emit_move_insn (mem, tmp2);
19603 /* Set up operands for a register copy from src to dest, taking care not to
19604 clobber registers in the process.
19605 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19606 be called with a large N, so that should be OK. */
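/* Illustrative example (hypothetical register choices): with
   DEST = { d1, d2 } and SRC = { d0, d1 }, the copy d1 <- d0 cannot be
   emitted first because d1 is still needed as the source of the second
   copy, so the loop orders the operand pairs as d2 <- d1, then d1 <- d0.  */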
19608 void
19609 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
19611 unsigned int copied = 0, opctr = 0;
19612 unsigned int done = (1 << count) - 1;
19613 unsigned int i, j;
19615 while (copied != done)
19617 for (i = 0; i < count; i++)
19619 int good = 1;
19621 for (j = 0; good && j < count; j++)
19622 if (i != j && (copied & (1 << j)) == 0
19623 && reg_overlap_mentioned_p (src[j], dest[i]))
19624 good = 0;
19626 if (good)
19628 operands[opctr++] = dest[i];
19629 operands[opctr++] = src[i];
19630 copied |= 1 << i;
19635 gcc_assert (opctr == count * 2);
19638 /* Expand an expression EXP that calls a built-in function,
19639 with result going to TARGET if that's convenient
19640 (and in mode MODE if that's convenient).
19641 SUBTARGET may be used as the target for computing one of EXP's operands.
19642 IGNORE is nonzero if the value is to be ignored. */
19644 static rtx
19645 arm_expand_builtin (tree exp,
19646 rtx target,
19647 rtx subtarget ATTRIBUTE_UNUSED,
19648 enum machine_mode mode ATTRIBUTE_UNUSED,
19649 int ignore ATTRIBUTE_UNUSED)
19651 const struct builtin_description * d;
19652 enum insn_code icode;
19653 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19654 tree arg0;
19655 tree arg1;
19656 tree arg2;
19657 rtx op0;
19658 rtx op1;
19659 rtx op2;
19660 rtx pat;
19661 int fcode = DECL_FUNCTION_CODE (fndecl);
19662 size_t i;
19663 enum machine_mode tmode;
19664 enum machine_mode mode0;
19665 enum machine_mode mode1;
19666 enum machine_mode mode2;
19668 if (fcode >= ARM_BUILTIN_NEON_BASE)
19669 return arm_expand_neon_builtin (fcode, exp, target);
19671 switch (fcode)
19673 case ARM_BUILTIN_TEXTRMSB:
19674 case ARM_BUILTIN_TEXTRMUB:
19675 case ARM_BUILTIN_TEXTRMSH:
19676 case ARM_BUILTIN_TEXTRMUH:
19677 case ARM_BUILTIN_TEXTRMSW:
19678 case ARM_BUILTIN_TEXTRMUW:
19679 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19680 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19681 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19682 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19683 : CODE_FOR_iwmmxt_textrmw);
19685 arg0 = CALL_EXPR_ARG (exp, 0);
19686 arg1 = CALL_EXPR_ARG (exp, 1);
19687 op0 = expand_normal (arg0);
19688 op1 = expand_normal (arg1);
19689 tmode = insn_data[icode].operand[0].mode;
19690 mode0 = insn_data[icode].operand[1].mode;
19691 mode1 = insn_data[icode].operand[2].mode;
19693 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19694 op0 = copy_to_mode_reg (mode0, op0);
19695 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19697 /* @@@ better error message */
19698 error ("selector must be an immediate");
19699 return gen_reg_rtx (tmode);
19701 if (target == 0
19702 || GET_MODE (target) != tmode
19703 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19704 target = gen_reg_rtx (tmode);
19705 pat = GEN_FCN (icode) (target, op0, op1);
19706 if (! pat)
19707 return 0;
19708 emit_insn (pat);
19709 return target;
19711 case ARM_BUILTIN_TINSRB:
19712 case ARM_BUILTIN_TINSRH:
19713 case ARM_BUILTIN_TINSRW:
19714 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19715 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19716 : CODE_FOR_iwmmxt_tinsrw);
19717 arg0 = CALL_EXPR_ARG (exp, 0);
19718 arg1 = CALL_EXPR_ARG (exp, 1);
19719 arg2 = CALL_EXPR_ARG (exp, 2);
19720 op0 = expand_normal (arg0);
19721 op1 = expand_normal (arg1);
19722 op2 = expand_normal (arg2);
19723 tmode = insn_data[icode].operand[0].mode;
19724 mode0 = insn_data[icode].operand[1].mode;
19725 mode1 = insn_data[icode].operand[2].mode;
19726 mode2 = insn_data[icode].operand[3].mode;
19728 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19729 op0 = copy_to_mode_reg (mode0, op0);
19730 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19731 op1 = copy_to_mode_reg (mode1, op1);
19732 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19734 /* @@@ better error message */
19735 error ("selector must be an immediate");
19736 return const0_rtx;
19738 if (target == 0
19739 || GET_MODE (target) != tmode
19740 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19741 target = gen_reg_rtx (tmode);
19742 pat = GEN_FCN (icode) (target, op0, op1, op2);
19743 if (! pat)
19744 return 0;
19745 emit_insn (pat);
19746 return target;
19748 case ARM_BUILTIN_SETWCX:
19749 arg0 = CALL_EXPR_ARG (exp, 0);
19750 arg1 = CALL_EXPR_ARG (exp, 1);
19751 op0 = force_reg (SImode, expand_normal (arg0));
19752 op1 = expand_normal (arg1);
19753 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19754 return 0;
19756 case ARM_BUILTIN_GETWCX:
19757 arg0 = CALL_EXPR_ARG (exp, 0);
19758 op0 = expand_normal (arg0);
19759 target = gen_reg_rtx (SImode);
19760 emit_insn (gen_iwmmxt_tmrc (target, op0));
19761 return target;
19763 case ARM_BUILTIN_WSHUFH:
19764 icode = CODE_FOR_iwmmxt_wshufh;
19765 arg0 = CALL_EXPR_ARG (exp, 0);
19766 arg1 = CALL_EXPR_ARG (exp, 1);
19767 op0 = expand_normal (arg0);
19768 op1 = expand_normal (arg1);
19769 tmode = insn_data[icode].operand[0].mode;
19770 mode1 = insn_data[icode].operand[1].mode;
19771 mode2 = insn_data[icode].operand[2].mode;
19773 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19774 op0 = copy_to_mode_reg (mode1, op0);
19775 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19777 /* @@@ better error message */
19778 error ("mask must be an immediate");
19779 return const0_rtx;
19781 if (target == 0
19782 || GET_MODE (target) != tmode
19783 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19784 target = gen_reg_rtx (tmode);
19785 pat = GEN_FCN (icode) (target, op0, op1);
19786 if (! pat)
19787 return 0;
19788 emit_insn (pat);
19789 return target;
19791 case ARM_BUILTIN_WSADB:
19792 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19793 case ARM_BUILTIN_WSADH:
19794 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19795 case ARM_BUILTIN_WSADBZ:
19796 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19797 case ARM_BUILTIN_WSADHZ:
19798 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19800 /* Several three-argument builtins. */
19801 case ARM_BUILTIN_WMACS:
19802 case ARM_BUILTIN_WMACU:
19803 case ARM_BUILTIN_WALIGN:
19804 case ARM_BUILTIN_TMIA:
19805 case ARM_BUILTIN_TMIAPH:
19806 case ARM_BUILTIN_TMIATT:
19807 case ARM_BUILTIN_TMIATB:
19808 case ARM_BUILTIN_TMIABT:
19809 case ARM_BUILTIN_TMIABB:
19810 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19811 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19812 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19813 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19814 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19815 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19816 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19817 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19818 : CODE_FOR_iwmmxt_walign);
19819 arg0 = CALL_EXPR_ARG (exp, 0);
19820 arg1 = CALL_EXPR_ARG (exp, 1);
19821 arg2 = CALL_EXPR_ARG (exp, 2);
19822 op0 = expand_normal (arg0);
19823 op1 = expand_normal (arg1);
19824 op2 = expand_normal (arg2);
19825 tmode = insn_data[icode].operand[0].mode;
19826 mode0 = insn_data[icode].operand[1].mode;
19827 mode1 = insn_data[icode].operand[2].mode;
19828 mode2 = insn_data[icode].operand[3].mode;
19830 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19831 op0 = copy_to_mode_reg (mode0, op0);
19832 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19833 op1 = copy_to_mode_reg (mode1, op1);
19834 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19835 op2 = copy_to_mode_reg (mode2, op2);
19836 if (target == 0
19837 || GET_MODE (target) != tmode
19838 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19839 target = gen_reg_rtx (tmode);
19840 pat = GEN_FCN (icode) (target, op0, op1, op2);
19841 if (! pat)
19842 return 0;
19843 emit_insn (pat);
19844 return target;
19846 case ARM_BUILTIN_WZERO:
19847 target = gen_reg_rtx (DImode);
19848 emit_insn (gen_iwmmxt_clrdi (target));
19849 return target;
19851 case ARM_BUILTIN_THREAD_POINTER:
19852 return arm_load_tp (target);
19854 default:
19855 break;
19858 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19859 if (d->code == (const enum arm_builtins) fcode)
19860 return arm_expand_binop_builtin (d->icode, exp, target);
19862 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19863 if (d->code == (const enum arm_builtins) fcode)
19864 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19866 /* @@@ Should really do something sensible here. */
19867 return NULL_RTX;
19870 /* Return the number (counting from 0) of
19871 the least significant set bit in MASK. */
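/* For example, a MASK of 0x28 (bits 3 and 5 set) yields 3.  */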
19873 inline static int
19874 number_of_first_bit_set (unsigned mask)
19876 int bit;
19878 for (bit = 0;
19879 (mask & (1 << bit)) == 0;
19880 ++bit)
19881 continue;
19883 return bit;
19886 /* Emit code to push or pop registers to or from the stack. F is the
19887 assembly file. MASK is the registers to push or pop. PUSH is
19888 nonzero if we should push, and zero if we should pop. For debugging
19889 output, if pushing, adjust CFA_OFFSET by the amount of space added
19890 to the stack. REAL_REGS should have the same number of bits set as
19891 MASK, and will be used instead (in the same order) to describe which
19892 registers were saved - this is used to mark the save slots when we
19893 push high registers after moving them to low registers. */
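/* Rough illustration (assuming EABI unwind tables are requested): a
   prologue push of r4, r5 and LR emits output along the lines of
        .save   {r4, r5, lr}
        push    {r4, r5, lr}
   while the matching epilogue pop of r4, r5 and PC emits
        pop     {r4, r5, pc}
   or defers to thumb_exit when the PC cannot be popped directly.  */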
19894 static void
19895 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19896 unsigned long real_regs)
19898 int regno;
19899 int lo_mask = mask & 0xFF;
19900 int pushed_words = 0;
19902 gcc_assert (mask);
19904 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19906 /* Special case. Do not generate a POP PC statement here; do it in
19907 thumb_exit (). */
19908 thumb_exit (f, -1);
19909 return;
19912 if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
19914 fprintf (f, "\t.save\t{");
19915 for (regno = 0; regno < 15; regno++)
19917 if (real_regs & (1 << regno))
19919 if (real_regs & ((1 << regno) -1))
19920 fprintf (f, ", ");
19921 asm_fprintf (f, "%r", regno);
19924 fprintf (f, "}\n");
19927 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19929 /* Look at the low registers first. */
19930 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19932 if (lo_mask & 1)
19934 asm_fprintf (f, "%r", regno);
19936 if ((lo_mask & ~1) != 0)
19937 fprintf (f, ", ");
19939 pushed_words++;
19943 if (push && (mask & (1 << LR_REGNUM)))
19945 /* Catch pushing the LR. */
19946 if (mask & 0xFF)
19947 fprintf (f, ", ");
19949 asm_fprintf (f, "%r", LR_REGNUM);
19951 pushed_words++;
19953 else if (!push && (mask & (1 << PC_REGNUM)))
19955 /* Catch popping the PC. */
19956 if (TARGET_INTERWORK || TARGET_BACKTRACE
19957 || crtl->calls_eh_return)
19959 /* The PC is never popped directly; instead
19960 it is popped into r3 and then BX is used. */
19961 fprintf (f, "}\n");
19963 thumb_exit (f, -1);
19965 return;
19967 else
19969 if (mask & 0xFF)
19970 fprintf (f, ", ");
19972 asm_fprintf (f, "%r", PC_REGNUM);
19976 fprintf (f, "}\n");
19978 if (push && pushed_words && dwarf2out_do_frame ())
19980 char *l = dwarf2out_cfi_label (false);
19981 int pushed_mask = real_regs;
19983 *cfa_offset += pushed_words * 4;
19984 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19986 pushed_words = 0;
19987 pushed_mask = real_regs;
19988 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19990 if (pushed_mask & 1)
19991 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
19996 /* Generate code to return from a thumb function.
19997 If 'reg_containing_return_addr' is -1, then the return address is
19998 actually on the stack, at the stack pointer. */
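/* Sketch of the two simplest outcomes: if nothing needs popping and the
   return address is already in a register, this reduces to a single
   "bx <reg>"; if the address is on the stack and neither interworking,
   a backtrace structure nor an EH return is involved, it becomes
   "pop {pc}".  */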
19999 static void
20000 thumb_exit (FILE *f, int reg_containing_return_addr)
20002 unsigned regs_available_for_popping;
20003 unsigned regs_to_pop;
20004 int pops_needed;
20005 unsigned available;
20006 unsigned required;
20007 int mode;
20008 int size;
20009 int restore_a4 = FALSE;
20011 /* Compute the registers we need to pop. */
20012 regs_to_pop = 0;
20013 pops_needed = 0;
20015 if (reg_containing_return_addr == -1)
20017 regs_to_pop |= 1 << LR_REGNUM;
20018 ++pops_needed;
20021 if (TARGET_BACKTRACE)
20023 /* Restore the (ARM) frame pointer and stack pointer. */
20024 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
20025 pops_needed += 2;
20028 /* If there is nothing to pop then just emit the BX instruction and
20029 return. */
20030 if (pops_needed == 0)
20032 if (crtl->calls_eh_return)
20033 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20035 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20036 return;
20038 /* Otherwise if we are not supporting interworking and we have not created
20039 a backtrace structure and the function was not entered in ARM mode then
20040 just pop the return address straight into the PC. */
20041 else if (!TARGET_INTERWORK
20042 && !TARGET_BACKTRACE
20043 && !is_called_in_ARM_mode (current_function_decl)
20044 && !crtl->calls_eh_return)
20046 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
20047 return;
20050 /* Find out how many of the (return) argument registers we can corrupt. */
20051 regs_available_for_popping = 0;
20053 /* If returning via __builtin_eh_return, the bottom three registers
20054 all contain information needed for the return. */
20055 if (crtl->calls_eh_return)
20056 size = 12;
20057 else
20059 /* Deduce the registers used from the function's
20060 return value. This is more reliable than examining
20061 df_regs_ever_live_p () because that will be set if the register is
20062 ever used in the function, not just if the register is used
20063 to hold a return value. */
20065 if (crtl->return_rtx != 0)
20066 mode = GET_MODE (crtl->return_rtx);
20067 else
20068 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20070 size = GET_MODE_SIZE (mode);
20072 if (size == 0)
20074 /* In a void function we can use any argument register.
20075 In a function that returns a structure on the stack
20076 we can use the second and third argument registers. */
20077 if (mode == VOIDmode)
20078 regs_available_for_popping =
20079 (1 << ARG_REGISTER (1))
20080 | (1 << ARG_REGISTER (2))
20081 | (1 << ARG_REGISTER (3));
20082 else
20083 regs_available_for_popping =
20084 (1 << ARG_REGISTER (2))
20085 | (1 << ARG_REGISTER (3));
20087 else if (size <= 4)
20088 regs_available_for_popping =
20089 (1 << ARG_REGISTER (2))
20090 | (1 << ARG_REGISTER (3));
20091 else if (size <= 8)
20092 regs_available_for_popping =
20093 (1 << ARG_REGISTER (3));
20096 /* Match registers to be popped with registers into which we pop them. */
20097 for (available = regs_available_for_popping,
20098 required = regs_to_pop;
20099 required != 0 && available != 0;
20100 available &= ~(available & - available),
20101 required &= ~(required & - required))
20102 -- pops_needed;
20104 /* If we have any popping registers left over, remove them. */
20105 if (available > 0)
20106 regs_available_for_popping &= ~available;
20108 /* Otherwise if we need another popping register we can use
20109 the fourth argument register. */
20110 else if (pops_needed)
20112 /* If we have not found any free argument registers and
20113 reg a4 contains the return address, we must move it. */
20114 if (regs_available_for_popping == 0
20115 && reg_containing_return_addr == LAST_ARG_REGNUM)
20117 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20118 reg_containing_return_addr = LR_REGNUM;
20120 else if (size > 12)
20122 /* Register a4 is being used to hold part of the return value,
20123 but we have dire need of a free, low register. */
20124 restore_a4 = TRUE;
20126 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
20129 if (reg_containing_return_addr != LAST_ARG_REGNUM)
20131 /* The fourth argument register is available. */
20132 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
20134 --pops_needed;
20138 /* Pop as many registers as we can. */
20139 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20140 regs_available_for_popping);
20142 /* Process the registers we popped. */
20143 if (reg_containing_return_addr == -1)
20145 /* The return address was popped into the lowest numbered register. */
20146 regs_to_pop &= ~(1 << LR_REGNUM);
20148 reg_containing_return_addr =
20149 number_of_first_bit_set (regs_available_for_popping);
20151 /* Remove this register from the mask of available registers, so that
20152 the return address will not be corrupted by further pops. */
20153 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
20156 /* If we popped other registers then handle them here. */
20157 if (regs_available_for_popping)
20159 int frame_pointer;
20161 /* Work out which register currently contains the frame pointer. */
20162 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20164 /* Move it into the correct place. */
20165 asm_fprintf (f, "\tmov\t%r, %r\n",
20166 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20168 /* (Temporarily) remove it from the mask of popped registers. */
20169 regs_available_for_popping &= ~(1 << frame_pointer);
20170 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20172 if (regs_available_for_popping)
20174 int stack_pointer;
20176 /* We popped the stack pointer as well;
20177 find the register that contains it. */
20178 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20180 /* Move it into the stack register. */
20181 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20183 /* At this point we have popped all necessary registers, so
20184 do not worry about restoring regs_available_for_popping
20185 to its correct value:
20187 assert (pops_needed == 0)
20188 assert (regs_available_for_popping == (1 << frame_pointer))
20189 assert (regs_to_pop == (1 << STACK_POINTER)) */
20191 else
20193 /* Since we have just moved the popped value into the frame
20194 pointer, the popping register is available for reuse, and
20195 we know that we still have the stack pointer left to pop. */
20196 regs_available_for_popping |= (1 << frame_pointer);
20200 /* If we still have registers left on the stack, but we no longer have
20201 any registers into which we can pop them, then we must move the return
20202 address into the link register and make available the register that
20203 contained it. */
20204 if (regs_available_for_popping == 0 && pops_needed > 0)
20206 regs_available_for_popping |= 1 << reg_containing_return_addr;
20208 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20209 reg_containing_return_addr);
20211 reg_containing_return_addr = LR_REGNUM;
20214 /* If we have registers left on the stack then pop some more.
20215 We know that at most we will want to pop FP and SP. */
20216 if (pops_needed > 0)
20218 int popped_into;
20219 int move_to;
20221 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20222 regs_available_for_popping);
20224 /* We have popped either FP or SP.
20225 Move whichever one it is into the correct register. */
20226 popped_into = number_of_first_bit_set (regs_available_for_popping);
20227 move_to = number_of_first_bit_set (regs_to_pop);
20229 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20231 regs_to_pop &= ~(1 << move_to);
20233 --pops_needed;
20236 /* If we still have not popped everything then we must have only
20237 had one register available to us and we are now popping the SP. */
20238 if (pops_needed > 0)
20240 int popped_into;
20242 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20243 regs_available_for_popping);
20245 popped_into = number_of_first_bit_set (regs_available_for_popping);
20247 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
20249 assert (regs_to_pop == (1 << STACK_POINTER))
20250 assert (pops_needed == 1)
20254 /* If necessary restore the a4 register. */
20255 if (restore_a4)
20257 if (reg_containing_return_addr != LR_REGNUM)
20259 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20260 reg_containing_return_addr = LR_REGNUM;
20263 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
20266 if (crtl->calls_eh_return)
20267 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20269 /* Return to caller. */
20270 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20273 /* Scan INSN just before assembler is output for it.
20274 For Thumb-1, we track the status of the condition codes; this
20275 information is used in the cbranchsi4_insn pattern. */
20276 void
20277 thumb1_final_prescan_insn (rtx insn)
20279 if (flag_print_asm_name)
20280 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
20281 INSN_ADDRESSES (INSN_UID (insn)));
20282 /* Don't overwrite the previous setter when we get to a cbranch. */
20283 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20285 enum attr_conds conds;
20287 if (cfun->machine->thumb1_cc_insn)
20289 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20290 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20291 CC_STATUS_INIT;
20293 conds = get_attr_conds (insn);
20294 if (conds == CONDS_SET)
20296 rtx set = single_set (insn);
20297 cfun->machine->thumb1_cc_insn = insn;
20298 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20299 cfun->machine->thumb1_cc_op1 = const0_rtx;
20300 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20301 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20303 rtx src1 = XEXP (SET_SRC (set), 1);
20304 if (src1 == const0_rtx)
20305 cfun->machine->thumb1_cc_mode = CCmode;
20308 else if (conds != CONDS_NOCOND)
20309 cfun->machine->thumb1_cc_insn = NULL_RTX;
20314 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20316 unsigned HOST_WIDE_INT mask = 0xff;
20317 int i;
20319 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20320 if (val == 0) /* XXX */
20321 return 0;
20323 for (i = 0; i < 25; i++)
20324 if ((val & (mask << i)) == val)
20325 return 1;
20327 return 0;
20330 /* Returns nonzero if the current function contains,
20331 or might contain, a far jump. */
20332 static int
20333 thumb_far_jump_used_p (void)
20335 rtx insn;
20337 /* This test is only important for leaf functions. */
20338 /* assert (!leaf_function_p ()); */
20340 /* If we have already decided that far jumps may be used,
20341 do not bother checking again, and always return true even if
20342 it turns out that they are not being used. Once we have made
20343 the decision that far jumps are present (and that hence the link
20344 register will be pushed onto the stack) we cannot go back on it. */
20345 if (cfun->machine->far_jump_used)
20346 return 1;
20348 /* If this function is not being called from the prologue/epilogue
20349 generation code then it must be being called from the
20350 INITIAL_ELIMINATION_OFFSET macro. */
20351 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20353 /* In this case we know that we are being asked about the elimination
20354 of the arg pointer register. If that register is not being used,
20355 then there are no arguments on the stack, and we do not have to
20356 worry that a far jump might force the prologue to push the link
20357 register, changing the stack offsets. In this case we can just
20358 return false, since the presence of far jumps in the function will
20359 not affect stack offsets.
20361 If the arg pointer is live (or if it was live, but has now been
20362 eliminated and so set to dead) then we do have to test to see if
20363 the function might contain a far jump. This test can lead to some
20364 false negatives, since before reload is completed, the length of
20365 branch instructions is not known, so gcc defaults to returning their
20366 longest length, which in turn sets the far jump attribute to true.
20368 A false negative will not result in bad code being generated, but it
20369 will result in a needless push and pop of the link register. We
20370 hope that this does not occur too often.
20372 If we need doubleword stack alignment this could affect the other
20373 elimination offsets so we can't risk getting it wrong. */
20374 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20375 cfun->machine->arg_pointer_live = 1;
20376 else if (!cfun->machine->arg_pointer_live)
20377 return 0;
20380 /* Check to see if the function contains a branch
20381 insn with the far jump attribute set. */
20382 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20384 if (GET_CODE (insn) == JUMP_INSN
20385 /* Ignore tablejump patterns. */
20386 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20387 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20388 && get_attr_far_jump (insn) == FAR_JUMP_YES
20391 /* Record the fact that we have decided that
20392 the function does use far jumps. */
20393 cfun->machine->far_jump_used = 1;
20394 return 1;
20398 return 0;
20401 /* Return nonzero if FUNC must be entered in ARM mode. */
20403 is_called_in_ARM_mode (tree func)
20405 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20407 /* Ignore the problem about functions whose address is taken. */
20408 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20409 return TRUE;
20411 #ifdef ARM_PE
20412 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20413 #else
20414 return FALSE;
20415 #endif
20418 /* Given the stack offsets and register mask in OFFSETS, decide how
20419 many additional registers to push instead of subtracting a constant
20420 from SP. For epilogues the principle is the same except we use pop.
20421 FOR_PROLOGUE indicates which we're generating. */
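/* Worked example (hypothetical frame): with a 512-byte frame and one
   otherwise unused low register, pushing that one extra register leaves
   a 508-byte adjustment, which fits a single "sub sp, #508" instead of
   loading the constant 512 into a scratch register first.  */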
20422 static int
20423 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20425 HOST_WIDE_INT amount;
20426 unsigned long live_regs_mask = offsets->saved_regs_mask;
20427 /* Extract a mask of the ones we can give to the Thumb's push/pop
20428 instruction. */
20429 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20430 /* Then count how many other high registers will need to be pushed. */
20431 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20432 int n_free, reg_base;
20434 if (!for_prologue && frame_pointer_needed)
20435 amount = offsets->locals_base - offsets->saved_regs;
20436 else
20437 amount = offsets->outgoing_args - offsets->saved_regs;
20439 /* If the stack frame size is 512 exactly, we can save one load
20440 instruction, which should make this a win even when optimizing
20441 for speed. */
20442 if (!optimize_size && amount != 512)
20443 return 0;
20445 /* Can't do this if there are high registers to push. */
20446 if (high_regs_pushed != 0)
20447 return 0;
20449 /* Shouldn't do it in the prologue if no registers would normally
20450 be pushed at all. In the epilogue, also allow it if we'll have
20451 a pop insn for the PC. */
20452 if (l_mask == 0
20453 && (for_prologue
20454 || TARGET_BACKTRACE
20455 || (live_regs_mask & 1 << LR_REGNUM) == 0
20456 || TARGET_INTERWORK
20457 || crtl->args.pretend_args_size != 0))
20458 return 0;
20460 /* Don't do this if thumb_expand_prologue wants to emit instructions
20461 between the push and the stack frame allocation. */
20462 if (for_prologue
20463 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20464 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20465 return 0;
20467 reg_base = 0;
20468 n_free = 0;
20469 if (!for_prologue)
20471 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20472 live_regs_mask >>= reg_base;
20475 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20476 && (for_prologue || call_used_regs[reg_base + n_free]))
20478 live_regs_mask >>= 1;
20479 n_free++;
20482 if (n_free == 0)
20483 return 0;
20484 gcc_assert (amount / 4 * 4 == amount);
20486 if (amount >= 512 && (amount - n_free * 4) < 512)
20487 return (amount - 508) / 4;
20488 if (amount <= n_free * 4)
20489 return amount / 4;
20490 return 0;
20493 /* The bits which aren't usefully expanded as rtl. */
20494 const char *
20495 thumb_unexpanded_epilogue (void)
20497 arm_stack_offsets *offsets;
20498 int regno;
20499 unsigned long live_regs_mask = 0;
20500 int high_regs_pushed = 0;
20501 int extra_pop;
20502 int had_to_push_lr;
20503 int size;
20505 if (cfun->machine->return_used_this_function != 0)
20506 return "";
20508 if (IS_NAKED (arm_current_func_type ()))
20509 return "";
20511 offsets = arm_get_frame_offsets ();
20512 live_regs_mask = offsets->saved_regs_mask;
20513 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20515 /* Deduce the registers used from the function's return value.
20516 This is more reliable than examining df_regs_ever_live_p () because that
20517 will be set if the register is ever used in the function, not just if
20518 the register is used to hold a return value. */
20519 size = arm_size_return_regs ();
20521 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20522 if (extra_pop > 0)
20524 unsigned long extra_mask = (1 << extra_pop) - 1;
20525 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20528 /* The prolog may have pushed some high registers to use as
20529 work registers. e.g. the testsuite file:
20530 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20531 compiles to produce:
20532 push {r4, r5, r6, r7, lr}
20533 mov r7, r9
20534 mov r6, r8
20535 push {r6, r7}
20536 as part of the prolog. We have to undo that pushing here. */
20538 if (high_regs_pushed)
20540 unsigned long mask = live_regs_mask & 0xff;
20541 int next_hi_reg;
20543 /* The available low registers depend on the size of the value we are
20544 returning. */
20545 if (size <= 12)
20546 mask |= 1 << 3;
20547 if (size <= 8)
20548 mask |= 1 << 2;
20550 if (mask == 0)
20551 /* Oh dear! We have no low registers into which we can pop
20552 high registers! */
20553 internal_error
20554 ("no low registers available for popping high registers");
20556 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20557 if (live_regs_mask & (1 << next_hi_reg))
20558 break;
20560 while (high_regs_pushed)
20562 /* Find lo register(s) into which the high register(s) can
20563 be popped. */
20564 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20566 if (mask & (1 << regno))
20567 high_regs_pushed--;
20568 if (high_regs_pushed == 0)
20569 break;
20572 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20574 /* Pop the values into the low register(s). */
20575 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20577 /* Move the value(s) into the high registers. */
20578 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20580 if (mask & (1 << regno))
20582 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20583 regno);
20585 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20586 if (live_regs_mask & (1 << next_hi_reg))
20587 break;
20591 live_regs_mask &= ~0x0f00;
20594 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20595 live_regs_mask &= 0xff;
20597 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20599 /* Pop the return address into the PC. */
20600 if (had_to_push_lr)
20601 live_regs_mask |= 1 << PC_REGNUM;
20603 /* Either no argument registers were pushed or a backtrace
20604 structure was created which includes an adjusted stack
20605 pointer, so just pop everything. */
20606 if (live_regs_mask)
20607 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20608 live_regs_mask);
20610 /* We have either just popped the return address into the
20611 PC or it was kept in LR for the entire function.
20612 Note that thumb_pushpop has already called thumb_exit if the
20613 PC was in the list. */
20614 if (!had_to_push_lr)
20615 thumb_exit (asm_out_file, LR_REGNUM);
20617 else
20619 /* Pop everything but the return address. */
20620 if (live_regs_mask)
20621 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20622 live_regs_mask);
20624 if (had_to_push_lr)
20626 if (size > 12)
20628 /* We have no free low regs, so save one. */
20629 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20630 LAST_ARG_REGNUM);
20633 /* Get the return address into a temporary register. */
20634 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20635 1 << LAST_ARG_REGNUM);
20637 if (size > 12)
20639 /* Move the return address to lr. */
20640 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20641 LAST_ARG_REGNUM);
20642 /* Restore the low register. */
20643 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20644 IP_REGNUM);
20645 regno = LR_REGNUM;
20647 else
20648 regno = LAST_ARG_REGNUM;
20650 else
20651 regno = LR_REGNUM;
20653 /* Remove the argument registers that were pushed onto the stack. */
20654 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20655 SP_REGNUM, SP_REGNUM,
20656 crtl->args.pretend_args_size);
20658 thumb_exit (asm_out_file, regno);
20661 return "";
20664 /* Functions to save and restore machine-specific function data. */
20665 static struct machine_function *
20666 arm_init_machine_status (void)
20668 struct machine_function *machine;
20669 machine = ggc_alloc_cleared_machine_function ();
20671 #if ARM_FT_UNKNOWN != 0
20672 machine->func_type = ARM_FT_UNKNOWN;
20673 #endif
20674 return machine;
20677 /* Return an RTX indicating where the return address to the
20678 calling function can be found. */
20680 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20682 if (count != 0)
20683 return NULL_RTX;
20685 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20688 /* Do anything needed before RTL is emitted for each function. */
20689 void
20690 arm_init_expanders (void)
20692 /* Arrange to initialize and mark the machine per-function status. */
20693 init_machine_status = arm_init_machine_status;
20695 /* This is to stop the combine pass optimizing away the alignment
20696 adjustment of va_arg. */
20697 /* ??? It is claimed that this should not be necessary. */
20698 if (cfun)
20699 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20703 /* Like arm_compute_initial_elimination_offset. Simpler because there
20704 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20705 to point at the base of the local variables after static stack
20706 space for a function has been allocated. */
20708 HOST_WIDE_INT
20709 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20711 arm_stack_offsets *offsets;
20713 offsets = arm_get_frame_offsets ();
20715 switch (from)
20717 case ARG_POINTER_REGNUM:
20718 switch (to)
20720 case STACK_POINTER_REGNUM:
20721 return offsets->outgoing_args - offsets->saved_args;
20723 case FRAME_POINTER_REGNUM:
20724 return offsets->soft_frame - offsets->saved_args;
20726 case ARM_HARD_FRAME_POINTER_REGNUM:
20727 return offsets->saved_regs - offsets->saved_args;
20729 case THUMB_HARD_FRAME_POINTER_REGNUM:
20730 return offsets->locals_base - offsets->saved_args;
20732 default:
20733 gcc_unreachable ();
20735 break;
20737 case FRAME_POINTER_REGNUM:
20738 switch (to)
20740 case STACK_POINTER_REGNUM:
20741 return offsets->outgoing_args - offsets->soft_frame;
20743 case ARM_HARD_FRAME_POINTER_REGNUM:
20744 return offsets->saved_regs - offsets->soft_frame;
20746 case THUMB_HARD_FRAME_POINTER_REGNUM:
20747 return offsets->locals_base - offsets->soft_frame;
20749 default:
20750 gcc_unreachable ();
20752 break;
20754 default:
20755 gcc_unreachable ();
20759 /* Generate the rest of a function's prologue. */
20760 void
20761 thumb1_expand_prologue (void)
20763 rtx insn, dwarf;
20765 HOST_WIDE_INT amount;
20766 arm_stack_offsets *offsets;
20767 unsigned long func_type;
20768 int regno;
20769 unsigned long live_regs_mask;
20771 func_type = arm_current_func_type ();
20773 /* Naked functions don't have prologues. */
20774 if (IS_NAKED (func_type))
20775 return;
20777 if (IS_INTERRUPT (func_type))
20779 error ("interrupt Service Routines cannot be coded in Thumb mode");
20780 return;
20783 offsets = arm_get_frame_offsets ();
20784 live_regs_mask = offsets->saved_regs_mask;
20785 /* Load the pic register before setting the frame pointer,
20786 so we can use r7 as a temporary work register. */
20787 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20788 arm_load_pic_register (live_regs_mask);
20790 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20791 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20792 stack_pointer_rtx);
20794 if (flag_stack_usage)
20795 current_function_static_stack_size
20796 = offsets->outgoing_args - offsets->saved_args;
20798 amount = offsets->outgoing_args - offsets->saved_regs;
20799 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20800 if (amount)
20802 if (amount < 512)
20804 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20805 GEN_INT (- amount)));
20806 RTX_FRAME_RELATED_P (insn) = 1;
20808 else
20810 rtx reg;
20812 /* The stack decrement is too big for an immediate value in a single
20813 insn. In theory we could issue multiple subtracts, but after
20814 three of them it becomes more space efficient to place the full
20815 value in the constant pool and load into a register. (Also the
20816 ARM debugger really likes to see only one stack decrement per
20817 function). So instead we look for a scratch register into which
20818 we can load the decrement, and then we subtract this from the
20819 stack pointer. Unfortunately on the thumb the only available
20820 scratch registers are the argument registers, and we cannot use
20821 these as they may hold arguments to the function. Instead we
20822 attempt to locate a call preserved register which is used by this
20823 function. If we can find one, then we know that it will have
20824 been pushed at the start of the prologue and so we can corrupt
20825 it now. */
20826 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20827 if (live_regs_mask & (1 << regno))
20828 break;
20830 gcc_assert(regno <= LAST_LO_REGNUM);
20832 reg = gen_rtx_REG (SImode, regno);
20834 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20836 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20837 stack_pointer_rtx, reg));
20838 RTX_FRAME_RELATED_P (insn) = 1;
20839 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20840 plus_constant (stack_pointer_rtx,
20841 -amount));
20842 RTX_FRAME_RELATED_P (dwarf) = 1;
20843 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20847 if (frame_pointer_needed)
20848 thumb_set_frame_pointer (offsets);
20850 /* If we are profiling, make sure no instructions are scheduled before
20851 the call to mcount. Similarly if the user has requested no
20852 scheduling in the prolog. Similarly if we want non-call exceptions
20853 using the EABI unwinder, to prevent faulting instructions from being
20854 swapped with a stack adjustment. */
20855 if (crtl->profile || !TARGET_SCHED_PROLOG
20856 || (arm_except_unwind_info (&global_options) == UI_TARGET
20857 && cfun->can_throw_non_call_exceptions))
20858 emit_insn (gen_blockage ());
20860 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20861 if (live_regs_mask & 0xff)
20862 cfun->machine->lr_save_eliminated = 0;
20866 void
20867 thumb1_expand_epilogue (void)
20869 HOST_WIDE_INT amount;
20870 arm_stack_offsets *offsets;
20871 int regno;
20873 /* Naked functions don't have epilogues. */
20874 if (IS_NAKED (arm_current_func_type ()))
20875 return;
20877 offsets = arm_get_frame_offsets ();
20878 amount = offsets->outgoing_args - offsets->saved_regs;
20880 if (frame_pointer_needed)
20882 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20883 amount = offsets->locals_base - offsets->saved_regs;
20885 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20887 gcc_assert (amount >= 0);
20888 if (amount)
20890 if (amount < 512)
20891 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20892 GEN_INT (amount)));
20893 else
20895 /* r3 is always free in the epilogue. */
20896 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20898 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20899 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20903 /* Emit a USE (stack_pointer_rtx), so that
20904 the stack adjustment will not be deleted. */
20905 emit_insn (gen_prologue_use (stack_pointer_rtx));
20907 if (crtl->profile || !TARGET_SCHED_PROLOG)
20908 emit_insn (gen_blockage ());
20910 /* Emit a clobber for each register that will be restored in the epilogue,
20911 so that flow2 will get register lifetimes correct. */
20912 for (regno = 0; regno < 13; regno++)
20913 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20914 emit_clobber (gen_rtx_REG (SImode, regno));
20916 if (! df_regs_ever_live_p (LR_REGNUM))
20917 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20920 static void
20921 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20923 arm_stack_offsets *offsets;
20924 unsigned long live_regs_mask = 0;
20925 unsigned long l_mask;
20926 unsigned high_regs_pushed = 0;
20927 int cfa_offset = 0;
20928 int regno;
20930 if (IS_NAKED (arm_current_func_type ()))
20931 return;
20933 if (is_called_in_ARM_mode (current_function_decl))
20935 const char * name;
20937 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20938 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20939 == SYMBOL_REF);
20940 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20942 /* Generate code sequence to switch us into Thumb mode. */
20943 /* The .code 32 directive has already been emitted by
20944 ASM_DECLARE_FUNCTION_NAME. */
20945 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20946 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20948 /* Generate a label, so that the debugger will notice the
20949 change in instruction sets. This label is also used by
20950 the assembler to bypass the ARM code when this function
20951 is called from a Thumb encoded function elsewhere in the
20952 same file. Hence the definition of STUB_NAME here must
20953 agree with the definition in gas/config/tc-arm.c. */
20955 #define STUB_NAME ".real_start_of"
20957 fprintf (f, "\t.code\t16\n");
20958 #ifdef ARM_PE
20959 if (arm_dllexport_name_p (name))
20960 name = arm_strip_name_encoding (name);
20961 #endif
20962 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20963 fprintf (f, "\t.thumb_func\n");
20964 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20967 if (crtl->args.pretend_args_size)
20969 /* Output unwind directive for the stack adjustment. */
20970 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20971 fprintf (f, "\t.pad #%d\n",
20972 crtl->args.pretend_args_size);
20974 if (cfun->machine->uses_anonymous_args)
20976 int num_pushes;
20978 fprintf (f, "\tpush\t{");
20980 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20982 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20983 regno <= LAST_ARG_REGNUM;
20984 regno++)
20985 asm_fprintf (f, "%r%s", regno,
20986 regno == LAST_ARG_REGNUM ? "" : ", ");
20988 fprintf (f, "}\n");
20990 else
20991 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20992 SP_REGNUM, SP_REGNUM,
20993 crtl->args.pretend_args_size);
20995 /* We don't need to record the stores for unwinding (would it
20996 help the debugger any if we did?), but record the change in
20997 the stack pointer. */
20998 if (dwarf2out_do_frame ())
21000 char *l = dwarf2out_cfi_label (false);
21002 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
21003 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
21007 /* Get the registers we are going to push. */
21008 offsets = arm_get_frame_offsets ();
21009 live_regs_mask = offsets->saved_regs_mask;
21010 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21011 l_mask = live_regs_mask & 0x40ff;
21012 /* Then count how many other high registers will need to be pushed. */
21013 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
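/* In these masks bits 0-7 select r0-r7 and bit 14 selects LR, which are
   exactly the registers a Thumb-1 PUSH can store directly (hence 0x40ff);
   bits 8-11 (0x0f00) are the high registers r8-r11, which must first be
   copied into a low register before they can be pushed.  */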
21015 if (TARGET_BACKTRACE)
21017 unsigned offset;
21018 unsigned work_register;
21020 /* We have been asked to create a stack backtrace structure.
21021 The code looks like this:
21023 0 .align 2
21024 0 func:
21025 0 sub SP, #16 Reserve space for 4 registers.
21026 2 push {R7} Push low registers.
21027 4 add R7, SP, #20 Get the stack pointer before the push.
21028 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
21029 8 mov R7, PC Get hold of the start of this code plus 12.
21030 10 str R7, [SP, #16] Store it.
21031 12 mov R7, FP Get hold of the current frame pointer.
21032 14 str R7, [SP, #4] Store it.
21033 16 mov R7, LR Get hold of the current return address.
21034 18 str R7, [SP, #12] Store it.
21035 20 add R7, SP, #16 Point at the start of the backtrace structure.
21036 22 mov FP, R7 Put this value into the frame pointer. */
21038 work_register = thumb_find_work_register (live_regs_mask);
21040 if (arm_except_unwind_info (&global_options) == UI_TARGET)
21041 asm_fprintf (f, "\t.pad #16\n");
21043 asm_fprintf
21044 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
21045 SP_REGNUM, SP_REGNUM);
21047 if (dwarf2out_do_frame ())
21049 char *l = dwarf2out_cfi_label (false);
21051 cfa_offset = cfa_offset + 16;
21052 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
21055 if (l_mask)
21057 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
21058 offset = bit_count (l_mask) * UNITS_PER_WORD;
21060 else
21061 offset = 0;
21063 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21064 offset + 16 + crtl->args.pretend_args_size);
21066 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21067 offset + 4);
21069 /* Make sure that the instruction fetching the PC is in the right place
21070 to calculate "start of backtrace creation code + 12". */
21071 if (l_mask)
21073 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21074 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21075 offset + 12);
21076 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21077 ARM_HARD_FRAME_POINTER_REGNUM);
21078 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21079 offset);
21081 else
21083 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21084 ARM_HARD_FRAME_POINTER_REGNUM);
21085 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21086 offset);
21087 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21088 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21089 offset + 12);
21092 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
21093 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21094 offset + 8);
21095 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21096 offset + 12);
21097 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
21098 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
21100 /* Optimization: If we are not pushing any low registers but we are going
21101 to push some high registers then delay our first push. This will just
21102 be a push of LR and we can combine it with the push of the first high
21103 register. */
21104 else if ((l_mask & 0xff) != 0
21105 || (high_regs_pushed == 0 && l_mask))
21107 unsigned long mask = l_mask;
21108 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
21109 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
21112 if (high_regs_pushed)
21114 unsigned pushable_regs;
21115 unsigned next_hi_reg;
21117 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
21118 if (live_regs_mask & (1 << next_hi_reg))
21119 break;
21121 pushable_regs = l_mask & 0xff;
21123 if (pushable_regs == 0)
21124 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
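/* Thumb-1 cannot push high registers directly, so the loop below copies
   each high register that needs saving into one of the low registers in
   PUSHABLE_REGS and then pushes that low register in its place, while
   REAL_REGS_MASK records which registers were really saved.  */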
21126 while (high_regs_pushed > 0)
21128 unsigned long real_regs_mask = 0;
21130 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
21132 if (pushable_regs & (1 << regno))
21134 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
21136 high_regs_pushed --;
21137 real_regs_mask |= (1 << next_hi_reg);
21139 if (high_regs_pushed)
21141 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
21142 next_hi_reg --)
21143 if (live_regs_mask & (1 << next_hi_reg))
21144 break;
21146 else
21148 pushable_regs &= ~((1 << regno) - 1);
21149 break;
21154 /* If we had to find a work register and we have not yet
21155 saved the LR then add it to the list of regs to push. */
21156 if (l_mask == (1 << LR_REGNUM))
21158 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
21159 1, &cfa_offset,
21160 real_regs_mask | (1 << LR_REGNUM));
21161 l_mask = 0;
21163 else
21164 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
21169 /* Handle the case of a double word load into a low register from
21170 a computed memory address. The computed address may involve a
21171 register which is overwritten by the load. */
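/* For example (illustrative): loading the register pair r2:r3 from the
   address r2 + r3 takes the PLUS/REG path below and emits
	add	r3, r2, r3
	ldr	r2, [r3, #0]
	ldr	r3, [r3, #4]
   so the computed address survives in the high half of the destination
   until the low half has been loaded.  */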
21172 const char *
21173 thumb_load_double_from_address (rtx *operands)
21175 rtx addr;
21176 rtx base;
21177 rtx offset;
21178 rtx arg1;
21179 rtx arg2;
21181 gcc_assert (GET_CODE (operands[0]) == REG);
21182 gcc_assert (GET_CODE (operands[1]) == MEM);
21184 /* Get the memory address. */
21185 addr = XEXP (operands[1], 0);
21187 /* Work out how the memory address is computed. */
21188 switch (GET_CODE (addr))
21190 case REG:
21191 operands[2] = adjust_address (operands[1], SImode, 4);
21193 if (REGNO (operands[0]) == REGNO (addr))
21195 output_asm_insn ("ldr\t%H0, %2", operands);
21196 output_asm_insn ("ldr\t%0, %1", operands);
21198 else
21200 output_asm_insn ("ldr\t%0, %1", operands);
21201 output_asm_insn ("ldr\t%H0, %2", operands);
21203 break;
21205 case CONST:
21206 /* Compute <address> + 4 for the high order load. */
21207 operands[2] = adjust_address (operands[1], SImode, 4);
21209 output_asm_insn ("ldr\t%0, %1", operands);
21210 output_asm_insn ("ldr\t%H0, %2", operands);
21211 break;
21213 case PLUS:
21214 arg1 = XEXP (addr, 0);
21215 arg2 = XEXP (addr, 1);
21217 if (CONSTANT_P (arg1))
21218 base = arg2, offset = arg1;
21219 else
21220 base = arg1, offset = arg2;
21222 gcc_assert (GET_CODE (base) == REG);
21224 /* Catch the case of <address> = <reg> + <reg> */
21225 if (GET_CODE (offset) == REG)
21227 int reg_offset = REGNO (offset);
21228 int reg_base = REGNO (base);
21229 int reg_dest = REGNO (operands[0]);
21231 /* Add the base and offset registers together into the
21232 higher destination register. */
21233 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21234 reg_dest + 1, reg_base, reg_offset);
21236 /* Load the lower destination register from the address in
21237 the higher destination register. */
21238 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21239 reg_dest, reg_dest + 1);
21241 /* Load the higher destination register from its own address
21242 plus 4. */
21243 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21244 reg_dest + 1, reg_dest + 1);
21246 else
21248 /* Compute <address> + 4 for the high order load. */
21249 operands[2] = adjust_address (operands[1], SImode, 4);
21251 /* If the computed address is held in the low order register
21252 then load the high order register first, otherwise always
21253 load the low order register first. */
21254 if (REGNO (operands[0]) == REGNO (base))
21256 output_asm_insn ("ldr\t%H0, %2", operands);
21257 output_asm_insn ("ldr\t%0, %1", operands);
21259 else
21261 output_asm_insn ("ldr\t%0, %1", operands);
21262 output_asm_insn ("ldr\t%H0, %2", operands);
21265 break;
21267 case LABEL_REF:
21268 /* With no registers to worry about we can just load the value
21269 directly. */
21270 operands[2] = adjust_address (operands[1], SImode, 4);
21272 output_asm_insn ("ldr\t%H0, %2", operands);
21273 output_asm_insn ("ldr\t%0, %1", operands);
21274 break;
21276 default:
21277 gcc_unreachable ();
21280 return "";
21283 const char *
21284 thumb_output_move_mem_multiple (int n, rtx *operands)
21286 rtx tmp;
21288 switch (n)
21290 case 2:
21291 if (REGNO (operands[4]) > REGNO (operands[5]))
21293 tmp = operands[4];
21294 operands[4] = operands[5];
21295 operands[5] = tmp;
21297 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21298 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21299 break;
21301 case 3:
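/* The three compare-and-swap steps below sort operands[4..6] into
   ascending register order, as required by the ldmia/stmia register
   lists.  */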
21302 if (REGNO (operands[4]) > REGNO (operands[5]))
21304 tmp = operands[4];
21305 operands[4] = operands[5];
21306 operands[5] = tmp;
21308 if (REGNO (operands[5]) > REGNO (operands[6]))
21310 tmp = operands[5];
21311 operands[5] = operands[6];
21312 operands[6] = tmp;
21314 if (REGNO (operands[4]) > REGNO (operands[5]))
21316 tmp = operands[4];
21317 operands[4] = operands[5];
21318 operands[5] = tmp;
21321 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21322 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21323 break;
21325 default:
21326 gcc_unreachable ();
21329 return "";
21332 /* Output a call-via instruction for thumb state. */
21333 const char *
21334 thumb_call_via_reg (rtx reg)
21336 int regno = REGNO (reg);
21337 rtx *labelp;
21339 gcc_assert (regno < LR_REGNUM);
21341 /* If we are in the normal text section we can use a single instance
21342 per compilation unit. If we are doing function sections, then we need
21343 an entry per section, since we can't rely on reachability. */
21344 if (in_section == text_section)
21346 thumb_call_reg_needed = 1;
21348 if (thumb_call_via_label[regno] == NULL)
21349 thumb_call_via_label[regno] = gen_label_rtx ();
21350 labelp = thumb_call_via_label + regno;
21352 else
21354 if (cfun->machine->call_via[regno] == NULL)
21355 cfun->machine->call_via[regno] = gen_label_rtx ();
21356 labelp = cfun->machine->call_via + regno;
21359 output_asm_insn ("bl\t%a0", labelp);
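/* The "bl" targets a small trampoline of the form "bx rN"; for the normal
   text section these trampolines are emitted once per compilation unit by
   arm_file_end below.  */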
21360 return "";
21363 /* Routines for generating rtl. */
21364 void
21365 thumb_expand_movmemqi (rtx *operands)
21367 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21368 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21369 HOST_WIDE_INT len = INTVAL (operands[2]);
21370 HOST_WIDE_INT offset = 0;
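/* Copy as many 12- and 8-byte blocks as possible using the Thumb block
   move patterns (which are assumed to post-increment the pointer
   registers), then mop up the remaining 4/2/1-byte tail with explicit
   loads and stores at OFFSET.  */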
21372 while (len >= 12)
21374 emit_insn (gen_movmem12b (out, in, out, in));
21375 len -= 12;
21378 if (len >= 8)
21380 emit_insn (gen_movmem8b (out, in, out, in));
21381 len -= 8;
21384 if (len >= 4)
21386 rtx reg = gen_reg_rtx (SImode);
21387 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21388 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21389 len -= 4;
21390 offset += 4;
21393 if (len >= 2)
21395 rtx reg = gen_reg_rtx (HImode);
21396 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21397 plus_constant (in, offset))));
21398 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21399 reg));
21400 len -= 2;
21401 offset += 2;
21404 if (len)
21406 rtx reg = gen_reg_rtx (QImode);
21407 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21408 plus_constant (in, offset))));
21409 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
21410 reg));
21414 void
21415 thumb_reload_out_hi (rtx *operands)
21417 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21420 /* Handle reading a half-word from memory during reload. */
21421 void
21422 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21424 gcc_unreachable ();
21427 /* Return the length of a function name prefix
21428 that starts with the character 'c'. */
21429 static int
21430 arm_get_strip_length (int c)
21432 switch (c)
21434 ARM_NAME_ENCODING_LENGTHS
21435 default: return 0;
21439 /* Return a pointer to a function's name with any
21440 and all prefix encodings stripped from it. */
21441 const char *
21442 arm_strip_name_encoding (const char *name)
21444 int skip;
21446 while ((skip = arm_get_strip_length (* name)))
21447 name += skip;
21449 return name;
21452 /* If there is a '*' anywhere in the name's prefix, then
21453 emit the stripped name verbatim, otherwise prepend an
21454 underscore if leading underscores are being used. */
21455 void
21456 arm_asm_output_labelref (FILE *stream, const char *name)
21458 int skip;
21459 int verbatim = 0;
21461 while ((skip = arm_get_strip_length (* name)))
21463 verbatim |= (*name == '*');
21464 name += skip;
21467 if (verbatim)
21468 fputs (name, stream);
21469 else
21470 asm_fprintf (stream, "%U%s", name);
21473 static void
21474 arm_file_start (void)
21476 int val;
21478 if (TARGET_UNIFIED_ASM)
21479 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21481 if (TARGET_BPABI)
21483 const char *fpu_name;
21484 if (arm_selected_arch)
21485 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21486 else
21487 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21489 if (TARGET_SOFT_FLOAT)
21491 if (TARGET_VFP)
21492 fpu_name = "softvfp";
21493 else
21494 fpu_name = "softfpa";
21496 else
21498 fpu_name = arm_fpu_desc->name;
21499 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21501 if (TARGET_HARD_FLOAT)
21502 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21503 if (TARGET_HARD_FLOAT_ABI)
21504 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21507 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21509 /* Some of these attributes only apply when the corresponding features
21510 are used. However we don't have any easy way of figuring this out.
21511 Conservatively record the setting that would have been used. */
21513 /* Tag_ABI_FP_rounding. */
21514 if (flag_rounding_math)
21515 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21516 if (!flag_unsafe_math_optimizations)
21518 /* Tag_ABI_FP_denormal. */
21519 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21520 /* Tag_ABI_FP_exceptions. */
21521 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21523 /* Tag_ABI_FP_user_exceptions. */
21524 if (flag_signaling_nans)
21525 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21526 /* Tag_ABI_FP_number_model. */
21527 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21528 flag_finite_math_only ? 1 : 3);
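/* The value 1 is understood to select the "IEEE finite values only"
   number model and 3 the full IEEE 754 model.  */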
21530 /* Tag_ABI_align8_needed. */
21531 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21532 /* Tag_ABI_align8_preserved. */
21533 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21534 /* Tag_ABI_enum_size. */
21535 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21536 flag_short_enums ? 1 : 2);
21538 /* Tag_ABI_optimization_goals. */
21539 if (optimize_size)
21540 val = 4;
21541 else if (optimize >= 2)
21542 val = 2;
21543 else if (optimize)
21544 val = 1;
21545 else
21546 val = 6;
21547 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21549 /* Tag_ABI_FP_16bit_format. */
21550 if (arm_fp16_format)
21551 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21552 (int)arm_fp16_format);
21554 if (arm_lang_output_object_attributes_hook)
21555 arm_lang_output_object_attributes_hook();
21557 default_file_start();
21560 static void
21561 arm_file_end (void)
21563 int regno;
21565 if (NEED_INDICATE_EXEC_STACK)
21566 /* Add .note.GNU-stack. */
21567 file_end_indicate_exec_stack ();
21569 if (! thumb_call_reg_needed)
21570 return;
21572 switch_to_section (text_section);
21573 asm_fprintf (asm_out_file, "\t.code 16\n");
21574 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21576 for (regno = 0; regno < LR_REGNUM; regno++)
21578 rtx label = thumb_call_via_label[regno];
21580 if (label != 0)
21582 targetm.asm_out.internal_label (asm_out_file, "L",
21583 CODE_LABEL_NUMBER (label));
21584 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21589 #ifndef ARM_PE
21590 /* Symbols in the text segment can be accessed without indirecting via the
21591 constant pool; it may take an extra binary operation, but this is still
21592 faster than indirecting via memory. Don't do this when not optimizing,
21593 since we won't be calculating all of the offsets necessary to do this
21594 simplification. */
21596 static void
21597 arm_encode_section_info (tree decl, rtx rtl, int first)
21599 if (optimize > 0 && TREE_CONSTANT (decl))
21600 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21602 default_encode_section_info (decl, rtl, first);
21604 #endif /* !ARM_PE */
21606 static void
21607 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21609 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21610 && !strcmp (prefix, "L"))
21612 arm_ccfsm_state = 0;
21613 arm_target_insn = NULL;
21615 default_internal_label (stream, prefix, labelno);
21618 /* Output code to add DELTA to the first argument, and then jump
21619 to FUNCTION. Used for C++ multiple inheritance. */
21620 static void
21621 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21622 HOST_WIDE_INT delta,
21623 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21624 tree function)
21626 static int thunk_label = 0;
21627 char label[256];
21628 char labelpc[256];
21629 int mi_delta = delta;
21630 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21631 int shift = 0;
21632 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21633 ? 1 : 0);
21634 if (mi_delta < 0)
21635 mi_delta = - mi_delta;
21637 if (TARGET_THUMB1)
21639 int labelno = thunk_label++;
21640 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21641 /* Thunks are entered in arm mode when available. */
21642 if (TARGET_THUMB1_ONLY)
21644 /* push r3 so we can use it as a temporary. */
21645 /* TODO: Omit this save if r3 is not used. */
21646 fputs ("\tpush {r3}\n", file);
21647 fputs ("\tldr\tr3, ", file);
21649 else
21651 fputs ("\tldr\tr12, ", file);
21653 assemble_name (file, label);
21654 fputc ('\n', file);
21655 if (flag_pic)
21657 /* If we are generating PIC, the ldr instruction below loads
21658 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21659 the address of the add + 8, so we have:
21661 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21662 = target + 1.
21664 Note that we have "+ 1" because some versions of GNU ld
21665 don't set the low bit of the result for R_ARM_REL32
21666 relocations against thumb function symbols.
21667 On ARMv6M this is +4, not +8. */
21668 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21669 assemble_name (file, labelpc);
21670 fputs (":\n", file);
21671 if (TARGET_THUMB1_ONLY)
21673 /* This is 2 insns after the start of the thunk, so we know it
21674 is 4-byte aligned. */
21675 fputs ("\tadd\tr3, pc, r3\n", file);
21676 fputs ("\tmov r12, r3\n", file);
21678 else
21679 fputs ("\tadd\tr12, pc, r12\n", file);
21681 else if (TARGET_THUMB1_ONLY)
21682 fputs ("\tmov r12, r3\n", file);
21684 if (TARGET_THUMB1_ONLY)
21686 if (mi_delta > 255)
21688 fputs ("\tldr\tr3, ", file);
21689 assemble_name (file, label);
21690 fputs ("+4\n", file);
21691 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21692 mi_op, this_regno, this_regno);
21694 else if (mi_delta != 0)
21696 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21697 mi_op, this_regno, this_regno,
21698 mi_delta);
21701 else
21703 /* TODO: Use movw/movt for large constants when available. */
21704 while (mi_delta != 0)
21706 if ((mi_delta & (3 << shift)) == 0)
21707 shift += 2;
21708 else
21710 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21711 mi_op, this_regno, this_regno,
21712 mi_delta & (0xff << shift));
21713 mi_delta &= ~(0xff << shift);
21714 shift += 8;
21718 if (TARGET_THUMB1)
21720 if (TARGET_THUMB1_ONLY)
21721 fputs ("\tpop\t{r3}\n", file);
21723 fprintf (file, "\tbx\tr12\n");
21724 ASM_OUTPUT_ALIGN (file, 2);
21725 assemble_name (file, label);
21726 fputs (":\n", file);
21727 if (flag_pic)
21729 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21730 rtx tem = XEXP (DECL_RTL (function), 0);
21731 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21732 tem = gen_rtx_MINUS (GET_MODE (tem),
21733 tem,
21734 gen_rtx_SYMBOL_REF (Pmode,
21735 ggc_strdup (labelpc)));
21736 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21738 else
21739 /* Output ".word .LTHUNKn". */
21740 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21742 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21743 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21745 else
21747 fputs ("\tb\t", file);
21748 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21749 if (NEED_PLT_RELOC)
21750 fputs ("(PLT)", file);
21751 fputc ('\n', file);
21756 arm_emit_vector_const (FILE *file, rtx x)
21758 int i;
21759 const char * pattern;
21761 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21763 switch (GET_MODE (x))
21765 case V2SImode: pattern = "%08x"; break;
21766 case V4HImode: pattern = "%04x"; break;
21767 case V8QImode: pattern = "%02x"; break;
21768 default: gcc_unreachable ();
21771 fprintf (file, "0x");
21772 for (i = CONST_VECTOR_NUNITS (x); i--;)
21774 rtx element;
21776 element = CONST_VECTOR_ELT (x, i);
21777 fprintf (file, pattern, INTVAL (element));
21780 return 1;
21783 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
21784 HFmode constant pool entries are actually loaded with ldr. */
21785 void
21786 arm_emit_fp16_const (rtx c)
21788 REAL_VALUE_TYPE r;
21789 long bits;
21791 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21792 bits = real_to_target (NULL, &r, HFmode);
21793 if (WORDS_BIG_ENDIAN)
21794 assemble_zeros (2);
21795 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21796 if (!WORDS_BIG_ENDIAN)
21797 assemble_zeros (2);
21800 const char *
21801 arm_output_load_gr (rtx *operands)
21803 rtx reg;
21804 rtx offset;
21805 rtx wcgr;
21806 rtx sum;
21808 if (GET_CODE (operands [1]) != MEM
21809 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21810 || GET_CODE (reg = XEXP (sum, 0)) != REG
21811 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21812 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21813 return "wldrw%?\t%0, %1";
21815 /* Fix up an out-of-range load of a GR register. */
21816 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21817 wcgr = operands[0];
21818 operands[0] = reg;
21819 output_asm_insn ("ldr%?\t%0, %1", operands);
21821 operands[0] = wcgr;
21822 operands[1] = reg;
21823 output_asm_insn ("tmcr%?\t%0, %1", operands);
21824 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21826 return "";
21829 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21831 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21832 named arg and all anonymous args onto the stack.
21833 XXX I know the prologue shouldn't be pushing registers, but it is faster
21834 that way. */
21836 static void
21837 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21838 enum machine_mode mode,
21839 tree type,
21840 int *pretend_size,
21841 int second_time ATTRIBUTE_UNUSED)
21843 int nregs;
21845 cfun->machine->uses_anonymous_args = 1;
21846 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21848 nregs = pcum->aapcs_ncrn;
21849 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21850 nregs++;
21852 else
21853 nregs = pcum->nregs;
21855 if (nregs < NUM_ARG_REGS)
21856 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
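/* For example (illustrative): a variadic function whose named arguments
   occupy only r0 gets nregs == 1, so pretend_size becomes 12
   (3 * UNITS_PER_WORD) and the prologue pushes r1-r3 so that the
   anonymous arguments are contiguous with any stack-passed arguments.  */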
21859 /* Return nonzero if the CONSUMER instruction (a store) does not need
21860 PRODUCER's value to calculate the address. */
21863 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21865 rtx value = PATTERN (producer);
21866 rtx addr = PATTERN (consumer);
21868 if (GET_CODE (value) == COND_EXEC)
21869 value = COND_EXEC_CODE (value);
21870 if (GET_CODE (value) == PARALLEL)
21871 value = XVECEXP (value, 0, 0);
21872 value = XEXP (value, 0);
21873 if (GET_CODE (addr) == COND_EXEC)
21874 addr = COND_EXEC_CODE (addr);
21875 if (GET_CODE (addr) == PARALLEL)
21876 addr = XVECEXP (addr, 0, 0);
21877 addr = XEXP (addr, 0);
21879 return !reg_overlap_mentioned_p (value, addr);
21882 /* Return nonzero if the CONSUMER instruction (a store) does need
21883 PRODUCER's value to calculate the address. */
21886 arm_early_store_addr_dep (rtx producer, rtx consumer)
21888 return !arm_no_early_store_addr_dep (producer, consumer);
21891 /* Return nonzero if the CONSUMER instruction (a load) does need
21892 PRODUCER's value to calculate the address. */
21895 arm_early_load_addr_dep (rtx producer, rtx consumer)
21897 rtx value = PATTERN (producer);
21898 rtx addr = PATTERN (consumer);
21900 if (GET_CODE (value) == COND_EXEC)
21901 value = COND_EXEC_CODE (value);
21902 if (GET_CODE (value) == PARALLEL)
21903 value = XVECEXP (value, 0, 0);
21904 value = XEXP (value, 0);
21905 if (GET_CODE (addr) == COND_EXEC)
21906 addr = COND_EXEC_CODE (addr);
21907 if (GET_CODE (addr) == PARALLEL)
21908 addr = XVECEXP (addr, 0, 0);
21909 addr = XEXP (addr, 1);
21911 return reg_overlap_mentioned_p (value, addr);
21914 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21915 have an early register shift value or amount dependency on the
21916 result of PRODUCER. */
21919 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21921 rtx value = PATTERN (producer);
21922 rtx op = PATTERN (consumer);
21923 rtx early_op;
21925 if (GET_CODE (value) == COND_EXEC)
21926 value = COND_EXEC_CODE (value);
21927 if (GET_CODE (value) == PARALLEL)
21928 value = XVECEXP (value, 0, 0);
21929 value = XEXP (value, 0);
21930 if (GET_CODE (op) == COND_EXEC)
21931 op = COND_EXEC_CODE (op);
21932 if (GET_CODE (op) == PARALLEL)
21933 op = XVECEXP (op, 0, 0);
21934 op = XEXP (op, 1);
21936 early_op = XEXP (op, 0);
21937 /* This is either an actual independent shift, or a shift applied to
21938 the first operand of another operation. We want the whole shift
21939 operation. */
21940 if (GET_CODE (early_op) == REG)
21941 early_op = op;
21943 return !reg_overlap_mentioned_p (value, early_op);
21946 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21947 have an early register shift value dependency on the result of
21948 PRODUCER. */
21951 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21953 rtx value = PATTERN (producer);
21954 rtx op = PATTERN (consumer);
21955 rtx early_op;
21957 if (GET_CODE (value) == COND_EXEC)
21958 value = COND_EXEC_CODE (value);
21959 if (GET_CODE (value) == PARALLEL)
21960 value = XVECEXP (value, 0, 0);
21961 value = XEXP (value, 0);
21962 if (GET_CODE (op) == COND_EXEC)
21963 op = COND_EXEC_CODE (op);
21964 if (GET_CODE (op) == PARALLEL)
21965 op = XVECEXP (op, 0, 0);
21966 op = XEXP (op, 1);
21968 early_op = XEXP (op, 0);
21970 /* This is either an actual independent shift, or a shift applied to
21971 the first operand of another operation. We want the value being
21972 shifted, in either case. */
21973 if (GET_CODE (early_op) != REG)
21974 early_op = XEXP (early_op, 0);
21976 return !reg_overlap_mentioned_p (value, early_op);
21979 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21980 have an early register mult dependency on the result of
21981 PRODUCER. */
21984 arm_no_early_mul_dep (rtx producer, rtx consumer)
21986 rtx value = PATTERN (producer);
21987 rtx op = PATTERN (consumer);
21989 if (GET_CODE (value) == COND_EXEC)
21990 value = COND_EXEC_CODE (value);
21991 if (GET_CODE (value) == PARALLEL)
21992 value = XVECEXP (value, 0, 0);
21993 value = XEXP (value, 0);
21994 if (GET_CODE (op) == COND_EXEC)
21995 op = COND_EXEC_CODE (op);
21996 if (GET_CODE (op) == PARALLEL)
21997 op = XVECEXP (op, 0, 0);
21998 op = XEXP (op, 1);
22000 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
22002 if (GET_CODE (XEXP (op, 0)) == MULT)
22003 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
22004 else
22005 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
22008 return 0;
22011 /* We can't rely on the caller doing the proper promotion when
22012 using APCS or ATPCS. */
22014 static bool
22015 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
22017 return !TARGET_AAPCS_BASED;
22020 static enum machine_mode
22021 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
22022 enum machine_mode mode,
22023 int *punsignedp ATTRIBUTE_UNUSED,
22024 const_tree fntype ATTRIBUTE_UNUSED,
22025 int for_return ATTRIBUTE_UNUSED)
22027 if (GET_MODE_CLASS (mode) == MODE_INT
22028 && GET_MODE_SIZE (mode) < 4)
22029 return SImode;
22031 return mode;
22034 /* AAPCS based ABIs use short enums by default. */
22036 static bool
22037 arm_default_short_enums (void)
22039 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
22043 /* AAPCS requires that anonymous bitfields affect structure alignment. */
22045 static bool
22046 arm_align_anon_bitfield (void)
22048 return TARGET_AAPCS_BASED;
22052 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
22054 static tree
22055 arm_cxx_guard_type (void)
22057 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
22060 /* Return non-zero if the consumer (a multiply-accumulate instruction)
22061 has an accumulator dependency on the result of the producer (a
22062 multiplication instruction) and no other dependency on that result. */
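/* For example (illustrative): "mul r1, r2, r3" followed by
   "mla r4, r5, r6, r1" satisfies this test, whereas "mla r4, r1, r6, r1"
   does not, because the multiplication result also feeds one of the
   multiply operands of the MAC.  */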
22064 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
22066 rtx mul = PATTERN (producer);
22067 rtx mac = PATTERN (consumer);
22068 rtx mul_result;
22069 rtx mac_op0, mac_op1, mac_acc;
22071 if (GET_CODE (mul) == COND_EXEC)
22072 mul = COND_EXEC_CODE (mul);
22073 if (GET_CODE (mac) == COND_EXEC)
22074 mac = COND_EXEC_CODE (mac);
22076 /* Check that mul is of the form (set (...) (mult ...))
22077 and mla is of the form (set (...) (plus (mult ...) (...))). */
22078 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
22079 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
22080 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
22081 return 0;
22083 mul_result = XEXP (mul, 0);
22084 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
22085 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
22086 mac_acc = XEXP (XEXP (mac, 1), 1);
22088 return (reg_overlap_mentioned_p (mul_result, mac_acc)
22089 && !reg_overlap_mentioned_p (mul_result, mac_op0)
22090 && !reg_overlap_mentioned_p (mul_result, mac_op1));
22094 /* The EABI says test the least significant bit of a guard variable. */
22096 static bool
22097 arm_cxx_guard_mask_bit (void)
22099 return TARGET_AAPCS_BASED;
22103 /* The EABI specifies that all array cookies are 8 bytes long. */
22105 static tree
22106 arm_get_cookie_size (tree type)
22108 tree size;
22110 if (!TARGET_AAPCS_BASED)
22111 return default_cxx_get_cookie_size (type);
22113 size = build_int_cst (sizetype, 8);
22114 return size;
22118 /* The EABI says that array cookies should also contain the element size. */
22120 static bool
22121 arm_cookie_has_size (void)
22123 return TARGET_AAPCS_BASED;
22127 /* The EABI says constructors and destructors should return a pointer to
22128 the object constructed/destroyed. */
22130 static bool
22131 arm_cxx_cdtor_returns_this (void)
22133 return TARGET_AAPCS_BASED;
22136 /* The EABI says that an inline function may never be the key
22137 method. */
22139 static bool
22140 arm_cxx_key_method_may_be_inline (void)
22142 return !TARGET_AAPCS_BASED;
22145 static void
22146 arm_cxx_determine_class_data_visibility (tree decl)
22148 if (!TARGET_AAPCS_BASED
22149 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
22150 return;
22152 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22153 is exported. However, on systems without dynamic vague linkage,
22154 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22155 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
22156 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
22157 else
22158 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
22159 DECL_VISIBILITY_SPECIFIED (decl) = 1;
22162 static bool
22163 arm_cxx_class_data_always_comdat (void)
22165 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22166 vague linkage if the class has no key function. */
22167 return !TARGET_AAPCS_BASED;
22171 /* The EABI says __aeabi_atexit should be used to register static
22172 destructors. */
22174 static bool
22175 arm_cxx_use_aeabi_atexit (void)
22177 return TARGET_AAPCS_BASED;
22181 void
22182 arm_set_return_address (rtx source, rtx scratch)
22184 arm_stack_offsets *offsets;
22185 HOST_WIDE_INT delta;
22186 rtx addr;
22187 unsigned long saved_regs;
22189 offsets = arm_get_frame_offsets ();
22190 saved_regs = offsets->saved_regs_mask;
22192 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22193 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22194 else
22196 if (frame_pointer_needed)
22197 addr = plus_constant(hard_frame_pointer_rtx, -4);
22198 else
22200 /* LR will be the first saved register. */
22201 delta = offsets->outgoing_args - (offsets->frame + 4);
22204 if (delta >= 4096)
22206 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22207 GEN_INT (delta & ~4095)));
22208 addr = scratch;
22209 delta &= 4095;
22211 else
22212 addr = stack_pointer_rtx;
22214 addr = plus_constant (addr, delta);
22216 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22221 void
22222 thumb_set_return_address (rtx source, rtx scratch)
22224 arm_stack_offsets *offsets;
22225 HOST_WIDE_INT delta;
22226 HOST_WIDE_INT limit;
22227 int reg;
22228 rtx addr;
22229 unsigned long mask;
22231 emit_use (source);
22233 offsets = arm_get_frame_offsets ();
22234 mask = offsets->saved_regs_mask;
22235 if (mask & (1 << LR_REGNUM))
22237 limit = 1024;
22238 /* Find the saved regs. */
22239 if (frame_pointer_needed)
22241 delta = offsets->soft_frame - offsets->saved_args;
22242 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
22243 if (TARGET_THUMB1)
22244 limit = 128;
22246 else
22248 delta = offsets->outgoing_args - offsets->saved_args;
22249 reg = SP_REGNUM;
22251 /* Allow for the stack frame. */
22252 if (TARGET_THUMB1 && TARGET_BACKTRACE)
22253 delta -= 16;
22254 /* The link register is always the first saved register. */
22255 delta -= 4;
22257 /* Construct the address. */
22258 addr = gen_rtx_REG (SImode, reg);
22259 if (delta > limit)
22261 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
22262 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
22263 addr = scratch;
22265 else
22266 addr = plus_constant (addr, delta);
22268 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22270 else
22271 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22274 /* Implements target hook vector_mode_supported_p. */
22275 bool
22276 arm_vector_mode_supported_p (enum machine_mode mode)
22278 /* Neon also supports V2SImode, etc. listed in the clause below. */
22279 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
22280 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
22281 return true;
22283 if ((TARGET_NEON || TARGET_IWMMXT)
22284 && ((mode == V2SImode)
22285 || (mode == V4HImode)
22286 || (mode == V8QImode)))
22287 return true;
22289 return false;
22292 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22293 registers when autovectorizing for Neon, at least until multiple vector
22294 widths are supported properly by the middle-end. */
22296 static enum machine_mode
22297 arm_preferred_simd_mode (enum machine_mode mode)
22299 if (TARGET_NEON)
22300 switch (mode)
22302 case SFmode:
22303 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22304 case SImode:
22305 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22306 case HImode:
22307 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22308 case QImode:
22309 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22310 case DImode:
22311 if (TARGET_NEON_VECTORIZE_QUAD)
22312 return V2DImode;
22313 break;
22315 default:;
22318 if (TARGET_REALLY_IWMMXT)
22319 switch (mode)
22321 case SImode:
22322 return V2SImode;
22323 case HImode:
22324 return V4HImode;
22325 case QImode:
22326 return V8QImode;
22328 default:;
22331 return word_mode;
22334 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22336 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
22337 using r0-r4 for function arguments, r7 for the stack frame and don't have
22338 enough left over to do doubleword arithmetic. For Thumb-2 all the
22339 potentially problematic instructions accept high registers so this is not
22340 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
22341 that require many low registers. */
22342 static bool
22343 arm_class_likely_spilled_p (reg_class_t rclass)
22345 if ((TARGET_THUMB1 && rclass == LO_REGS)
22346 || rclass == CC_REG)
22347 return true;
22349 return false;
22352 /* Implements target hook small_register_classes_for_mode_p. */
22353 bool
22354 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22356 return TARGET_THUMB1;
22359 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22360 ARM insns and therefore guarantee that the shift count is modulo 256.
22361 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22362 guarantee no particular behavior for out-of-range counts. */
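/* So, for example, a variable SImode shift by 257 behaves exactly like a
   shift by 1, and the middle end may use this mask to fold away an
   explicit "& 255" applied to the shift count.  */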
22364 static unsigned HOST_WIDE_INT
22365 arm_shift_truncation_mask (enum machine_mode mode)
22367 return mode == SImode ? 255 : 0;
22371 /* Map internal gcc register numbers to DWARF2 register numbers. */
22373 unsigned int
22374 arm_dbx_register_number (unsigned int regno)
22376 if (regno < 16)
22377 return regno;
22379 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22380 compatibility. The EABI defines them as registers 96-103. */
22381 if (IS_FPA_REGNUM (regno))
22382 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22384 if (IS_VFP_REGNUM (regno))
22386 /* See comment in arm_dwarf_register_span. */
22387 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22388 return 64 + regno - FIRST_VFP_REGNUM;
22389 else
22390 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22393 if (IS_IWMMXT_GR_REGNUM (regno))
22394 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22396 if (IS_IWMMXT_REGNUM (regno))
22397 return 112 + regno - FIRST_IWMMXT_REGNUM;
22399 gcc_unreachable ();
22402 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22403 GCC models them as 64 32-bit registers, so we need to describe this to
22404 the DWARF generation code. Other registers can use the default. */
22405 static rtx
22406 arm_dwarf_register_span (rtx rtl)
22408 unsigned regno;
22409 int nregs;
22410 int i;
22411 rtx p;
22413 regno = REGNO (rtl);
22414 if (!IS_VFP_REGNUM (regno))
22415 return NULL_RTX;
22417 /* XXX FIXME: The EABI defines two VFP register ranges:
22418 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22419 256-287: D0-D31
22420 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22421 corresponding D register. Until GDB supports this, we shall use the
22422 legacy encodings. We also use these encodings for D0-D15 for
22423 compatibility with older debuggers. */
22424 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22425 return NULL_RTX;
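/* For example (illustrative): a 16-byte value held in d16 is described to
   DWARF as two consecutive DImode pieces numbered 272 and 273, i.e.
   256 + (REGNO - FIRST_VFP_REGNUM) / 2 and the following register.  */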
22427 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22428 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22429 regno = (regno - FIRST_VFP_REGNUM) / 2;
22430 for (i = 0; i < nregs; i++)
22431 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
22433 return p;
22436 #if ARM_UNWIND_INFO
22437 /* Emit unwind directives for a store-multiple instruction or stack pointer
22438 push during alignment.
22439 These should only ever be generated by the function prologue code, so
22440 expect them to have a particular form. */
22442 static void
22443 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22445 int i;
22446 HOST_WIDE_INT offset;
22447 HOST_WIDE_INT nregs;
22448 int reg_size;
22449 unsigned reg;
22450 unsigned lastreg;
22451 rtx e;
22453 e = XVECEXP (p, 0, 0);
22454 if (GET_CODE (e) != SET)
22455 abort ();
22457 /* First insn will adjust the stack pointer. */
22458 if (GET_CODE (e) != SET
22459 || GET_CODE (XEXP (e, 0)) != REG
22460 || REGNO (XEXP (e, 0)) != SP_REGNUM
22461 || GET_CODE (XEXP (e, 1)) != PLUS)
22462 abort ();
22464 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22465 nregs = XVECLEN (p, 0) - 1;
22467 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
22468 if (reg < 16)
22470 /* The function prologue may also push pc, but not annotate it as it is
22471 never restored. We turn this into a stack pointer adjustment. */
22472 if (nregs * 4 == offset - 4)
22474 fprintf (asm_out_file, "\t.pad #4\n");
22475 offset -= 4;
22477 reg_size = 4;
22478 fprintf (asm_out_file, "\t.save {");
22480 else if (IS_VFP_REGNUM (reg))
22482 reg_size = 8;
22483 fprintf (asm_out_file, "\t.vsave {");
22485 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22487 /* FPA registers are done differently. */
22488 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22489 return;
22491 else
22492 /* Unknown register type. */
22493 abort ();
22495 /* If the stack increment doesn't match the size of the saved registers,
22496 something has gone horribly wrong. */
22497 if (offset != nregs * reg_size)
22498 abort ();
22500 offset = 0;
22501 lastreg = 0;
22502 /* The remaining insns will describe the stores. */
22503 for (i = 1; i <= nregs; i++)
22505 /* Expect (set (mem <addr>) (reg)).
22506 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22507 e = XVECEXP (p, 0, i);
22508 if (GET_CODE (e) != SET
22509 || GET_CODE (XEXP (e, 0)) != MEM
22510 || GET_CODE (XEXP (e, 1)) != REG)
22511 abort ();
22513 reg = REGNO (XEXP (e, 1));
22514 if (reg < lastreg)
22515 abort ();
22517 if (i != 1)
22518 fprintf (asm_out_file, ", ");
22519 /* We can't use %r for vfp because we need to use the
22520 double precision register names. */
22521 if (IS_VFP_REGNUM (reg))
22522 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22523 else
22524 asm_fprintf (asm_out_file, "%r", reg);
22526 #ifdef ENABLE_CHECKING
22527 /* Check that the addresses are consecutive. */
22528 e = XEXP (XEXP (e, 0), 0);
22529 if (GET_CODE (e) == PLUS)
22531 offset += reg_size;
22532 if (GET_CODE (XEXP (e, 0)) != REG
22533 || REGNO (XEXP (e, 0)) != SP_REGNUM
22534 || GET_CODE (XEXP (e, 1)) != CONST_INT
22535 || offset != INTVAL (XEXP (e, 1)))
22536 abort ();
22538 else if (i != 1
22539 || GET_CODE (e) != REG
22540 || REGNO (e) != SP_REGNUM)
22541 abort ();
22542 #endif
22544 fprintf (asm_out_file, "}\n");
22547 /* Emit unwind directives for a SET. */
22549 static void
22550 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22552 rtx e0;
22553 rtx e1;
22554 unsigned reg;
22556 e0 = XEXP (p, 0);
22557 e1 = XEXP (p, 1);
22558 switch (GET_CODE (e0))
22560 case MEM:
22561 /* Pushing a single register. */
22562 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22563 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22564 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22565 abort ();
22567 asm_fprintf (asm_out_file, "\t.save ");
22568 if (IS_VFP_REGNUM (REGNO (e1)))
22569 asm_fprintf(asm_out_file, "{d%d}\n",
22570 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
22571 else
22572 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
22573 break;
22575 case REG:
22576 if (REGNO (e0) == SP_REGNUM)
22578 /* A stack increment. */
22579 if (GET_CODE (e1) != PLUS
22580 || GET_CODE (XEXP (e1, 0)) != REG
22581 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22582 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22583 abort ();
22585 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22586 -INTVAL (XEXP (e1, 1)));
22588 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22590 HOST_WIDE_INT offset;
22592 if (GET_CODE (e1) == PLUS)
22594 if (GET_CODE (XEXP (e1, 0)) != REG
22595 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22596 abort ();
22597 reg = REGNO (XEXP (e1, 0));
22598 offset = INTVAL (XEXP (e1, 1));
22599 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22600 HARD_FRAME_POINTER_REGNUM, reg,
22601 offset);
22603 else if (GET_CODE (e1) == REG)
22605 reg = REGNO (e1);
22606 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22607 HARD_FRAME_POINTER_REGNUM, reg);
22609 else
22610 abort ();
22612 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22614 /* Move from sp to reg. */
22615 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22617 else if (GET_CODE (e1) == PLUS
22618 && GET_CODE (XEXP (e1, 0)) == REG
22619 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22620 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22622 /* Set reg to offset from sp. */
22623 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
22624 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
22626 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22628 /* Stack pointer save before alignment. */
22629 reg = REGNO (e0);
22630 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22631 reg + 0x90, reg);
22633 else
22634 abort ();
22635 break;
22637 default:
22638 abort ();
22643 /* Emit unwind directives for the given insn. */
22645 static void
22646 arm_unwind_emit (FILE * asm_out_file, rtx insn)
22648 rtx pat;
22650 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22651 return;
22653 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22654 && (TREE_NOTHROW (current_function_decl)
22655 || crtl->all_throwers_are_sibcalls))
22656 return;
22658 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
22659 return;
22661 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
22662 if (pat)
22663 pat = XEXP (pat, 0);
22664 else
22665 pat = PATTERN (insn);
22667 switch (GET_CODE (pat))
22669 case SET:
22670 arm_unwind_emit_set (asm_out_file, pat);
22671 break;
22673 case SEQUENCE:
22674 /* Store multiple. */
22675 arm_unwind_emit_sequence (asm_out_file, pat);
22676 break;
22678 default:
22679 abort();
22684 /* Output a reference from a function exception table to the type_info
22685 object X. The EABI specifies that the symbol should be relocated by
22686 an R_ARM_TARGET2 relocation. */
22688 static bool
22689 arm_output_ttype (rtx x)
22691 fputs ("\t.word\t", asm_out_file);
22692 output_addr_const (asm_out_file, x);
22693 /* Use special relocations for symbol references. */
22694 if (GET_CODE (x) != CONST_INT)
22695 fputs ("(TARGET2)", asm_out_file);
22696 fputc ('\n', asm_out_file);
22698 return TRUE;
22701 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22703 static void
22704 arm_asm_emit_except_personality (rtx personality)
22706 fputs ("\t.personality\t", asm_out_file);
22707 output_addr_const (asm_out_file, personality);
22708 fputc ('\n', asm_out_file);
22711 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22713 static void
22714 arm_asm_init_sections (void)
22716 exception_section = get_unnamed_section (0, output_section_asm_op,
22717 "\t.handlerdata");
22719 #endif /* ARM_UNWIND_INFO */
22721 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22723 static enum unwind_info_type
22724 arm_except_unwind_info (struct gcc_options *opts)
22726 /* Honor the --enable-sjlj-exceptions configure switch. */
22727 #ifdef CONFIG_SJLJ_EXCEPTIONS
22728 if (CONFIG_SJLJ_EXCEPTIONS)
22729 return UI_SJLJ;
22730 #endif
22732 /* If not using ARM EABI unwind tables... */
22733 if (ARM_UNWIND_INFO)
22735 /* For simplicity elsewhere in this file, indicate that all unwind
22736 info is disabled if we're not emitting unwind tables. */
22737 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
22738 return UI_NONE;
22739 else
22740 return UI_TARGET;
22743 /* ... we use sjlj exceptions for backwards compatibility. */
22744 return UI_SJLJ;
22748 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22749 stack alignment. */
22751 static void
22752 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22754 rtx unspec = SET_SRC (pattern);
22755 gcc_assert (GET_CODE (unspec) == UNSPEC);
22757 switch (index)
22759 case UNSPEC_STACK_ALIGN:
22760 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22761 put anything on the stack, so hopefully it won't matter.
22762 CFA = SP will be correct after alignment. */
22763 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22764 SET_DEST (pattern));
22765 break;
22766 default:
22767 gcc_unreachable ();
22772 /* Output unwind directives for the start/end of a function. */
22774 void
22775 arm_output_fn_unwind (FILE * f, bool prologue)
22777 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22778 return;
22780 if (prologue)
22781 fputs ("\t.fnstart\n", f);
22782 else
22784 /* If this function will never be unwound, then mark it as such.
22785 The same condition is used in arm_unwind_emit to suppress
22786 the frame annotations. */
22787 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22788 && (TREE_NOTHROW (current_function_decl)
22789 || crtl->all_throwers_are_sibcalls))
22790 fputs("\t.cantunwind\n", f);
22792 fputs ("\t.fnend\n", f);
22796 static bool
22797 arm_emit_tls_decoration (FILE *fp, rtx x)
22799 enum tls_reloc reloc;
22800 rtx val;
22802 val = XVECEXP (x, 0, 0);
22803 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22805 output_addr_const (fp, val);
22807 switch (reloc)
22809 case TLS_GD32:
22810 fputs ("(tlsgd)", fp);
22811 break;
22812 case TLS_LDM32:
22813 fputs ("(tlsldm)", fp);
22814 break;
22815 case TLS_LDO32:
22816 fputs ("(tlsldo)", fp);
22817 break;
22818 case TLS_IE32:
22819 fputs ("(gottpoff)", fp);
22820 break;
22821 case TLS_LE32:
22822 fputs ("(tpoff)", fp);
22823 break;
22824 default:
22825 gcc_unreachable ();
22828 switch (reloc)
22830 case TLS_GD32:
22831 case TLS_LDM32:
22832 case TLS_IE32:
22833 fputs (" + (. - ", fp);
22834 output_addr_const (fp, XVECEXP (x, 0, 2));
22835 fputs (" - ", fp);
22836 output_addr_const (fp, XVECEXP (x, 0, 3));
22837 fputc (')', fp);
22838 break;
22839 default:
22840 break;
22843 return TRUE;
22846 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22848 static void
22849 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22851 gcc_assert (size == 4);
22852 fputs ("\t.word\t", file);
22853 output_addr_const (file, x);
22854 fputs ("(tlsldo)", file);
22857 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22859 static bool
22860 arm_output_addr_const_extra (FILE *fp, rtx x)
22862 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22863 return arm_emit_tls_decoration (fp, x);
22864 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22866 char label[256];
22867 int labelno = INTVAL (XVECEXP (x, 0, 0));
22869 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22870 assemble_name_raw (fp, label);
22872 return TRUE;
22874 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22876 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22877 if (GOT_PCREL)
22878 fputs ("+.", fp);
22879 fputs ("-(", fp);
22880 output_addr_const (fp, XVECEXP (x, 0, 0));
22881 fputc (')', fp);
22882 return TRUE;
22884 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22886 output_addr_const (fp, XVECEXP (x, 0, 0));
22887 if (GOT_PCREL)
22888 fputs ("+.", fp);
22889 fputs ("-(", fp);
22890 output_addr_const (fp, XVECEXP (x, 0, 1));
22891 fputc (')', fp);
22892 return TRUE;
22894 else if (GET_CODE (x) == CONST_VECTOR)
22895 return arm_emit_vector_const (fp, x);
22897 return FALSE;
22900 /* Output assembly for a shift instruction.
22901 SET_FLAGS determines how the instruction modifies the condition codes.
22902 0 - Do not set condition codes.
22903 1 - Set condition codes.
22904 2 - Use smallest instruction. */
22905 const char *
22906 arm_output_shift(rtx * operands, int set_flags)
22908 char pattern[100];
22909 static const char flag_chars[3] = {'?', '.', '!'};
22910 const char *shift;
22911 HOST_WIDE_INT val;
22912 char c;
22914 c = flag_chars[set_flags];
22915 if (TARGET_UNIFIED_ASM)
22917 shift = shift_op(operands[3], &val);
22918 if (shift)
22920 if (val != -1)
22921 operands[2] = GEN_INT(val);
22922 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
22924 else
22925 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
22927 else
22928 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
22929 output_asm_insn (pattern, operands);
22930 return "";
22933 /* Output a Thumb-1 casesi dispatch sequence. */
22934 const char *
22935 thumb1_output_casesi (rtx *operands)
22937 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
22939 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22941 switch (GET_MODE(diff_vec))
22943 case QImode:
22944 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22945 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
22946 case HImode:
22947 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22948 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
22949 case SImode:
22950 return "bl\t%___gnu_thumb1_case_si";
22951 default:
22952 gcc_unreachable ();
22956 /* Output a Thumb-2 casesi instruction. */
22957 const char *
22958 thumb2_output_casesi (rtx *operands)
22960 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
22962 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22964 output_asm_insn ("cmp\t%0, %1", operands);
22965 output_asm_insn ("bhi\t%l3", operands);
22966 switch (GET_MODE(diff_vec))
22968 case QImode:
22969 return "tbb\t[%|pc, %0]";
22970 case HImode:
22971 return "tbh\t[%|pc, %0, lsl #1]";
22972 case SImode:
22973 if (flag_pic)
22975 output_asm_insn ("adr\t%4, %l2", operands);
22976 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
22977 output_asm_insn ("add\t%4, %4, %5", operands);
22978 return "bx\t%4";
22980 else
22982 output_asm_insn ("adr\t%4, %l2", operands);
22983 return "ldr\t%|pc, [%4, %0, lsl #2]";
22985 default:
22986 gcc_unreachable ();
22990 /* Most ARM cores are single issue, but some newer ones can dual issue.
22991 The scheduler descriptions rely on this being correct. */
22992 static int
22993 arm_issue_rate (void)
22995 switch (arm_tune)
22997 case cortexr4:
22998 case cortexr4f:
22999 case cortexa5:
23000 case cortexa8:
23001 case cortexa9:
23002 case fa726te:
23003 return 2;
23005 default:
23006 return 1;
23010 /* A table and a function to perform ARM-specific name mangling for
23011 NEON vector types in order to conform to the AAPCS (see "Procedure
23012 Call Standard for the ARM Architecture", Appendix A). To qualify
23013 for emission with the mangled names defined in that document, a
23014 vector type must not only be of the correct mode but also be
23015 composed of NEON vector element types (e.g. __builtin_neon_qi). */
23016 typedef struct
23018 enum machine_mode mode;
23019 const char *element_type_name;
23020 const char *aapcs_name;
23021 } arm_mangle_map_entry;
23023 static arm_mangle_map_entry arm_mangle_map[] = {
23024 /* 64-bit containerized types. */
23025 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
23026 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
23027 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
23028 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
23029 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
23030 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
23031 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
23032 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
23033 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
23034 /* 128-bit containerized types. */
23035 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
23036 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
23037 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
23038 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
23039 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
23040 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
23041 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
23042 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
23043 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
23044 { VOIDmode, NULL, NULL }
23047 const char *
23048 arm_mangle_type (const_tree type)
23050 arm_mangle_map_entry *pos = arm_mangle_map;
23052 /* The ARM ABI documents (10th October 2008) say that "__va_list"
23053 has to be mangled as if it is in the "std" namespace. */
23054 if (TARGET_AAPCS_BASED
23055 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
23057 static bool warned;
23058 if (!warned && warn_psabi && !in_system_header)
23060 warned = true;
23061 inform (input_location,
23062 "the mangling of %<va_list%> has changed in GCC 4.4");
23064 return "St9__va_list";
23067 /* Half-precision float. */
23068 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
23069 return "Dh";
23071 if (TREE_CODE (type) != VECTOR_TYPE)
23072 return NULL;
23074 /* Check the mode of the vector type, and the name of the vector
23075 element type, against the table. */
23076 while (pos->mode != VOIDmode)
23078 tree elt_type = TREE_TYPE (type);
23080 if (pos->mode == TYPE_MODE (type)
23081 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
23082 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
23083 pos->element_type_name))
23084 return pos->aapcs_name;
23086 pos++;
23089 /* Use the default mangling for unrecognized (possibly user-defined)
23090 vector types. */
23091 return NULL;
23094 /* Order of allocation of core registers for Thumb: this allocation is
23095 written over the corresponding initial entries of the array
23096 initialized with REG_ALLOC_ORDER. We allocate all low registers
23097 first. Saving and restoring a low register is usually cheaper than
23098 using a call-clobbered high register. */
23100 static const int thumb_core_reg_alloc_order[] =
23102 3, 2, 1, 0, 4, 5, 6, 7,
23103 14, 12, 8, 9, 10, 11, 13, 15
23106 /* Adjust register allocation order when compiling for Thumb. */
23108 void
23109 arm_order_regs_for_local_alloc (void)
23111 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
23112 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
23113 if (TARGET_THUMB)
23114 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
23115 sizeof (thumb_core_reg_alloc_order));
23118 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23120 bool
23121 arm_frame_pointer_required (void)
23123 return (cfun->has_nonlocal_label
23124 || SUBTARGET_FRAME_POINTER_REQUIRED
23125 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
23128 /* Thumb-1 is the only variant that lacks conditional execution, so
23129 return true if the target is not Thumb-1. */
23130 static bool
23131 arm_have_conditional_execution (void)
23133 return !TARGET_THUMB1;
23136 /* Legitimize a memory reference for sync primitive implemented using
23137 ldrex / strex. We currently force the form of the reference to be
23138 indirect without offset. We do not yet support the indirect offset
23139 addressing supported by some ARM targets for these
23140 instructions. */
23141 static rtx
23142 arm_legitimize_sync_memory (rtx memory)
23144 rtx addr = force_reg (Pmode, XEXP (memory, 0));
23145 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
23147 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
23148 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
23149 return legitimate_memory;
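/* Illustrative RTL only: a reference such as
   (mem:SI (plus:SI (reg:SI 4) (const_int 8))) has its address forced
   into a fresh register, so the sync loop only ever sees
   (mem:SI (reg:SI N)), matching the plain [rN] addressing used by the
   ldrex/strex sequences below. */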
23152 /* An instruction emitter. */
23153 typedef void (* emit_f) (int label, const char *, rtx *);
23155 /* An instruction emitter that emits via the conventional
23156 output_asm_insn. */
23157 static void
23158 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23160 output_asm_insn (pattern, operands);
23163 /* Count the number of emitted synchronization instructions. */
23164 static unsigned arm_insn_count;
23166 /* An emitter that counts emitted instructions but does not actually
23167 emit instructions into the instruction stream. */
23168 static void
23169 arm_count (int label,
23170 const char *pattern ATTRIBUTE_UNUSED,
23171 rtx *operands ATTRIBUTE_UNUSED)
23173 if (! label)
23174 ++ arm_insn_count;
23177 /* Construct a pattern using conventional output formatting and feed
23178 it to output_asm_insn. Provides a mechanism to construct the
23179 output pattern on the fly. Note the hard limit on the pattern
23180 buffer size. */
23181 static void ATTRIBUTE_PRINTF_4
23182 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23183 const char *pattern, ...)
23185 va_list ap;
23186 char buffer[256];
23188 va_start (ap, pattern);
23189 vsprintf (buffer, pattern, ap);
23190 va_end (ap);
23191 emit (label, buffer, operands);
23194 /* Emit the memory barrier instruction, if any, provided by this
23195 target to a specified emitter. */
23196 static void
23197 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23199 if (TARGET_HAVE_DMB)
23201 /* Note we issue a system-level barrier. We should consider
23202 issuing an inner shareability zone barrier here instead, i.e.
23203 "DMB ISH". */
23204 emit (0, "dmb\tsy", operands);
23205 return;
23208 if (TARGET_HAVE_DMB_MCR)
23210 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23211 return;
23214 gcc_unreachable ();
23217 /* Emit the memory barrier instruction, if any, provided by this
23218 target. */
23219 const char *
23220 arm_output_memory_barrier (rtx *operands)
23222 arm_process_output_memory_barrier (arm_emit, operands);
23223 return "";
23226 /* Helper to figure out the instruction suffix required on ldrex/strex
23227 for operations on an object of the specified mode. */
23228 static const char *
23229 arm_ldrex_suffix (enum machine_mode mode)
23231 switch (mode)
23233 case QImode: return "b";
23234 case HImode: return "h";
23235 case SImode: return "";
23236 case DImode: return "d";
23237 default:
23238 gcc_unreachable ();
23240 return "";
23243 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23244 mode. */
23245 static void
23246 arm_output_ldrex (emit_f emit,
23247 enum machine_mode mode,
23248 rtx target,
23249 rtx memory)
23251 const char *suffix = arm_ldrex_suffix (mode);
23252 rtx operands[2];
23254 operands[0] = target;
23255 operands[1] = memory;
23256 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
23259 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23260 mode. */
23261 static void
23262 arm_output_strex (emit_f emit,
23263 enum machine_mode mode,
23264 const char *cc,
23265 rtx result,
23266 rtx value,
23267 rtx memory)
23269 const char *suffix = arm_ldrex_suffix (mode);
23270 rtx operands[3];
23272 operands[0] = result;
23273 operands[1] = value;
23274 operands[2] = memory;
23275 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23276 cc);
23279 /* Helper to emit a two operand instruction. */
23280 static void
23281 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23283 rtx operands[2];
23285 operands[0] = d;
23286 operands[1] = s;
23287 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23290 /* Helper to emit a three operand instruction. */
23291 static void
23292 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23294 rtx operands[3];
23296 operands[0] = d;
23297 operands[1] = a;
23298 operands[2] = b;
23299 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23302 /* Emit a load store exclusive synchronization loop.
   do
23305 old_value = [mem]
23306 if old_value != required_value
23307 break;
23308 t1 = sync_op (old_value, new_value)
23309 [mem] = t1, t2 = [0|1]
23310 while ! t2
23312 Note:
23313 t1 == t2 is not permitted
23314 t1 == old_value is permitted
23316 required_value:
23318 RTX register or const_int representing the required old_value for
23319 the modify to continue; if NULL, no comparison is performed. */
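/* As an illustrative sketch (register numbers arbitrary), an SImode
   fetch-and-add emitted through this routine looks roughly like:

        dmb     sy              @ early barrier, if required
   .LSYT8:
        ldrex   r0, [r1]        @ old_value = [mem]
        add     r2, r0, r3      @ t1 = sync_op (old_value, new_value)
        strex   r4, r2, [r1]    @ [mem] = t1, t2 = 0 on success
        teq     r4, #0
        bne     .LSYT8          @ retry until the store-exclusive succeeds
        dmb     sy              @ final barrier
   .LSYB8:                                                              */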
23320 static void
23321 arm_output_sync_loop (emit_f emit,
23322 enum machine_mode mode,
23323 rtx old_value,
23324 rtx memory,
23325 rtx required_value,
23326 rtx new_value,
23327 rtx t1,
23328 rtx t2,
23329 enum attr_sync_op sync_op,
23330 int early_barrier_required)
23332 rtx operands[1];
23334 gcc_assert (t1 != t2);
23336 if (early_barrier_required)
23337 arm_process_output_memory_barrier (emit, NULL);
23339 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23341 arm_output_ldrex (emit, mode, old_value, memory);
23343 if (required_value)
23345 rtx operands[2];
23347 operands[0] = old_value;
23348 operands[1] = required_value;
23349 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23350 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23353 switch (sync_op)
23355 case SYNC_OP_ADD:
23356 arm_output_op3 (emit, "add", t1, old_value, new_value);
23357 break;
23359 case SYNC_OP_SUB:
23360 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23361 break;
23363 case SYNC_OP_IOR:
23364 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23365 break;
23367 case SYNC_OP_XOR:
23368 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23369 break;
23371 case SYNC_OP_AND:
23372 arm_output_op3 (emit, "and", t1, old_value, new_value);
23373 break;
23375 case SYNC_OP_NAND:
23376 arm_output_op3 (emit, "and", t1, old_value, new_value);
23377 arm_output_op2 (emit, "mvn", t1, t1);
23378 break;
23380 case SYNC_OP_NONE:
23381 t1 = new_value;
23382 break;
23385 if (t2)
23387 arm_output_strex (emit, mode, "", t2, t1, memory);
23388 operands[0] = t2;
23389 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23390 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23391 LOCAL_LABEL_PREFIX);
23393 else
23395 /* Use old_value for the return value because for some operations
23396 the old_value can easily be restored. This saves one register. */
23397 arm_output_strex (emit, mode, "", old_value, t1, memory);
23398 operands[0] = old_value;
23399 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23400 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23401 LOCAL_LABEL_PREFIX);
23403 switch (sync_op)
23405 case SYNC_OP_ADD:
23406 arm_output_op3 (emit, "sub", old_value, t1, new_value);
23407 break;
23409 case SYNC_OP_SUB:
23410 arm_output_op3 (emit, "add", old_value, t1, new_value);
23411 break;
23413 case SYNC_OP_XOR:
23414 arm_output_op3 (emit, "eor", old_value, t1, new_value);
23415 break;
23417 case SYNC_OP_NONE:
23418 arm_output_op2 (emit, "mov", old_value, required_value);
23419 break;
23421 default:
23422 gcc_unreachable ();
23426 arm_process_output_memory_barrier (emit, NULL);
23427 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
23430 static rtx
23431 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23433 if (index > 0)
23434 default_value = operands[index - 1];
23436 return default_value;
23439 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23440 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23442 /* Extract the operands for a synchronization instruction from the
23443 instruction's attributes and emit the instruction. */
23444 static void
23445 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23447 rtx result, memory, required_value, new_value, t1, t2;
23448 int early_barrier;
23449 enum machine_mode mode;
23450 enum attr_sync_op sync_op;
23452 result = FETCH_SYNC_OPERAND(result, 0);
23453 memory = FETCH_SYNC_OPERAND(memory, 0);
23454 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23455 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23456 t1 = FETCH_SYNC_OPERAND(t1, 0);
23457 t2 = FETCH_SYNC_OPERAND(t2, 0);
23458 early_barrier =
23459 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23460 sync_op = get_attr_sync_op (insn);
23461 mode = GET_MODE (memory);
23463 arm_output_sync_loop (emit, mode, result, memory, required_value,
23464 new_value, t1, t2, sync_op, early_barrier);
23467 /* Emit a synchronization instruction loop. */
23468 const char *
23469 arm_output_sync_insn (rtx insn, rtx *operands)
23471 arm_process_output_sync_insn (arm_emit, insn, operands);
23472 return "";
23475 /* Count the number of machine instructions that will be emitted for a
23476 synchronization instruction. Note that the emitter used does not
23477 emit instructions; it just counts them, being careful not
23478 to count labels. */
23479 unsigned int
23480 arm_sync_loop_insns (rtx insn, rtx *operands)
23482 arm_insn_count = 0;
23483 arm_process_output_sync_insn (arm_count, insn, operands);
23484 return arm_insn_count;
23487 /* Helper to call a target sync instruction generator, dealing with
23488 the variation in operands required by the different generators. */
23489 static rtx
23490 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23491 rtx memory, rtx required_value, rtx new_value)
23493 switch (generator->op)
23495 case arm_sync_generator_omn:
23496 gcc_assert (! required_value);
23497 return generator->u.omn (old_value, memory, new_value);
23499 case arm_sync_generator_omrn:
23500 gcc_assert (required_value);
23501 return generator->u.omrn (old_value, memory, required_value, new_value);
23504 return NULL;
23507 /* Expand a synchronization loop. The synchronization loop is expanded
23508 as an opaque block of instructions in order to ensure that we do
23509 not subsequently get extraneous memory accesses inserted within the
23510 critical region. The exclusive access property of ldrex/strex is
23511 only guaranteed if there are no intervening memory accesses. */
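/* A sketch of how this is typically reached (assuming GCC's __sync
   builtins): for

     int counter;
     int old = __sync_fetch_and_add (&counter, 1);

   the corresponding named sync pattern expands to a single call of this
   function, which emits the whole ldrex/strex loop as one opaque insn. */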
23512 void
23513 arm_expand_sync (enum machine_mode mode,
23514 struct arm_sync_generator *generator,
23515 rtx target, rtx memory, rtx required_value, rtx new_value)
23517 if (target == NULL)
23518 target = gen_reg_rtx (mode);
23520 memory = arm_legitimize_sync_memory (memory);
23521 if (mode != SImode)
23523 rtx load_temp = gen_reg_rtx (SImode);
23525 if (required_value)
23526 required_value = convert_modes (SImode, mode, required_value, true);
23528 new_value = convert_modes (SImode, mode, new_value, true);
23529 emit_insn (arm_call_generator (generator, load_temp, memory,
23530 required_value, new_value));
23531 emit_move_insn (target, gen_lowpart (mode, load_temp));
23533 else
23535 emit_insn (arm_call_generator (generator, target, memory, required_value,
23536 new_value));
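/* Implement TARGET_AUTOVECTORIZE_VECTOR_SIZES. The returned value
   appears to be a bitmask of vector sizes in bytes for the vectorizer
   to try: both 16 and 8 when quad-word NEON vectorization is enabled,
   otherwise 0, meaning only the default vector size is used. */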
23540 static unsigned int
23541 arm_autovectorize_vector_sizes (void)
23543 return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
23546 static bool
23547 arm_vector_alignment_reachable (const_tree type, bool is_packed)
23549 /* Vectors which aren't in packed structures will not be less aligned than
23550 the natural alignment of their element type, so this is safe. */
23551 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23552 return !is_packed;
23554 return default_builtin_vector_alignment_reachable (type, is_packed);
23557 static bool
23558 arm_builtin_support_vector_misalignment (enum machine_mode mode,
23559 const_tree type, int misalignment,
23560 bool is_packed)
23562 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23564 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
23566 if (is_packed)
23567 return align == 1;
23569 /* If the misalignment is unknown, we should be able to handle the access
23570 so long as it is not to a member of a packed data structure. */
23571 if (misalignment == -1)
23572 return true;
23574 /* Return true if the misalignment is a multiple of the natural alignment
23575 of the vector's element type. This is probably always going to be
23576 true in practice, since we've already established that this isn't a
23577 packed access. */
23578 return ((misalignment % align) == 0);
23581 return default_builtin_support_vector_misalignment (mode, type, misalignment,
23582 is_packed);
23585 static void
23586 arm_conditional_register_usage (void)
23588 int regno;
23590 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
23592 for (regno = FIRST_FPA_REGNUM;
23593 regno <= LAST_FPA_REGNUM; ++regno)
23594 fixed_regs[regno] = call_used_regs[regno] = 1;
23597 if (TARGET_THUMB1 && optimize_size)
23599 /* When optimizing for size on Thumb-1, it's better not
23600 to use the HI regs, because of the overhead of
23601 stacking them. */
23602 for (regno = FIRST_HI_REGNUM;
23603 regno <= LAST_HI_REGNUM; ++regno)
23604 fixed_regs[regno] = call_used_regs[regno] = 1;
23607 /* The link register can be clobbered by any branch insn,
23608 but we have no way to track that at present, so mark
23609 it as unavailable. */
23610 if (TARGET_THUMB1)
23611 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
23613 if (TARGET_32BIT && TARGET_HARD_FLOAT)
23615 if (TARGET_MAVERICK)
23617 for (regno = FIRST_FPA_REGNUM;
23618 regno <= LAST_FPA_REGNUM; ++ regno)
23619 fixed_regs[regno] = call_used_regs[regno] = 1;
23620 for (regno = FIRST_CIRRUS_FP_REGNUM;
23621 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
23623 fixed_regs[regno] = 0;
23624 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
23627 if (TARGET_VFP)
23629 /* VFPv3 registers are disabled when earlier VFP
23630 versions are selected due to the definition of
23631 LAST_VFP_REGNUM. */
23632 for (regno = FIRST_VFP_REGNUM;
23633 regno <= LAST_VFP_REGNUM; ++ regno)
23635 fixed_regs[regno] = 0;
23636 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
23637 || regno >= FIRST_VFP_REGNUM + 32;
23642 if (TARGET_REALLY_IWMMXT)
23644 regno = FIRST_IWMMXT_GR_REGNUM;
23645 /* The 2002/10/09 revision of the XScale ABI has wCG0
23646 and wCG1 as call-preserved registers. The 2002/11/21
23647 revision changed this so that all wCG registers are
23648 scratch registers. */
23649 for (regno = FIRST_IWMMXT_GR_REGNUM;
23650 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
23651 fixed_regs[regno] = 0;
23652 /* The XScale ABI has wR0 - wR9 as scratch registers,
23653 the rest as call-preserved registers. */
23654 for (regno = FIRST_IWMMXT_REGNUM;
23655 regno <= LAST_IWMMXT_REGNUM; ++ regno)
23657 fixed_regs[regno] = 0;
23658 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
23662 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
23664 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23665 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23667 else if (TARGET_APCS_STACK)
23669 fixed_regs[10] = 1;
23670 call_used_regs[10] = 1;
23672 /* -mcaller-super-interworking reserves r11 for calls to
23673 _interwork_r11_call_via_rN(). Making the register global
23674 is an easy way of ensuring that it remains valid for all
23675 calls. */
23676 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
23677 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
23679 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23680 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23681 if (TARGET_CALLER_INTERWORKING)
23682 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23684 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23687 static reg_class_t
23688 arm_preferred_rename_class (reg_class_t rclass)
23690 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
23691 using GENERAL_REGS. During the register rename pass we therefore
23692 prefer LO_REGS, which can reduce code size. */
23693 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
23694 return LO_REGS;
23695 else
23696 return NO_REGS;
23699 #include "gt-arm.h"