2011-03-21 Daniel Jacobowitz <dan@codesourcery.com>
[official-gcc.git] / gcc / config / arm / arm.c
blob e863e8c19689882f303204d1e8ac3c3d680f598e
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
55 #include "df.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
64 void (*arm_lang_output_object_attributes_hook)(void);
66 /* Forward function declarations. */
67 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
68 static int arm_compute_static_chain_stack_bytes (void);
69 static arm_stack_offsets *arm_get_frame_offsets (void);
70 static void arm_add_gc_roots (void);
71 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
72 HOST_WIDE_INT, rtx, rtx, int, int);
73 static unsigned bit_count (unsigned long);
74 static int arm_address_register_rtx_p (rtx, int);
75 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
76 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
77 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
78 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
79 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
80 inline static int thumb1_index_register_rtx_p (rtx, int);
81 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
82 static int thumb_far_jump_used_p (void);
83 static bool thumb_force_lr_save (void);
84 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
85 static rtx emit_sfm (int, int);
86 static unsigned arm_size_return_regs (void);
87 static bool arm_assemble_integer (rtx, unsigned int, int);
88 static void arm_print_operand (FILE *, rtx, int);
89 static void arm_print_operand_address (FILE *, rtx);
90 static bool arm_print_operand_punct_valid_p (unsigned char code);
91 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
92 static arm_cc get_arm_condition_code (rtx);
93 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
94 static rtx is_jump_table (rtx);
95 static const char *output_multi_immediate (rtx *, const char *, const char *,
96 int, HOST_WIDE_INT);
97 static const char *shift_op (rtx, HOST_WIDE_INT *);
98 static struct machine_function *arm_init_machine_status (void);
99 static void thumb_exit (FILE *, int);
100 static rtx is_jump_table (rtx);
101 static HOST_WIDE_INT get_jump_table_size (rtx);
102 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
103 static Mnode *add_minipool_forward_ref (Mfix *);
104 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
105 static Mnode *add_minipool_backward_ref (Mfix *);
106 static void assign_minipool_offsets (Mfix *);
107 static void arm_print_value (FILE *, rtx);
108 static void dump_minipool (rtx);
109 static int arm_barrier_cost (rtx);
110 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
111 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
112 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
113 rtx);
114 static void arm_reorg (void);
115 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
116 static unsigned long arm_compute_save_reg0_reg12_mask (void);
117 static unsigned long arm_compute_save_reg_mask (void);
118 static unsigned long arm_isr_value (tree);
119 static unsigned long arm_compute_func_type (void);
120 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
121 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
122 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
123 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
124 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
125 #endif
126 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
127 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
128 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
129 static int arm_comp_type_attributes (const_tree, const_tree);
130 static void arm_set_default_type_attributes (tree);
131 static int arm_adjust_cost (rtx, rtx, rtx, int);
132 static int count_insns_for_constant (HOST_WIDE_INT, int);
133 static int arm_get_strip_length (int);
134 static bool arm_function_ok_for_sibcall (tree, tree);
135 static enum machine_mode arm_promote_function_mode (const_tree,
136 enum machine_mode, int *,
137 const_tree, int);
138 static bool arm_return_in_memory (const_tree, const_tree);
139 static rtx arm_function_value (const_tree, const_tree, bool);
140 static rtx arm_libcall_value (enum machine_mode, const_rtx);
142 static void arm_internal_label (FILE *, const char *, unsigned long);
143 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
144 tree);
145 static bool arm_have_conditional_execution (void);
146 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
147 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
148 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
149 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
150 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
151 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
152 static bool arm_rtx_costs (rtx, int, int, int *, bool);
153 static int arm_address_cost (rtx, bool);
154 static bool arm_memory_load_p (rtx);
155 static bool arm_cirrus_insn_p (rtx);
156 static void cirrus_reorg (rtx);
157 static void arm_init_builtins (void);
158 static void arm_init_iwmmxt_builtins (void);
159 static rtx safe_vector_operand (rtx, enum machine_mode);
160 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
161 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
162 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
163 static void emit_constant_insn (rtx cond, rtx pattern);
164 static rtx emit_set_insn (rtx, rtx);
165 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
166 tree, bool);
167 static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
168 const_tree, bool);
169 static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
170 const_tree, bool);
171 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
172 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
173 const_tree);
174 static int aapcs_select_return_coproc (const_tree, const_tree);
176 #ifdef OBJECT_FORMAT_ELF
177 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
178 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
179 #endif
180 #ifndef ARM_PE
181 static void arm_encode_section_info (tree, rtx, int);
182 #endif
184 static void arm_file_end (void);
185 static void arm_file_start (void);
187 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
188 tree, int *, int);
189 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
190 enum machine_mode, const_tree, bool);
191 static bool arm_promote_prototypes (const_tree);
192 static bool arm_default_short_enums (void);
193 static bool arm_align_anon_bitfield (void);
194 static bool arm_return_in_msb (const_tree);
195 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
196 static bool arm_return_in_memory (const_tree, const_tree);
197 #if ARM_UNWIND_INFO
198 static void arm_unwind_emit (FILE *, rtx);
199 static bool arm_output_ttype (rtx);
200 static void arm_asm_emit_except_personality (rtx);
201 static void arm_asm_init_sections (void);
202 #endif
203 static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
204 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
205 static rtx arm_dwarf_register_span (rtx);
207 static tree arm_cxx_guard_type (void);
208 static bool arm_cxx_guard_mask_bit (void);
209 static tree arm_get_cookie_size (tree);
210 static bool arm_cookie_has_size (void);
211 static bool arm_cxx_cdtor_returns_this (void);
212 static bool arm_cxx_key_method_may_be_inline (void);
213 static void arm_cxx_determine_class_data_visibility (tree);
214 static bool arm_cxx_class_data_always_comdat (void);
215 static bool arm_cxx_use_aeabi_atexit (void);
216 static void arm_init_libfuncs (void);
217 static tree arm_build_builtin_va_list (void);
218 static void arm_expand_builtin_va_start (tree, rtx);
219 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
220 static void arm_option_override (void);
221 static bool arm_handle_option (size_t, const char *, int);
222 static void arm_target_help (void);
223 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
224 static bool arm_cannot_copy_insn_p (rtx);
225 static bool arm_tls_symbol_p (rtx x);
226 static int arm_issue_rate (void);
227 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
228 static bool arm_output_addr_const_extra (FILE *, rtx);
229 static bool arm_allocate_stack_slots_for_args (void);
230 static const char *arm_invalid_parameter_type (const_tree t);
231 static const char *arm_invalid_return_type (const_tree t);
232 static tree arm_promoted_type (const_tree t);
233 static tree arm_convert_to_type (tree type, tree expr);
234 static bool arm_scalar_mode_supported_p (enum machine_mode);
235 static bool arm_frame_pointer_required (void);
236 static bool arm_can_eliminate (const int, const int);
237 static void arm_asm_trampoline_template (FILE *);
238 static void arm_trampoline_init (rtx, tree, rtx);
239 static rtx arm_trampoline_adjust_address (rtx);
240 static rtx arm_pic_static_addr (rtx orig, rtx reg);
241 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
242 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
243 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
244 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
245 static bool arm_class_likely_spilled_p (reg_class_t);
246 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
247 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
248 const_tree type,
249 int misalignment,
250 bool is_packed);
251 static void arm_conditional_register_usage (void);
252 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
255 /* Table of machine attributes. */
256 static const struct attribute_spec arm_attribute_table[] =
258 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
259 affects_type_identity } */
260 /* Function calls made to this symbol must be done indirectly, because
261 it may lie outside of the 26 bit addressing range of a normal function
262 call. */
263 { "long_call", 0, 0, false, true, true, NULL, false },
264 /* Whereas these functions are always known to reside within the 26 bit
265 addressing range. */
266 { "short_call", 0, 0, false, true, true, NULL, false },
267 /* Specify the procedure call conventions for a function. */
268 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
269 false },
270 /* Interrupt Service Routines have special prologue and epilogue requirements. */
271 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
272 false },
273 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
274 false },
275 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
276 false },
277 #ifdef ARM_PE
278 /* ARM/PE has three new attributes:
279 interfacearm - ?
280 dllexport - for exporting a function/variable that will live in a dll
281 dllimport - for importing a function/variable from a dll
283 Microsoft allows multiple declspecs in one __declspec, separating
284 them with spaces. We do NOT support this. Instead, use __declspec
285 multiple times.
287 { "dllimport", 0, 0, true, false, false, NULL, false },
288 { "dllexport", 0, 0, true, false, false, NULL, false },
289 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
290 false },
291 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
292 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
293 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
294 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
295 false },
296 #endif
297 { NULL, 0, 0, false, false, false, NULL, false }
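/* For illustration only (not part of the original source): typical uses of
   the attributes registered above look like

       void far_func (void) __attribute__ ((long_call));
       void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
       double vfp_cb (double) __attribute__ ((pcs ("aapcs-vfp")));

   "IRQ" and "aapcs-vfp" are just example argument values.  */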
300 /* Set default optimization options. */
301 static const struct default_options arm_option_optimization_table[] =
303 /* Enable section anchors by default at -O1 or higher. */
304 { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
305 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
306 { OPT_LEVELS_NONE, 0, NULL, 0 }
309 /* Initialize the GCC target structure. */
310 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
311 #undef TARGET_MERGE_DECL_ATTRIBUTES
312 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
313 #endif
315 #undef TARGET_LEGITIMIZE_ADDRESS
316 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
318 #undef TARGET_ATTRIBUTE_TABLE
319 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
321 #undef TARGET_ASM_FILE_START
322 #define TARGET_ASM_FILE_START arm_file_start
323 #undef TARGET_ASM_FILE_END
324 #define TARGET_ASM_FILE_END arm_file_end
326 #undef TARGET_ASM_ALIGNED_SI_OP
327 #define TARGET_ASM_ALIGNED_SI_OP NULL
328 #undef TARGET_ASM_INTEGER
329 #define TARGET_ASM_INTEGER arm_assemble_integer
331 #undef TARGET_PRINT_OPERAND
332 #define TARGET_PRINT_OPERAND arm_print_operand
333 #undef TARGET_PRINT_OPERAND_ADDRESS
334 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
335 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
336 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
338 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
339 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
341 #undef TARGET_ASM_FUNCTION_PROLOGUE
342 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
344 #undef TARGET_ASM_FUNCTION_EPILOGUE
345 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
347 #undef TARGET_DEFAULT_TARGET_FLAGS
348 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
349 #undef TARGET_HANDLE_OPTION
350 #define TARGET_HANDLE_OPTION arm_handle_option
351 #undef TARGET_HELP
352 #define TARGET_HELP arm_target_help
353 #undef TARGET_OPTION_OVERRIDE
354 #define TARGET_OPTION_OVERRIDE arm_option_override
355 #undef TARGET_OPTION_OPTIMIZATION_TABLE
356 #define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table
358 #undef TARGET_COMP_TYPE_ATTRIBUTES
359 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
361 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
362 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
364 #undef TARGET_SCHED_ADJUST_COST
365 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
367 #undef TARGET_ENCODE_SECTION_INFO
368 #ifdef ARM_PE
369 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
370 #else
371 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
372 #endif
374 #undef TARGET_STRIP_NAME_ENCODING
375 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
377 #undef TARGET_ASM_INTERNAL_LABEL
378 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
380 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
381 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
383 #undef TARGET_FUNCTION_VALUE
384 #define TARGET_FUNCTION_VALUE arm_function_value
386 #undef TARGET_LIBCALL_VALUE
387 #define TARGET_LIBCALL_VALUE arm_libcall_value
389 #undef TARGET_ASM_OUTPUT_MI_THUNK
390 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
391 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
392 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
394 #undef TARGET_RTX_COSTS
395 #define TARGET_RTX_COSTS arm_rtx_costs
396 #undef TARGET_ADDRESS_COST
397 #define TARGET_ADDRESS_COST arm_address_cost
399 #undef TARGET_SHIFT_TRUNCATION_MASK
400 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
401 #undef TARGET_VECTOR_MODE_SUPPORTED_P
402 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
403 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
404 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
406 #undef TARGET_MACHINE_DEPENDENT_REORG
407 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
409 #undef TARGET_INIT_BUILTINS
410 #define TARGET_INIT_BUILTINS arm_init_builtins
411 #undef TARGET_EXPAND_BUILTIN
412 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
414 #undef TARGET_INIT_LIBFUNCS
415 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
417 #undef TARGET_PROMOTE_FUNCTION_MODE
418 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
419 #undef TARGET_PROMOTE_PROTOTYPES
420 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
421 #undef TARGET_PASS_BY_REFERENCE
422 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
423 #undef TARGET_ARG_PARTIAL_BYTES
424 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
425 #undef TARGET_FUNCTION_ARG
426 #define TARGET_FUNCTION_ARG arm_function_arg
427 #undef TARGET_FUNCTION_ARG_ADVANCE
428 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
429 #undef TARGET_FUNCTION_ARG_BOUNDARY
430 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
432 #undef TARGET_SETUP_INCOMING_VARARGS
433 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
435 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
436 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
438 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
439 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
440 #undef TARGET_TRAMPOLINE_INIT
441 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
442 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
443 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
445 #undef TARGET_DEFAULT_SHORT_ENUMS
446 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
448 #undef TARGET_ALIGN_ANON_BITFIELD
449 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
451 #undef TARGET_NARROW_VOLATILE_BITFIELD
452 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
454 #undef TARGET_CXX_GUARD_TYPE
455 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
457 #undef TARGET_CXX_GUARD_MASK_BIT
458 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
460 #undef TARGET_CXX_GET_COOKIE_SIZE
461 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
463 #undef TARGET_CXX_COOKIE_HAS_SIZE
464 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
466 #undef TARGET_CXX_CDTOR_RETURNS_THIS
467 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
469 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
470 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
472 #undef TARGET_CXX_USE_AEABI_ATEXIT
473 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
475 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
476 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
477 arm_cxx_determine_class_data_visibility
479 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
480 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
482 #undef TARGET_RETURN_IN_MSB
483 #define TARGET_RETURN_IN_MSB arm_return_in_msb
485 #undef TARGET_RETURN_IN_MEMORY
486 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
488 #undef TARGET_MUST_PASS_IN_STACK
489 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
491 #if ARM_UNWIND_INFO
492 #undef TARGET_ASM_UNWIND_EMIT
493 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
495 /* EABI unwinding tables use a different format for the typeinfo tables. */
496 #undef TARGET_ASM_TTYPE
497 #define TARGET_ASM_TTYPE arm_output_ttype
499 #undef TARGET_ARM_EABI_UNWINDER
500 #define TARGET_ARM_EABI_UNWINDER true
502 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
503 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
505 #undef TARGET_ASM_INIT_SECTIONS
506 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
507 #endif /* ARM_UNWIND_INFO */
509 #undef TARGET_EXCEPT_UNWIND_INFO
510 #define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
512 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
513 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
515 #undef TARGET_DWARF_REGISTER_SPAN
516 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
518 #undef TARGET_CANNOT_COPY_INSN_P
519 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
521 #ifdef HAVE_AS_TLS
522 #undef TARGET_HAVE_TLS
523 #define TARGET_HAVE_TLS true
524 #endif
526 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
527 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
529 #undef TARGET_CANNOT_FORCE_CONST_MEM
530 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
532 #undef TARGET_MAX_ANCHOR_OFFSET
533 #define TARGET_MAX_ANCHOR_OFFSET 4095
535 /* The minimum is set such that the total size of the block
536 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
537 divisible by eight, ensuring natural spacing of anchors. */
538 #undef TARGET_MIN_ANCHOR_OFFSET
539 #define TARGET_MIN_ANCHOR_OFFSET -4088
541 #undef TARGET_SCHED_ISSUE_RATE
542 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
544 #undef TARGET_MANGLE_TYPE
545 #define TARGET_MANGLE_TYPE arm_mangle_type
547 #undef TARGET_BUILD_BUILTIN_VA_LIST
548 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
549 #undef TARGET_EXPAND_BUILTIN_VA_START
550 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
551 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
552 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
554 #ifdef HAVE_AS_TLS
555 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
556 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
557 #endif
559 #undef TARGET_LEGITIMATE_ADDRESS_P
560 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
562 #undef TARGET_INVALID_PARAMETER_TYPE
563 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
565 #undef TARGET_INVALID_RETURN_TYPE
566 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
568 #undef TARGET_PROMOTED_TYPE
569 #define TARGET_PROMOTED_TYPE arm_promoted_type
571 #undef TARGET_CONVERT_TO_TYPE
572 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
574 #undef TARGET_SCALAR_MODE_SUPPORTED_P
575 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
577 #undef TARGET_FRAME_POINTER_REQUIRED
578 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
580 #undef TARGET_CAN_ELIMINATE
581 #define TARGET_CAN_ELIMINATE arm_can_eliminate
583 #undef TARGET_CONDITIONAL_REGISTER_USAGE
584 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
586 #undef TARGET_CLASS_LIKELY_SPILLED_P
587 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
589 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
590 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
591 arm_vector_alignment_reachable
593 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
594 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
595 arm_builtin_support_vector_misalignment
597 #undef TARGET_PREFERRED_RENAME_CLASS
598 #define TARGET_PREFERRED_RENAME_CLASS \
599 arm_preferred_rename_class
601 struct gcc_target targetm = TARGET_INITIALIZER;
603 /* Obstack for minipool constant handling. */
604 static struct obstack minipool_obstack;
605 static char * minipool_startobj;
607 /* The maximum number of insns skipped which
608 will be conditionalised if possible. */
609 static int max_insns_skipped = 5;
611 extern FILE * asm_out_file;
613 /* True if we are currently building a constant table. */
614 int making_const_table;
616 /* The processor for which instructions should be scheduled. */
617 enum processor_type arm_tune = arm_none;
619 /* The current tuning set. */
620 const struct tune_params *current_tune;
622 /* Which floating point hardware to schedule for. */
623 int arm_fpu_attr;
625 /* Which floating point hardware to use. */
626 const struct arm_fpu_desc *arm_fpu_desc;
628 /* Whether to use floating point hardware. */
629 enum float_abi_type arm_float_abi;
631 /* Which __fp16 format to use. */
632 enum arm_fp16_format_type arm_fp16_format;
634 /* Which ABI to use. */
635 enum arm_abi_type arm_abi;
637 /* Which thread pointer model to use. */
638 enum arm_tp_type target_thread_pointer = TP_AUTO;
640 /* Used to parse -mstructure_size_boundary command line option. */
641 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
643 /* Used for Thumb call_via trampolines. */
644 rtx thumb_call_via_label[14];
645 static int thumb_call_reg_needed;
647 /* Bit values used to identify processor capabilities. */
648 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
649 #define FL_ARCH3M (1 << 1) /* Extended multiply */
650 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
651 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
652 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
653 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
654 #define FL_THUMB (1 << 6) /* Thumb aware */
655 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
656 #define FL_STRONG (1 << 8) /* StrongARM */
657 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
658 #define FL_XSCALE (1 << 10) /* XScale */
659 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
660 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
661 media instructions. */
662 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
663 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
664 Note: ARM6 & 7 derivatives only. */
665 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
666 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
667 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
668 profile. */
669 #define FL_DIV (1 << 18) /* Hardware divide. */
670 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
671 #define FL_NEON (1 << 20) /* Neon instructions. */
672 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
673 architecture. */
674 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
676 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
678 /* Flags that only affect tuning, not available instructions. */
679 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
680 | FL_CO_PROC)
682 #define FL_FOR_ARCH2 FL_NOTM
683 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
684 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
685 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
686 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
687 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
688 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
689 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
690 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
691 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
692 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
693 #define FL_FOR_ARCH6J FL_FOR_ARCH6
694 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
695 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
696 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
697 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
698 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
699 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
700 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
701 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
702 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
703 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
705 /* The bits in this mask specify which
706 instructions we are allowed to generate. */
707 static unsigned long insn_flags = 0;
709 /* The bits in this mask specify which instruction scheduling options should
710 be used. */
711 static unsigned long tune_flags = 0;
713 /* The following are used in the arm.md file as equivalents to bits
714 in the above two flag variables. */
716 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
717 int arm_arch3m = 0;
719 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
720 int arm_arch4 = 0;
722 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
723 int arm_arch4t = 0;
725 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
726 int arm_arch5 = 0;
728 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
729 int arm_arch5e = 0;
731 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
732 int arm_arch6 = 0;
734 /* Nonzero if this chip supports the ARM 6K extensions. */
735 int arm_arch6k = 0;
737 /* Nonzero if this chip supports the ARM 7 extensions. */
738 int arm_arch7 = 0;
740 /* Nonzero if instructions not present in the 'M' profile can be used. */
741 int arm_arch_notm = 0;
743 /* Nonzero if instructions present in ARMv7E-M can be used. */
744 int arm_arch7em = 0;
746 /* Nonzero if this chip can benefit from load scheduling. */
747 int arm_ld_sched = 0;
749 /* Nonzero if this chip is a StrongARM. */
750 int arm_tune_strongarm = 0;
752 /* Nonzero if this chip is a Cirrus variant. */
753 int arm_arch_cirrus = 0;
755 /* Nonzero if this chip supports Intel Wireless MMX technology. */
756 int arm_arch_iwmmxt = 0;
758 /* Nonzero if this chip is an XScale. */
759 int arm_arch_xscale = 0;
761 /* Nonzero if tuning for XScale */
762 int arm_tune_xscale = 0;
764 /* Nonzero if we want to tune for stores that access the write-buffer.
765 This typically means an ARM6 or ARM7 with MMU or MPU. */
766 int arm_tune_wbuf = 0;
768 /* Nonzero if tuning for Cortex-A9. */
769 int arm_tune_cortex_a9 = 0;
771 /* Nonzero if generating Thumb instructions. */
772 int thumb_code = 0;
774 /* Nonzero if generating Thumb-1 instructions. */
775 int thumb1_code = 0;
777 /* Nonzero if we should define __THUMB_INTERWORK__ in the
778 preprocessor.
779 XXX This is a bit of a hack, it's intended to help work around
780 problems in GLD which doesn't understand that armv5t code is
781 interworking clean. */
782 int arm_cpp_interwork = 0;
784 /* Nonzero if chip supports Thumb 2. */
785 int arm_arch_thumb2;
787 /* Nonzero if chip supports integer division instruction. */
788 int arm_arch_hwdiv;
790 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
791 we must report the mode of the memory reference from
792 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
793 enum machine_mode output_memory_reference_mode;
795 /* The register number to be used for the PIC offset register. */
796 unsigned arm_pic_register = INVALID_REGNUM;
798 /* Set to 1 after arm_reorg has started. Reset to start at the start of
799 the next function. */
800 static int after_arm_reorg = 0;
802 enum arm_pcs arm_pcs_default;
804 /* For an explanation of these variables, see final_prescan_insn below. */
805 int arm_ccfsm_state;
806 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
807 enum arm_cond_code arm_current_cc;
809 rtx arm_target_insn;
810 int arm_target_label;
811 /* The number of conditionally executed insns, including the current insn. */
812 int arm_condexec_count = 0;
813 /* A bitmask specifying the patterns for the IT block.
814 Zero means do not output an IT block before this insn. */
815 int arm_condexec_mask = 0;
816 /* The number of bits used in arm_condexec_mask. */
817 int arm_condexec_masklen = 0;
819 /* The condition codes of the ARM, and the inverse function. */
820 static const char * const arm_condition_codes[] =
822 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
823 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
826 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
827 int arm_regs_in_sequence[] =
829 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
832 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
833 #define streq(string1, string2) (strcmp (string1, string2) == 0)
835 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
836 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
837 | (1 << PIC_OFFSET_TABLE_REGNUM)))
839 /* Initialization code. */
841 struct processors
843 const char *const name;
844 enum processor_type core;
845 const char *arch;
846 const unsigned long flags;
847 const struct tune_params *const tune;
851 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
852 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
853 prefetch_slots, \
854 l1_size, \
855 l1_line_size
857 const struct tune_params arm_slowmul_tune =
859 arm_slowmul_rtx_costs,
860 NULL,
862 ARM_PREFETCH_NOT_BENEFICIAL
865 const struct tune_params arm_fastmul_tune =
867 arm_fastmul_rtx_costs,
868 NULL,
870 ARM_PREFETCH_NOT_BENEFICIAL
873 const struct tune_params arm_xscale_tune =
875 arm_xscale_rtx_costs,
876 xscale_sched_adjust_cost,
878 ARM_PREFETCH_NOT_BENEFICIAL
881 const struct tune_params arm_9e_tune =
883 arm_9e_rtx_costs,
884 NULL,
886 ARM_PREFETCH_NOT_BENEFICIAL
889 const struct tune_params arm_cortex_a9_tune =
891 arm_9e_rtx_costs,
892 cortex_a9_sched_adjust_cost,
894 ARM_PREFETCH_BENEFICIAL(4,32,32)
897 const struct tune_params arm_fa726te_tune =
899 arm_9e_rtx_costs,
900 fa726te_sched_adjust_cost,
902 ARM_PREFETCH_NOT_BENEFICIAL
906 /* Not all of these give usefully different compilation alternatives,
907 but there is no simple way of generalizing them. */
908 static const struct processors all_cores[] =
910 /* ARM Cores */
911 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
912 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
913 #include "arm-cores.def"
914 #undef ARM_CORE
915 {NULL, arm_none, NULL, 0, NULL}
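/* Illustrative example (assuming arm-cores.def contains an entry such as
   ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e)): the ARM_CORE
   macro above would expand it to

       {"cortex-a8", cortexa8, "7A", FL_LDSCHED | FL_FOR_ARCH7A, &arm_9e_tune},  */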
918 static const struct processors all_architectures[] =
920 /* ARM Architectures */
921 /* We don't specify tuning costs here as it will be figured out
922 from the core. */
924 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
925 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
926 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
927 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
928 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
929 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
930 implementations that support it, so we will leave it out for now. */
931 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
932 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
933 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
934 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
935 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
936 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
937 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
938 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
939 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
940 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
941 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
942 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
943 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
944 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
945 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
946 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
947 {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
948 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
949 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
950 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
951 {NULL, arm_none, NULL, 0 , NULL}
955 /* These are populated as command-line arguments are processed, or NULL
956 if not specified. */
957 static const struct processors *arm_selected_arch;
958 static const struct processors *arm_selected_cpu;
959 static const struct processors *arm_selected_tune;
961 /* The name of the preprocessor macro to define for this architecture. */
963 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
965 /* Available values for -mfpu=. */
967 static const struct arm_fpu_desc all_fpus[] =
969 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
970 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
971 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
972 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
973 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
974 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
975 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
976 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
977 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
978 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
979 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
980 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
981 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
982 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
983 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
984 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
985 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
986 /* Compatibility aliases. */
987 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
991 struct float_abi
993 const char * name;
994 enum float_abi_type abi_type;
998 /* Available values for -mfloat-abi=. */
1000 static const struct float_abi all_float_abis[] =
1002 {"soft", ARM_FLOAT_ABI_SOFT},
1003 {"softfp", ARM_FLOAT_ABI_SOFTFP},
1004 {"hard", ARM_FLOAT_ABI_HARD}
1008 struct fp16_format
1010 const char *name;
1011 enum arm_fp16_format_type fp16_format_type;
1015 /* Available values for -mfp16-format=. */
1017 static const struct fp16_format all_fp16_formats[] =
1019 {"none", ARM_FP16_FORMAT_NONE},
1020 {"ieee", ARM_FP16_FORMAT_IEEE},
1021 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
1025 struct abi_name
1027 const char *name;
1028 enum arm_abi_type abi_type;
1032 /* Available values for -mabi=. */
1034 static const struct abi_name arm_all_abis[] =
1036 {"apcs-gnu", ARM_ABI_APCS},
1037 {"atpcs", ARM_ABI_ATPCS},
1038 {"aapcs", ARM_ABI_AAPCS},
1039 {"iwmmxt", ARM_ABI_IWMMXT},
1040 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
1043 /* Supported TLS relocations. */
1045 enum tls_reloc {
1046 TLS_GD32,
1047 TLS_LDM32,
1048 TLS_LDO32,
1049 TLS_IE32,
1050 TLS_LE32
1053 /* The maximum number of insns to be used when loading a constant. */
1054 inline static int
1055 arm_constant_limit (bool size_p)
1057 return size_p ? 1 : current_tune->constant_limit;
1060 /* Emit an insn that's a simple single-set. Both the operands must be known
1061 to be valid. */
1062 inline static rtx
1063 emit_set_insn (rtx x, rtx y)
1065 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1068 /* Return the number of bits set in VALUE. */
1069 static unsigned
1070 bit_count (unsigned long value)
1072 unsigned long count = 0;
1074 while (value)
1076 count++;
1077 value &= value - 1; /* Clear the least-significant set bit. */
1080 return count;
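/* For example, bit_count (0x29) sees the binary value 101001: successive
   iterations clear bit 0, then bit 3, then bit 5, so the function returns 3.  */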
1083 /* Set up library functions unique to ARM. */
1085 static void
1086 arm_init_libfuncs (void)
1088 /* There are no special library functions unless we are using the
1089 ARM BPABI. */
1090 if (!TARGET_BPABI)
1091 return;
1093 /* The functions below are described in Section 4 of the "Run-Time
1094 ABI for the ARM architecture", Version 1.0. */
1096 /* Double-precision floating-point arithmetic. Table 2. */
1097 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1098 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1099 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1100 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1101 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1103 /* Double-precision comparisons. Table 3. */
1104 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1105 set_optab_libfunc (ne_optab, DFmode, NULL);
1106 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1107 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1108 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1109 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1110 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1112 /* Single-precision floating-point arithmetic. Table 4. */
1113 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1114 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1115 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1116 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1117 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1119 /* Single-precision comparisons. Table 5. */
1120 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1121 set_optab_libfunc (ne_optab, SFmode, NULL);
1122 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1123 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1124 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1125 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1126 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1128 /* Floating-point to integer conversions. Table 6. */
1129 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1130 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1131 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1132 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1133 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1134 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1135 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1136 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1138 /* Conversions between floating types. Table 7. */
1139 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1140 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1142 /* Integer to floating-point conversions. Table 8. */
1143 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1144 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1145 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1146 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1147 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1148 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1149 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1150 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1152 /* Long long. Table 9. */
1153 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1154 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1155 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1156 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1157 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1158 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1159 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1160 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1162 /* Integer (32/32->32) division. \S 4.3.1. */
1163 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1164 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1166 /* The divmod functions are designed so that they can be used for
1167 plain division, even though they return both the quotient and the
1168 remainder. The quotient is returned in the usual location (i.e.,
1169 r0 for SImode, {r0, r1} for DImode), just as would be expected
1170 for an ordinary division routine. Because the AAPCS calling
1171 conventions specify that all of { r0, r1, r2, r3 } are
1172 call-clobbered registers, there is no need to tell the compiler
1173 explicitly that those registers are clobbered by these
1174 routines. */
1175 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1176 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
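  /* Illustration (not part of the table above): with these mappings a
     plain 64-bit division such as

         long long quot (long long a, long long b) { return a / b; }

     becomes a single call to __aeabi_ldivmod; the quotient comes back in
     {r0, r1} as usual and the remainder (in {r2, r3} per the run-time ABI)
     is simply ignored.  */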
1178 /* For SImode division the ABI provides div-without-mod routines,
1179 which are faster. */
1180 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1181 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1183 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1184 divmod libcalls instead. */
1185 set_optab_libfunc (smod_optab, DImode, NULL);
1186 set_optab_libfunc (umod_optab, DImode, NULL);
1187 set_optab_libfunc (smod_optab, SImode, NULL);
1188 set_optab_libfunc (umod_optab, SImode, NULL);
1190 /* Half-precision float operations. The compiler handles all operations
1191 with NULL libfuncs by converting to SFmode. */
1192 switch (arm_fp16_format)
1194 case ARM_FP16_FORMAT_IEEE:
1195 case ARM_FP16_FORMAT_ALTERNATIVE:
1197 /* Conversions. */
1198 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1199 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1200 ? "__gnu_f2h_ieee"
1201 : "__gnu_f2h_alternative"));
1202 set_conv_libfunc (sext_optab, SFmode, HFmode,
1203 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1204 ? "__gnu_h2f_ieee"
1205 : "__gnu_h2f_alternative"));
1207 /* Arithmetic. */
1208 set_optab_libfunc (add_optab, HFmode, NULL);
1209 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1210 set_optab_libfunc (smul_optab, HFmode, NULL);
1211 set_optab_libfunc (neg_optab, HFmode, NULL);
1212 set_optab_libfunc (sub_optab, HFmode, NULL);
1214 /* Comparisons. */
1215 set_optab_libfunc (eq_optab, HFmode, NULL);
1216 set_optab_libfunc (ne_optab, HFmode, NULL);
1217 set_optab_libfunc (lt_optab, HFmode, NULL);
1218 set_optab_libfunc (le_optab, HFmode, NULL);
1219 set_optab_libfunc (ge_optab, HFmode, NULL);
1220 set_optab_libfunc (gt_optab, HFmode, NULL);
1221 set_optab_libfunc (unord_optab, HFmode, NULL);
1222 break;
1224 default:
1225 break;
1228 if (TARGET_AAPCS_BASED)
1229 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1232 /* On AAPCS systems, this is the "struct __va_list". */
1233 static GTY(()) tree va_list_type;
1235 /* Return the type to use as __builtin_va_list. */
1236 static tree
1237 arm_build_builtin_va_list (void)
1239 tree va_list_name;
1240 tree ap_field;
1242 if (!TARGET_AAPCS_BASED)
1243 return std_build_builtin_va_list ();
1245 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1246 defined as:
1248 struct __va_list
1250 void *__ap;
1253 The C Library ABI further reinforces this definition in \S
1254 4.1.
1256 We must follow this definition exactly. The structure tag
1257 name is visible in C++ mangled names, and thus forms a part
1258 of the ABI. The field name may be used by people who
1259 #include <stdarg.h>. */
1260 /* Create the type. */
1261 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1262 /* Give it the required name. */
1263 va_list_name = build_decl (BUILTINS_LOCATION,
1264 TYPE_DECL,
1265 get_identifier ("__va_list"),
1266 va_list_type);
1267 DECL_ARTIFICIAL (va_list_name) = 1;
1268 TYPE_NAME (va_list_type) = va_list_name;
1269 TYPE_STUB_DECL (va_list_type) = va_list_name;
1270 /* Create the __ap field. */
1271 ap_field = build_decl (BUILTINS_LOCATION,
1272 FIELD_DECL,
1273 get_identifier ("__ap"),
1274 ptr_type_node);
1275 DECL_ARTIFICIAL (ap_field) = 1;
1276 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1277 TYPE_FIELDS (va_list_type) = ap_field;
1278 /* Compute its layout. */
1279 layout_type (va_list_type);
1281 return va_list_type;
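/* For illustration (assuming the ARM C++ ABI rule that va_list is mangled as
   if it were std::__va_list): the "__va_list" tag chosen above is what makes
   a declaration such as void f (va_list) mangle to _Z1fSt9__va_list.  */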
1284 /* Return an expression of type "void *" pointing to the next
1285 available argument in a variable-argument list. VALIST is the
1286 user-level va_list object, of type __builtin_va_list. */
1287 static tree
1288 arm_extract_valist_ptr (tree valist)
1290 if (TREE_TYPE (valist) == error_mark_node)
1291 return error_mark_node;
1293 /* On an AAPCS target, the pointer is stored within "struct
1294 va_list". */
1295 if (TARGET_AAPCS_BASED)
1297 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1298 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1299 valist, ap_field, NULL_TREE);
1302 return valist;
1305 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1306 static void
1307 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1309 valist = arm_extract_valist_ptr (valist);
1310 std_expand_builtin_va_start (valist, nextarg);
1313 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1314 static tree
1315 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1316 gimple_seq *post_p)
1318 valist = arm_extract_valist_ptr (valist);
1319 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1322 /* Lookup NAME in SEL. */
1324 static const struct processors *
1325 arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
1327 if (!(name && *name))
1328 return NULL;
1330 for (; sel->name != NULL; sel++)
1332 if (streq (name, sel->name))
1333 return sel;
1336 error ("bad value (%s) for %s switch", name, desc);
1337 return NULL;
1340 /* Implement TARGET_HANDLE_OPTION. */
1342 static bool
1343 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1345 switch (code)
1347 case OPT_march_:
1348 arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
1349 return true;
1351 case OPT_mcpu_:
1352 arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
1353 return true;
1355 case OPT_mhard_float:
1356 target_float_abi_name = "hard";
1357 return true;
1359 case OPT_msoft_float:
1360 target_float_abi_name = "soft";
1361 return true;
1363 case OPT_mtune_:
1364 arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
1365 return true;
1367 default:
1368 return true;
1372 static void
1373 arm_target_help (void)
1375 int i;
1376 static int columns = 0;
1377 int remaining;
1379 /* If we have not done so already, obtain the desired maximum width of
1380 the output. Note - this is a duplication of the code at the start of
1381 gcc/opts.c:print_specific_help() - the two copies should probably be
1382 replaced by a single function. */
1383 if (columns == 0)
1385 const char *p;
1387 p = getenv ("COLUMNS");
1388 if (p != NULL)
1390 int value = atoi (p);
1392 if (value > 0)
1393 columns = value;
1396 if (columns == 0)
1397 /* Use a reasonable default. */
1398 columns = 80;
1401 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1403 /* The - 2 is because we know that the last entry in the array is NULL. */
1404 i = ARRAY_SIZE (all_cores) - 2;
1405 gcc_assert (i > 0);
1406 printf (" %s", all_cores[i].name);
1407 remaining = columns - (strlen (all_cores[i].name) + 4);
1408 gcc_assert (remaining >= 0);
1410 while (i--)
1412 int len = strlen (all_cores[i].name);
1414 if (remaining > len + 2)
1416 printf (", %s", all_cores[i].name);
1417 remaining -= len + 2;
1419 else
1421 if (remaining > 0)
1422 printf (",");
1423 printf ("\n %s", all_cores[i].name);
1424 remaining = columns - (len + 4);
1428 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1430 i = ARRAY_SIZE (all_architectures) - 2;
1431 gcc_assert (i > 0);
1433 printf (" %s", all_architectures[i].name);
1434 remaining = columns - (strlen (all_architectures[i].name) + 4);
1435 gcc_assert (remaining >= 0);
1437 while (i--)
1439 int len = strlen (all_architectures[i].name);
1441 if (remaining > len + 2)
1443 printf (", %s", all_architectures[i].name);
1444 remaining -= len + 2;
1446 else
1448 if (remaining > 0)
1449 printf (",");
1450 printf ("\n %s", all_architectures[i].name);
1451 remaining = columns - (len + 4);
1454 printf ("\n");
1458 /* Fix up any incompatible options that the user has specified. */
1459 static void
1460 arm_option_override (void)
1462 unsigned i;
1464 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1465 SUBTARGET_OVERRIDE_OPTIONS;
1466 #endif
1468 if (arm_selected_arch)
1470 if (arm_selected_cpu)
1472 /* Check for conflict between mcpu and march. */
1473 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1475 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1476 arm_selected_cpu->name, arm_selected_arch->name);
1477 /* -march wins for code generation.
1478 -mcpu wins for default tuning. */
1479 if (!arm_selected_tune)
1480 arm_selected_tune = arm_selected_cpu;
1482 arm_selected_cpu = arm_selected_arch;
1484 else
1485 /* -mcpu wins. */
1486 arm_selected_arch = NULL;
1488 else
1489 /* Pick a CPU based on the architecture. */
1490 arm_selected_cpu = arm_selected_arch;
1493 /* If the user did not specify a processor, choose one for them. */
1494 if (!arm_selected_cpu)
1496 const struct processors * sel;
1497 unsigned int sought;
1499 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1500 if (!arm_selected_cpu->name)
1502 #ifdef SUBTARGET_CPU_DEFAULT
1503 /* Use the subtarget default CPU if none was specified by
1504 configure. */
1505 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1506 #endif
1507 /* Default to ARM6. */
1508 if (!arm_selected_cpu->name)
1509 arm_selected_cpu = &all_cores[arm6];
1512 sel = arm_selected_cpu;
1513 insn_flags = sel->flags;
1515 /* Now check to see if the user has specified some command line
1516 switch that requires certain abilities from the cpu. */
1517 sought = 0;
1519 if (TARGET_INTERWORK || TARGET_THUMB)
1521 sought |= (FL_THUMB | FL_MODE32);
1523 /* There are no ARM processors that support both APCS-26 and
1524 interworking. Therefore we force FL_MODE26 to be removed
1525 from insn_flags here (if it was set), so that the search
1526 below will always be able to find a compatible processor. */
1527 insn_flags &= ~FL_MODE26;
1530 if (sought != 0 && ((sought & insn_flags) != sought))
1532 /* Try to locate a CPU type that supports all of the abilities
1533 of the default CPU, plus the extra abilities requested by
1534 the user. */
1535 for (sel = all_cores; sel->name != NULL; sel++)
1536 if ((sel->flags & sought) == (sought | insn_flags))
1537 break;
1539 if (sel->name == NULL)
1541 unsigned current_bit_count = 0;
1542 const struct processors * best_fit = NULL;
1544 /* Ideally we would like to issue an error message here
1545 saying that it was not possible to find a CPU compatible
1546 with the default CPU, but which also supports the command
1547 line options specified by the programmer, and so they
1548 ought to use the -mcpu=<name> command line option to
1549 override the default CPU type.
1551 If we cannot find a cpu that has both the
1552 characteristics of the default cpu and the given
1553 command line options we scan the array again looking
1554 for a best match. */
1555 for (sel = all_cores; sel->name != NULL; sel++)
1556 if ((sel->flags & sought) == sought)
1558 unsigned count;
1560 count = bit_count (sel->flags & insn_flags);
1562 if (count >= current_bit_count)
1564 best_fit = sel;
1565 current_bit_count = count;
1569 gcc_assert (best_fit);
1570 sel = best_fit;
1573 arm_selected_cpu = sel;
1577 gcc_assert (arm_selected_cpu);
1578 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1579 if (!arm_selected_tune)
1580 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1582 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1583 insn_flags = arm_selected_cpu->flags;
1585 arm_tune = arm_selected_tune->core;
1586 tune_flags = arm_selected_tune->flags;
1587 current_tune = arm_selected_tune->tune;
1589 if (target_fp16_format_name)
1591 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1593 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1595 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1596 break;
1599 if (i == ARRAY_SIZE (all_fp16_formats))
1600 error ("invalid __fp16 format option: -mfp16-format=%s",
1601 target_fp16_format_name);
1603 else
1604 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1606 if (target_abi_name)
1608 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1610 if (streq (arm_all_abis[i].name, target_abi_name))
1612 arm_abi = arm_all_abis[i].abi_type;
1613 break;
1616 if (i == ARRAY_SIZE (arm_all_abis))
1617 error ("invalid ABI option: -mabi=%s", target_abi_name);
1619 else
1620 arm_abi = ARM_DEFAULT_ABI;
1622 /* Make sure that the processor choice does not conflict with any of the
1623 other command line choices. */
1624 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1625 error ("target CPU does not support ARM mode");
1627 /* BPABI targets use linker tricks to allow interworking on cores
1628 without thumb support. */
1629 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1631 warning (0, "target CPU does not support interworking" );
1632 target_flags &= ~MASK_INTERWORK;
1635 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1637 warning (0, "target CPU does not support THUMB instructions");
1638 target_flags &= ~MASK_THUMB;
1641 if (TARGET_APCS_FRAME && TARGET_THUMB)
1643 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1644 target_flags &= ~MASK_APCS_FRAME;
1647 /* Callee super interworking implies thumb interworking. Adding
1648 this to the flags here simplifies the logic elsewhere. */
1649 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1650 target_flags |= MASK_INTERWORK;
1652 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1653 from here where no function is being compiled currently. */
1654 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1655 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1657 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1658 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1660 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1662 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1663 target_flags |= MASK_APCS_FRAME;
1666 if (TARGET_POKE_FUNCTION_NAME)
1667 target_flags |= MASK_APCS_FRAME;
1669 if (TARGET_APCS_REENT && flag_pic)
1670 error ("-fpic and -mapcs-reent are incompatible");
1672 if (TARGET_APCS_REENT)
1673 warning (0, "APCS reentrant code not supported. Ignored");
1675 /* If this target is normally configured to use APCS frames, warn if they
1676 are turned off and debugging is turned on. */
1677 if (TARGET_ARM
1678 && write_symbols != NO_DEBUG
1679 && !TARGET_APCS_FRAME
1680 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1681 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1683 if (TARGET_APCS_FLOAT)
1684 warning (0, "passing floating point arguments in fp regs not yet supported");
1686 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1687 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1688 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1689 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1690 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1691 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1692 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1693 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1694 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1695 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1696 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1697 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1698 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1699 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1701 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1702 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1703 thumb_code = TARGET_ARM == 0;
1704 thumb1_code = TARGET_THUMB1 != 0;
1705 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1706 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1707 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1708 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1709 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1711 /* If we are not using the default (ARM mode) section anchor offset
1712 ranges, then set the correct ranges now. */
1713 if (TARGET_THUMB1)
1715 /* Thumb-1 LDR instructions cannot have negative offsets.
1716 Permissible positive offset ranges are 5-bit (for byte loads),
1717 6-bit (for halfword loads), or 7-bit (for word loads).
1718 Empirical results suggest a 7-bit anchor range gives the best
1719 overall code size. */
1720 targetm.min_anchor_offset = 0;
1721 targetm.max_anchor_offset = 127;
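/* (For reference: the 5-bit immediate in those load instructions is scaled
   by the access size, giving byte offsets of up to 31, 62 and 124
   respectively; those scaled ranges are what the 5/6/7-bit figures above
   refer to, and the 0..127 window covers the word-load case.)  */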
1723 else if (TARGET_THUMB2)
1725 /* The minimum is set such that the total size of the block
1726 for a particular anchor is 248 + 1 + 4095 bytes, which is
1727 divisible by eight, ensuring natural spacing of anchors. */
1728 targetm.min_anchor_offset = -248;
1729 targetm.max_anchor_offset = 4095;
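/* (That is, offsets -248 through +4095 inclusive span 248 + 1 + 4095 = 4344
   bytes, and 4344 = 8 * 543, which is the divisibility by eight noted
   above.)  */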
1732 /* V5 code we generate is completely interworking capable, so we turn off
1733 TARGET_INTERWORK here to avoid many tests later on. */
1735 /* XXX However, we must pass the right pre-processor defines to CPP
1736 or GLD can get confused. This is a hack. */
1737 if (TARGET_INTERWORK)
1738 arm_cpp_interwork = 1;
1740 if (arm_arch5)
1741 target_flags &= ~MASK_INTERWORK;
1743 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1744 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1746 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1747 error ("iwmmxt abi requires an iwmmxt capable cpu");
1749 if (target_fpu_name == NULL && target_fpe_name != NULL)
1751 if (streq (target_fpe_name, "2"))
1752 target_fpu_name = "fpe2";
1753 else if (streq (target_fpe_name, "3"))
1754 target_fpu_name = "fpe3";
1755 else
1756 error ("invalid floating point emulation option: -mfpe=%s",
1757 target_fpe_name);
1760 if (target_fpu_name == NULL)
1762 #ifdef FPUTYPE_DEFAULT
1763 target_fpu_name = FPUTYPE_DEFAULT;
1764 #else
1765 if (arm_arch_cirrus)
1766 target_fpu_name = "maverick";
1767 else
1768 target_fpu_name = "fpe2";
1769 #endif
1772 arm_fpu_desc = NULL;
1773 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1775 if (streq (all_fpus[i].name, target_fpu_name))
1777 arm_fpu_desc = &all_fpus[i];
1778 break;
1782 if (!arm_fpu_desc)
1784 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1785 return;
1788 switch (arm_fpu_desc->model)
1790 case ARM_FP_MODEL_FPA:
1791 if (arm_fpu_desc->rev == 2)
1792 arm_fpu_attr = FPU_FPE2;
1793 else if (arm_fpu_desc->rev == 3)
1794 arm_fpu_attr = FPU_FPE3;
1795 else
1796 arm_fpu_attr = FPU_FPA;
1797 break;
1799 case ARM_FP_MODEL_MAVERICK:
1800 arm_fpu_attr = FPU_MAVERICK;
1801 break;
1803 case ARM_FP_MODEL_VFP:
1804 arm_fpu_attr = FPU_VFP;
1805 break;
1807 default:
1808 gcc_unreachable();
1811 if (target_float_abi_name != NULL)
1813 /* The user specified an FP ABI. */
1814 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1816 if (streq (all_float_abis[i].name, target_float_abi_name))
1818 arm_float_abi = all_float_abis[i].abi_type;
1819 break;
1822 if (i == ARRAY_SIZE (all_float_abis))
1823 error ("invalid floating point abi: -mfloat-abi=%s",
1824 target_float_abi_name);
1826 else
1827 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1829 if (TARGET_AAPCS_BASED
1830 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1831 error ("FPA is unsupported in the AAPCS");
1833 if (TARGET_AAPCS_BASED)
1835 if (TARGET_CALLER_INTERWORKING)
1836 error ("AAPCS does not support -mcaller-super-interworking");
1837 else
1838 if (TARGET_CALLEE_INTERWORKING)
1839 error ("AAPCS does not support -mcallee-super-interworking");
1842 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1843 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1844 will ever exist. GCC makes no attempt to support this combination. */
1845 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1846 sorry ("iWMMXt and hardware floating point");
1848 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1849 if (TARGET_THUMB2 && TARGET_IWMMXT)
1850 sorry ("Thumb-2 iWMMXt");
1852 /* __fp16 support currently assumes the core has ldrh. */
1853 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1854 sorry ("__fp16 and no ldrh");
1856 /* If soft-float is specified then don't use FPU. */
1857 if (TARGET_SOFT_FLOAT)
1858 arm_fpu_attr = FPU_NONE;
1860 if (TARGET_AAPCS_BASED)
1862 if (arm_abi == ARM_ABI_IWMMXT)
1863 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1864 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1865 && TARGET_HARD_FLOAT
1866 && TARGET_VFP)
1867 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1868 else
1869 arm_pcs_default = ARM_PCS_AAPCS;
1871 else
1873 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1874 sorry ("-mfloat-abi=hard and VFP");
1876 if (arm_abi == ARM_ABI_APCS)
1877 arm_pcs_default = ARM_PCS_APCS;
1878 else
1879 arm_pcs_default = ARM_PCS_ATPCS;
1882 /* For arm2/3 there is no need to do any scheduling if there is only
1883 a floating point emulator, or we are doing software floating-point. */
1884 if ((TARGET_SOFT_FLOAT
1885 || (TARGET_FPA && arm_fpu_desc->rev))
1886 && (tune_flags & FL_MODE32) == 0)
1887 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1889 if (target_thread_switch)
1891 if (strcmp (target_thread_switch, "soft") == 0)
1892 target_thread_pointer = TP_SOFT;
1893 else if (strcmp (target_thread_switch, "auto") == 0)
1894 target_thread_pointer = TP_AUTO;
1895 else if (strcmp (target_thread_switch, "cp15") == 0)
1896 target_thread_pointer = TP_CP15;
1897 else
1898 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1901 /* Use the cp15 method if it is available. */
1902 if (target_thread_pointer == TP_AUTO)
1904 if (arm_arch6k && !TARGET_THUMB1)
1905 target_thread_pointer = TP_CP15;
1906 else
1907 target_thread_pointer = TP_SOFT;
1910 if (TARGET_HARD_TP && TARGET_THUMB1)
1911 error ("can not use -mtp=cp15 with 16-bit Thumb");
1913 /* Override the default structure alignment for AAPCS ABI. */
1914 if (TARGET_AAPCS_BASED)
1915 arm_structure_size_boundary = 8;
1917 if (structure_size_string != NULL)
1919 int size = strtol (structure_size_string, NULL, 0);
1921 if (size == 8 || size == 32
1922 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1923 arm_structure_size_boundary = size;
1924 else
1925 warning (0, "structure size boundary can only be set to %s",
1926 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1929 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1931 error ("RTP PIC is incompatible with Thumb");
1932 flag_pic = 0;
1935 /* If stack checking is disabled, we can use r10 as the PIC register,
1936 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1937 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1939 if (TARGET_VXWORKS_RTP)
1940 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1941 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1944 if (flag_pic && TARGET_VXWORKS_RTP)
1945 arm_pic_register = 9;
1947 if (arm_pic_register_string != NULL)
1949 int pic_register = decode_reg_name (arm_pic_register_string);
1951 if (!flag_pic)
1952 warning (0, "-mpic-register= is useless without -fpic");
1954 /* Prevent the user from choosing an obviously stupid PIC register. */
1955 else if (pic_register < 0 || call_used_regs[pic_register]
1956 || pic_register == HARD_FRAME_POINTER_REGNUM
1957 || pic_register == STACK_POINTER_REGNUM
1958 || pic_register >= PC_REGNUM
1959 || (TARGET_VXWORKS_RTP
1960 && (unsigned int) pic_register != arm_pic_register))
1961 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1962 else
1963 arm_pic_register = pic_register;
1966 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1967 if (fix_cm3_ldrd == 2)
1969 if (arm_selected_cpu->core == cortexm3)
1970 fix_cm3_ldrd = 1;
1971 else
1972 fix_cm3_ldrd = 0;
1975 if (TARGET_THUMB1 && flag_schedule_insns)
1977 /* Don't warn since it's on by default in -O2. */
1978 flag_schedule_insns = 0;
1981 if (optimize_size)
1983 /* If optimizing for size, bump the number of instructions that we
1984 are prepared to conditionally execute (even on a StrongARM). */
1985 max_insns_skipped = 6;
1987 else
1989 /* StrongARM has early execution of branches, so a sequence
1990 that is worth skipping is shorter. */
1991 if (arm_tune_strongarm)
1992 max_insns_skipped = 3;
1995 /* Hot/Cold partitioning is not currently supported, since we can't
1996 handle literal pool placement in that case. */
1997 if (flag_reorder_blocks_and_partition)
1999 inform (input_location,
2000 "-freorder-blocks-and-partition not supported on this architecture");
2001 flag_reorder_blocks_and_partition = 0;
2002 flag_reorder_blocks = 1;
2005 if (flag_pic)
2006 /* Hoisting PIC address calculations more aggressively provides a small,
2007 but measurable, size reduction for PIC code. Therefore, we decrease
2008 the bar for unrestricted expression hoisting to the cost of PIC address
2009 calculation, which is 2 instructions. */
2010 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2011 global_options.x_param_values,
2012 global_options_set.x_param_values);
2014 /* ARM EABI defaults to strict volatile bitfields. */
2015 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
2016 flag_strict_volatile_bitfields = 1;
2018 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
2019 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2020 if (flag_prefetch_loop_arrays < 0
2021 && HAVE_prefetch
2022 && optimize >= 3
2023 && current_tune->num_prefetch_slots > 0)
2024 flag_prefetch_loop_arrays = 1;
2026 /* Set up the parameters used by the prefetching algorithm. Do not override the
2027 defaults unless we are tuning for a core we have researched values for. */
2028 if (current_tune->num_prefetch_slots > 0)
2029 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2030 current_tune->num_prefetch_slots,
2031 global_options.x_param_values,
2032 global_options_set.x_param_values);
2033 if (current_tune->l1_cache_line_size >= 0)
2034 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2035 current_tune->l1_cache_line_size,
2036 global_options.x_param_values,
2037 global_options_set.x_param_values);
2038 if (current_tune->l1_cache_size >= 0)
2039 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2040 current_tune->l1_cache_size,
2041 global_options.x_param_values,
2042 global_options_set.x_param_values);
2044 /* Register global variables with the garbage collector. */
2045 arm_add_gc_roots ();
2048 static void
2049 arm_add_gc_roots (void)
2051 gcc_obstack_init(&minipool_obstack);
2052 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2055 /* A table of known ARM exception types.
2056 For use with the interrupt function attribute. */
2058 typedef struct
2060 const char *const arg;
2061 const unsigned long return_value;
2063 isr_attribute_arg;
2065 static const isr_attribute_arg isr_attribute_args [] =
2067 { "IRQ", ARM_FT_ISR },
2068 { "irq", ARM_FT_ISR },
2069 { "FIQ", ARM_FT_FIQ },
2070 { "fiq", ARM_FT_FIQ },
2071 { "ABORT", ARM_FT_ISR },
2072 { "abort", ARM_FT_ISR },
2073 { "ABORT", ARM_FT_ISR },
2074 { "abort", ARM_FT_ISR },
2075 { "UNDEF", ARM_FT_EXCEPTION },
2076 { "undef", ARM_FT_EXCEPTION },
2077 { "SWI", ARM_FT_EXCEPTION },
2078 { "swi", ARM_FT_EXCEPTION },
2079 { NULL, ARM_FT_NORMAL }
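/* For illustration (not part of the original table): a handler using one of
   these arguments would typically be declared as, e.g.,

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   The argument string is matched exactly against the entries above, which is
   why both upper- and lower-case spellings are listed.  */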
2082 /* Returns the (interrupt) function type of the current
2083 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2085 static unsigned long
2086 arm_isr_value (tree argument)
2088 const isr_attribute_arg * ptr;
2089 const char * arg;
2091 if (!arm_arch_notm)
2092 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2094 /* No argument - default to IRQ. */
2095 if (argument == NULL_TREE)
2096 return ARM_FT_ISR;
2098 /* Get the value of the argument. */
2099 if (TREE_VALUE (argument) == NULL_TREE
2100 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2101 return ARM_FT_UNKNOWN;
2103 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2105 /* Check it against the list of known arguments. */
2106 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2107 if (streq (arg, ptr->arg))
2108 return ptr->return_value;
2110 /* An unrecognized interrupt type. */
2111 return ARM_FT_UNKNOWN;
2114 /* Computes the type of the current function. */
2116 static unsigned long
2117 arm_compute_func_type (void)
2119 unsigned long type = ARM_FT_UNKNOWN;
2120 tree a;
2121 tree attr;
2123 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2125 /* Decide if the current function is volatile. Such functions
2126 never return, and many memory cycles can be saved by not storing
2127 register values that will never be needed again. This optimization
2128 was added to speed up context switching in a kernel application. */
2129 if (optimize > 0
2130 && (TREE_NOTHROW (current_function_decl)
2131 || !(flag_unwind_tables
2132 || (flag_exceptions
2133 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2134 && TREE_THIS_VOLATILE (current_function_decl))
2135 type |= ARM_FT_VOLATILE;
2137 if (cfun->static_chain_decl != NULL)
2138 type |= ARM_FT_NESTED;
2140 attr = DECL_ATTRIBUTES (current_function_decl);
2142 a = lookup_attribute ("naked", attr);
2143 if (a != NULL_TREE)
2144 type |= ARM_FT_NAKED;
2146 a = lookup_attribute ("isr", attr);
2147 if (a == NULL_TREE)
2148 a = lookup_attribute ("interrupt", attr);
2150 if (a == NULL_TREE)
2151 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2152 else
2153 type |= arm_isr_value (TREE_VALUE (a));
2155 return type;
2158 /* Returns the type of the current function. */
2160 unsigned long
2161 arm_current_func_type (void)
2163 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2164 cfun->machine->func_type = arm_compute_func_type ();
2166 return cfun->machine->func_type;
2169 bool
2170 arm_allocate_stack_slots_for_args (void)
2172 /* Naked functions should not allocate stack slots for arguments. */
2173 return !IS_NAKED (arm_current_func_type ());
2177 /* Output assembler code for a block containing the constant parts
2178 of a trampoline, leaving space for the variable parts.
2180 On the ARM, (if r8 is the static chain regnum, and remembering that
2181 referencing pc adds an offset of 8) the trampoline looks like:
2182 ldr r8, [pc, #0]
2183 ldr pc, [pc]
2184 .word static chain value
2185 .word function's address
2186 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
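/* Worked through: the first ldr executes at offset 0, so with the pc
   read-ahead of 8 it fetches the word at offset 8 (the static chain); the
   second ldr executes at offset 4 and fetches the word at offset 12 (the
   function address) directly into pc, transferring control.  These are the
   same offsets that arm_trampoline_init uses below for 32-bit targets.  */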
2188 static void
2189 arm_asm_trampoline_template (FILE *f)
2191 if (TARGET_ARM)
2193 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2194 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2196 else if (TARGET_THUMB2)
2198 /* The Thumb-2 trampoline is similar to the ARM implementation.
2199 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2200 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2201 STATIC_CHAIN_REGNUM, PC_REGNUM);
2202 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2204 else
2206 ASM_OUTPUT_ALIGN (f, 2);
2207 fprintf (f, "\t.code\t16\n");
2208 fprintf (f, ".Ltrampoline_start:\n");
2209 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2210 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2211 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2212 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2213 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2214 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2216 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2217 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2220 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2222 static void
2223 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2225 rtx fnaddr, mem, a_tramp;
2227 emit_block_move (m_tramp, assemble_trampoline_template (),
2228 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2230 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2231 emit_move_insn (mem, chain_value);
2233 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2234 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2235 emit_move_insn (mem, fnaddr);
2237 a_tramp = XEXP (m_tramp, 0);
2238 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2239 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2240 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2243 /* Thumb trampolines should be entered in thumb mode, so set
2244 the bottom bit of the address. */
2246 static rtx
2247 arm_trampoline_adjust_address (rtx addr)
2249 if (TARGET_THUMB)
2250 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2251 NULL, 0, OPTAB_LIB_WIDEN);
2252 return addr;
2255 /* Return 1 if it is possible to return using a single instruction.
2256 If SIBLING is non-null, this is a test for a return before a sibling
2257 call. SIBLING is the call insn, so we can examine its register usage. */
2260 use_return_insn (int iscond, rtx sibling)
2262 int regno;
2263 unsigned int func_type;
2264 unsigned long saved_int_regs;
2265 unsigned HOST_WIDE_INT stack_adjust;
2266 arm_stack_offsets *offsets;
2268 /* Never use a return instruction before reload has run. */
2269 if (!reload_completed)
2270 return 0;
2272 func_type = arm_current_func_type ();
2274 /* Naked, volatile and stack alignment functions need special
2275 consideration. */
2276 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2277 return 0;
2279 /* So do interrupt functions that use the frame pointer and Thumb
2280 interrupt functions. */
2281 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2282 return 0;
2284 offsets = arm_get_frame_offsets ();
2285 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2287 /* As do variadic functions. */
2288 if (crtl->args.pretend_args_size
2289 || cfun->machine->uses_anonymous_args
2290 /* Or if the function calls __builtin_eh_return () */
2291 || crtl->calls_eh_return
2292 /* Or if the function calls alloca */
2293 || cfun->calls_alloca
2294 /* Or if there is a stack adjustment. However, if the stack pointer
2295 is saved on the stack, we can use a pre-incrementing stack load. */
2296 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2297 && stack_adjust == 4)))
2298 return 0;
2300 saved_int_regs = offsets->saved_regs_mask;
2302 /* Unfortunately, the insn
2304 ldmib sp, {..., sp, ...}
2306 triggers a bug on most SA-110 based devices, such that the stack
2307 pointer won't be correctly restored if the instruction takes a
2308 page fault. We work around this problem by popping r3 along with
2309 the other registers, since that is never slower than executing
2310 another instruction.
2312 We test for !arm_arch5 here, because code for any architecture
2313 less than this could potentially be run on one of the buggy
2314 chips. */
2315 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2317 /* Validate that r3 is a call-clobbered register (always true in
2318 the default abi) ... */
2319 if (!call_used_regs[3])
2320 return 0;
2322 /* ... that it isn't being used for a return value ... */
2323 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2324 return 0;
2326 /* ... or for a tail-call argument ... */
2327 if (sibling)
2329 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2331 if (find_regno_fusage (sibling, USE, 3))
2332 return 0;
2335 /* ... and that there are no call-saved registers in r0-r2
2336 (always true in the default ABI). */
2337 if (saved_int_regs & 0x7)
2338 return 0;
2341 /* Can't be done if interworking with Thumb, and any registers have been
2342 stacked. */
2343 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2344 return 0;
2346 /* On StrongARM, conditional returns are expensive if they aren't
2347 taken and multiple registers have been stacked. */
2348 if (iscond && arm_tune_strongarm)
2350 /* Conditional return when just the LR is stored is a simple
2351 conditional-load instruction, that's not expensive. */
2352 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2353 return 0;
2355 if (flag_pic
2356 && arm_pic_register != INVALID_REGNUM
2357 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2358 return 0;
2361 /* If there are saved registers but the LR isn't saved, then we need
2362 two instructions for the return. */
2363 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2364 return 0;
2366 /* Can't be done if any of the FPA regs are pushed,
2367 since this also requires an insn. */
2368 if (TARGET_HARD_FLOAT && TARGET_FPA)
2369 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2370 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2371 return 0;
2373 /* Likewise VFP regs. */
2374 if (TARGET_HARD_FLOAT && TARGET_VFP)
2375 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2376 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2377 return 0;
2379 if (TARGET_REALLY_IWMMXT)
2380 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2381 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2382 return 0;
2384 return 1;
2387 /* Return TRUE if int I is a valid immediate ARM constant. */
2390 const_ok_for_arm (HOST_WIDE_INT i)
2392 int lowbit;
2394 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2395 be all zero, or all one. */
2396 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2397 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2398 != ((~(unsigned HOST_WIDE_INT) 0)
2399 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2400 return FALSE;
2402 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2404 /* Fast return for 0 and small values. We must do this for zero, since
2405 the code below can't handle that one case. */
2406 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2407 return TRUE;
2409 /* Get the number of trailing zeros. */
2410 lowbit = ffs((int) i) - 1;
2412 /* Only even shifts are allowed in ARM mode so round down to the
2413 nearest even number. */
2414 if (TARGET_ARM)
2415 lowbit &= ~1;
2417 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2418 return TRUE;
2420 if (TARGET_ARM)
2422 /* Allow rotated constants in ARM mode. */
2423 if (lowbit <= 4
2424 && ((i & ~0xc000003f) == 0
2425 || (i & ~0xf000000f) == 0
2426 || (i & ~0xfc000003) == 0))
2427 return TRUE;
2429 else
2431 HOST_WIDE_INT v;
2433 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2434 v = i & 0xff;
2435 v |= v << 16;
2436 if (i == v || i == (v | (v << 8)))
2437 return TRUE;
2439 /* Allow repeated pattern 0xXY00XY00. */
2440 v = i & 0xff00;
2441 v |= v << 16;
2442 if (i == v)
2443 return TRUE;
2446 return FALSE;
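/* Illustrative sketch (not part of the original file): in ARM mode the test
   above boils down to "an 8-bit value rotated right by an even amount", so,
   for example, 0xff000000 (0xff rotated right by 8) is accepted while
   0x00000101 is not.  A stand-alone checker along those lines might look
   like the following; the name arm_mode_immediate_p is invented here.  */
#if 0
static int
arm_mode_immediate_p (unsigned int x)
{
  unsigned int rot;

  /* Try every even rotation; if rotating X left by ROT leaves only the
     low eight bits set, then X is an 8-bit value rotated right by ROT.  */
  for (rot = 0; rot < 32; rot += 2)
    {
      unsigned int y = (x << rot) | (rot ? (x >> (32 - rot)) : 0);
      if ((y & ~0xffu) == 0)
        return 1;
    }
  return 0;
}
#endif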
2449 /* Return true if I is a valid constant for the operation CODE. */
2450 static int
2451 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2453 if (const_ok_for_arm (i))
2454 return 1;
2456 switch (code)
2458 case PLUS:
2459 case COMPARE:
2460 case EQ:
2461 case NE:
2462 case GT:
2463 case LE:
2464 case LT:
2465 case GE:
2466 case GEU:
2467 case LTU:
2468 case GTU:
2469 case LEU:
2470 case UNORDERED:
2471 case ORDERED:
2472 case UNEQ:
2473 case UNGE:
2474 case UNLT:
2475 case UNGT:
2476 case UNLE:
2477 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2479 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2480 case XOR:
2481 return 0;
2483 case IOR:
2484 if (TARGET_THUMB2)
2485 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2486 return 0;
2488 case AND:
2489 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2491 default:
2492 gcc_unreachable ();
2496 /* Emit a sequence of insns to handle a large constant.
2497 CODE is the code of the operation required, it can be any of SET, PLUS,
2498 IOR, AND, XOR, MINUS;
2499 MODE is the mode in which the operation is being performed;
2500 VAL is the integer to operate on;
2501 SOURCE is the other operand (a register, or a null-pointer for SET);
2502 SUBTARGETS means it is safe to create scratch registers if that will
2503 either produce a simpler sequence, or we will want to cse the values.
2504 Return value is the number of insns emitted. */
2506 /* ??? Tweak this for thumb2. */
2508 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2509 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2511 rtx cond;
2513 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2514 cond = COND_EXEC_TEST (PATTERN (insn));
2515 else
2516 cond = NULL_RTX;
2518 if (subtargets || code == SET
2519 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2520 && REGNO (target) != REGNO (source)))
2522 /* After arm_reorg has been called, we can't fix up expensive
2523 constants by pushing them into memory so we must synthesize
2524 them in-line, regardless of the cost. This is only likely to
2525 be more costly on chips that have load delay slots and we are
2526 compiling without running the scheduler (so no splitting
2527 occurred before the final instruction emission).
2529 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2531 if (!after_arm_reorg
2532 && !cond
2533 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2534 1, 0)
2535 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2536 + (code != SET))))
2538 if (code == SET)
2540 /* Currently SET is the only monadic value for CODE; all
2541 the rest are dyadic. */
2542 if (TARGET_USE_MOVT)
2543 arm_emit_movpair (target, GEN_INT (val));
2544 else
2545 emit_set_insn (target, GEN_INT (val));
2547 return 1;
2549 else
2551 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2553 if (TARGET_USE_MOVT)
2554 arm_emit_movpair (temp, GEN_INT (val));
2555 else
2556 emit_set_insn (temp, GEN_INT (val));
2558 /* For MINUS, the value is subtracted from, since we never
2559 have subtraction of a constant. */
2560 if (code == MINUS)
2561 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2562 else
2563 emit_set_insn (target,
2564 gen_rtx_fmt_ee (code, mode, source, temp));
2565 return 2;
2570 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2574 /* Return the number of instructions required to synthesize the given
2575 constant, if we start emitting them from bit-position I. */
2576 static int
2577 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2579 HOST_WIDE_INT temp1;
2580 int step_size = TARGET_ARM ? 2 : 1;
2581 int num_insns = 0;
2583 gcc_assert (TARGET_ARM || i == 0);
2587 int end;
2589 if (i <= 0)
2590 i += 32;
2591 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2593 end = i - 8;
2594 if (end < 0)
2595 end += 32;
2596 temp1 = remainder & ((0x0ff << end)
2597 | ((i < end) ? (0xff >> (32 - end)) : 0));
2598 remainder &= ~temp1;
2599 num_insns++;
2600 i -= 8 - step_size;
2602 i -= step_size;
2603 } while (remainder);
2604 return num_insns;
2607 static int
2608 find_best_start (unsigned HOST_WIDE_INT remainder)
2610 int best_consecutive_zeros = 0;
2611 int i;
2612 int best_start = 0;
2614 /* If we aren't targeting ARM, the best place to start is always at
2615 the bottom. */
2616 if (! TARGET_ARM)
2617 return 0;
2619 for (i = 0; i < 32; i += 2)
2621 int consecutive_zeros = 0;
2623 if (!(remainder & (3 << i)))
2625 while ((i < 32) && !(remainder & (3 << i)))
2627 consecutive_zeros += 2;
2628 i += 2;
2630 if (consecutive_zeros > best_consecutive_zeros)
2632 best_consecutive_zeros = consecutive_zeros;
2633 best_start = i - consecutive_zeros;
2635 i -= 2;
2639 /* So long as it won't require any more insns to do so, it's
2640 desirable to emit a small constant (in bits 0...9) in the last
2641 insn. This way there is more chance that it can be combined with
2642 a later addressing insn to form a pre-indexed load or store
2643 operation. Consider:
2645 *((volatile int *)0xe0000100) = 1;
2646 *((volatile int *)0xe0000110) = 2;
2648 We want this to wind up as:
2650 mov rA, #0xe0000000
2651 mov rB, #1
2652 str rB, [rA, #0x100]
2653 mov rB, #2
2654 str rB, [rA, #0x110]
2656 rather than having to synthesize both large constants from scratch.
2658 Therefore, we calculate how many insns would be required to emit
2659 the constant starting from `best_start', and also starting from
2660 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2661 yield a shorter sequence, we may as well use zero. */
2662 if (best_start != 0
2663 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2664 && (count_insns_for_constant (remainder, 0) <=
2665 count_insns_for_constant (remainder, best_start)))
2666 best_start = 0;
2668 return best_start;
2671 /* Emit an instruction with the indicated PATTERN. If COND is
2672 non-NULL, conditionalize the execution of the instruction on COND
2673 being true. */
2675 static void
2676 emit_constant_insn (rtx cond, rtx pattern)
2678 if (cond)
2679 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2680 emit_insn (pattern);
2683 /* As above, but extra parameter GENERATE which, if clear, suppresses
2684 RTL generation. */
2685 /* ??? This needs more work for thumb2. */
2687 static int
2688 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2689 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2690 int generate)
2692 int can_invert = 0;
2693 int can_negate = 0;
2694 int final_invert = 0;
2695 int can_negate_initial = 0;
2696 int i;
2697 int num_bits_set = 0;
2698 int set_sign_bit_copies = 0;
2699 int clear_sign_bit_copies = 0;
2700 int clear_zero_bit_copies = 0;
2701 int set_zero_bit_copies = 0;
2702 int insns = 0;
2703 unsigned HOST_WIDE_INT temp1, temp2;
2704 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2705 int step_size = TARGET_ARM ? 2 : 1;
2707 /* Find out which operations are safe for a given CODE. Also do a quick
2708 check for degenerate cases; these can occur when DImode operations
2709 are split. */
2710 switch (code)
2712 case SET:
2713 can_invert = 1;
2714 can_negate = 1;
2715 break;
2717 case PLUS:
2718 can_negate = 1;
2719 can_negate_initial = 1;
2720 break;
2722 case IOR:
2723 if (remainder == 0xffffffff)
2725 if (generate)
2726 emit_constant_insn (cond,
2727 gen_rtx_SET (VOIDmode, target,
2728 GEN_INT (ARM_SIGN_EXTEND (val))));
2729 return 1;
2732 if (remainder == 0)
2734 if (reload_completed && rtx_equal_p (target, source))
2735 return 0;
2737 if (generate)
2738 emit_constant_insn (cond,
2739 gen_rtx_SET (VOIDmode, target, source));
2740 return 1;
2743 if (TARGET_THUMB2)
2744 can_invert = 1;
2745 break;
2747 case AND:
2748 if (remainder == 0)
2750 if (generate)
2751 emit_constant_insn (cond,
2752 gen_rtx_SET (VOIDmode, target, const0_rtx));
2753 return 1;
2755 if (remainder == 0xffffffff)
2757 if (reload_completed && rtx_equal_p (target, source))
2758 return 0;
2759 if (generate)
2760 emit_constant_insn (cond,
2761 gen_rtx_SET (VOIDmode, target, source));
2762 return 1;
2764 can_invert = 1;
2765 break;
2767 case XOR:
2768 if (remainder == 0)
2770 if (reload_completed && rtx_equal_p (target, source))
2771 return 0;
2772 if (generate)
2773 emit_constant_insn (cond,
2774 gen_rtx_SET (VOIDmode, target, source));
2775 return 1;
2778 if (remainder == 0xffffffff)
2780 if (generate)
2781 emit_constant_insn (cond,
2782 gen_rtx_SET (VOIDmode, target,
2783 gen_rtx_NOT (mode, source)));
2784 return 1;
2786 break;
2788 case MINUS:
2789 /* We treat MINUS as (val - source), since (source - val) is always
2790 passed as (source + (-val)). */
2791 if (remainder == 0)
2793 if (generate)
2794 emit_constant_insn (cond,
2795 gen_rtx_SET (VOIDmode, target,
2796 gen_rtx_NEG (mode, source)));
2797 return 1;
2799 if (const_ok_for_arm (val))
2801 if (generate)
2802 emit_constant_insn (cond,
2803 gen_rtx_SET (VOIDmode, target,
2804 gen_rtx_MINUS (mode, GEN_INT (val),
2805 source)));
2806 return 1;
2808 can_negate = 1;
2810 break;
2812 default:
2813 gcc_unreachable ();
2816 /* If we can do it in one insn get out quickly. */
2817 if (const_ok_for_arm (val)
2818 || (can_negate_initial && const_ok_for_arm (-val))
2819 || (can_invert && const_ok_for_arm (~val)))
2821 if (generate)
2822 emit_constant_insn (cond,
2823 gen_rtx_SET (VOIDmode, target,
2824 (source
2825 ? gen_rtx_fmt_ee (code, mode, source,
2826 GEN_INT (val))
2827 : GEN_INT (val))));
2828 return 1;
2831 /* Calculate a few attributes that may be useful for specific
2832 optimizations. */
2833 /* Count number of leading zeros. */
2834 for (i = 31; i >= 0; i--)
2836 if ((remainder & (1 << i)) == 0)
2837 clear_sign_bit_copies++;
2838 else
2839 break;
2842 /* Count number of leading 1's. */
2843 for (i = 31; i >= 0; i--)
2845 if ((remainder & (1 << i)) != 0)
2846 set_sign_bit_copies++;
2847 else
2848 break;
2851 /* Count number of trailing zeros. */
2852 for (i = 0; i <= 31; i++)
2854 if ((remainder & (1 << i)) == 0)
2855 clear_zero_bit_copies++;
2856 else
2857 break;
2860 /* Count number of trailing 1's. */
2861 for (i = 0; i <= 31; i++)
2863 if ((remainder & (1 << i)) != 0)
2864 set_zero_bit_copies++;
2865 else
2866 break;
2869 switch (code)
2871 case SET:
2872 /* See if we can use movw. */
2873 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2875 if (generate)
2876 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2877 GEN_INT (val)));
2878 return 1;
2881 /* See if we can do this by sign-extending a constant that is known
2882 to be negative. This is a good way of doing it, since the shift
2883 may well merge into a subsequent insn. */
2884 if (set_sign_bit_copies > 1)
2886 if (const_ok_for_arm
2887 (temp1 = ARM_SIGN_EXTEND (remainder
2888 << (set_sign_bit_copies - 1))))
2890 if (generate)
2892 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2893 emit_constant_insn (cond,
2894 gen_rtx_SET (VOIDmode, new_src,
2895 GEN_INT (temp1)));
2896 emit_constant_insn (cond,
2897 gen_ashrsi3 (target, new_src,
2898 GEN_INT (set_sign_bit_copies - 1)));
2900 return 2;
2902 /* For an inverted constant, we will need to set the low bits,
2903 these will be shifted out of harm's way. */
2904 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2905 if (const_ok_for_arm (~temp1))
2907 if (generate)
2909 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2910 emit_constant_insn (cond,
2911 gen_rtx_SET (VOIDmode, new_src,
2912 GEN_INT (temp1)));
2913 emit_constant_insn (cond,
2914 gen_ashrsi3 (target, new_src,
2915 GEN_INT (set_sign_bit_copies - 1)));
2917 return 2;
2921 /* See if we can calculate the value as the difference between two
2922 valid immediates. */
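/* For instance (illustrative), 0x000fffff is not itself a valid immediate,
   but it can be reached as 0x00100000 - 1, i.e. one way the code below can
   build it is a MOV of 0x00100000 followed by subtracting 1.  */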
2923 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2925 int topshift = clear_sign_bit_copies & ~1;
2927 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2928 & (0xff000000 >> topshift));
2930 /* If temp1 is zero, then that means the 9 most significant
2931 bits of remainder were 1 and we've caused it to overflow.
2932 When topshift is 0 we don't need to do anything since we
2933 can borrow from 'bit 32'. */
2934 if (temp1 == 0 && topshift != 0)
2935 temp1 = 0x80000000 >> (topshift - 1);
2937 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2939 if (const_ok_for_arm (temp2))
2941 if (generate)
2943 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2944 emit_constant_insn (cond,
2945 gen_rtx_SET (VOIDmode, new_src,
2946 GEN_INT (temp1)));
2947 emit_constant_insn (cond,
2948 gen_addsi3 (target, new_src,
2949 GEN_INT (-temp2)));
2952 return 2;
2956 /* See if we can generate this by setting the bottom (or the top)
2957 16 bits, and then shifting these into the other half of the
2958 word. We only look for the simplest cases; to do more would cost
2959 too much. Be careful, however, not to generate this when the
2960 alternative would take fewer insns. */
2961 if (val & 0xffff0000)
2963 temp1 = remainder & 0xffff0000;
2964 temp2 = remainder & 0x0000ffff;
2966 /* Overlaps outside this range are best done using other methods. */
2967 for (i = 9; i < 24; i++)
2969 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2970 && !const_ok_for_arm (temp2))
2972 rtx new_src = (subtargets
2973 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2974 : target);
2975 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2976 source, subtargets, generate);
2977 source = new_src;
2978 if (generate)
2979 emit_constant_insn
2980 (cond,
2981 gen_rtx_SET
2982 (VOIDmode, target,
2983 gen_rtx_IOR (mode,
2984 gen_rtx_ASHIFT (mode, source,
2985 GEN_INT (i)),
2986 source)));
2987 return insns + 1;
2991 /* Don't duplicate cases already considered. */
2992 for (i = 17; i < 24; i++)
2994 if (((temp1 | (temp1 >> i)) == remainder)
2995 && !const_ok_for_arm (temp1))
2997 rtx new_src = (subtargets
2998 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2999 : target);
3000 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3001 source, subtargets, generate);
3002 source = new_src;
3003 if (generate)
3004 emit_constant_insn
3005 (cond,
3006 gen_rtx_SET (VOIDmode, target,
3007 gen_rtx_IOR
3008 (mode,
3009 gen_rtx_LSHIFTRT (mode, source,
3010 GEN_INT (i)),
3011 source)));
3012 return insns + 1;
3016 break;
3018 case IOR:
3019 case XOR:
3020 /* If we have IOR or XOR, and the constant can be loaded in a
3021 single instruction, and we can find a temporary to put it in,
3022 then this can be done in two instructions instead of 3-4. */
3023 if (subtargets
3024 /* TARGET can't be NULL if SUBTARGETS is 0 */
3025 || (reload_completed && !reg_mentioned_p (target, source)))
3027 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3029 if (generate)
3031 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3033 emit_constant_insn (cond,
3034 gen_rtx_SET (VOIDmode, sub,
3035 GEN_INT (val)));
3036 emit_constant_insn (cond,
3037 gen_rtx_SET (VOIDmode, target,
3038 gen_rtx_fmt_ee (code, mode,
3039 source, sub)));
3041 return 2;
3045 if (code == XOR)
3046 break;
3048 /* Convert
3049 x = y | constant (which is composed of set_sign_bit_copies leading 1s
3050 followed by 0s, e.g. 0xfff00000) into
3051 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
3053 This can be done in 2 instructions by using shifts with mov or mvn.
3054 E.g. for
3055 x = x | 0xfff00000;
3056 we generate:
3057 mvn r0, r0, asl #12
3058 mvn r0, r0, lsr #12 */
3059 if (set_sign_bit_copies > 8
3060 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3062 if (generate)
3064 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3065 rtx shift = GEN_INT (set_sign_bit_copies);
3067 emit_constant_insn
3068 (cond,
3069 gen_rtx_SET (VOIDmode, sub,
3070 gen_rtx_NOT (mode,
3071 gen_rtx_ASHIFT (mode,
3072 source,
3073 shift))));
3074 emit_constant_insn
3075 (cond,
3076 gen_rtx_SET (VOIDmode, target,
3077 gen_rtx_NOT (mode,
3078 gen_rtx_LSHIFTRT (mode, sub,
3079 shift))));
3081 return 2;
3084 /* Convert
3085 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3087 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3089 E.g. for r0 = r0 | 0xfff we generate:
3090 mvn r0, r0, lsr #12
3091 mvn r0, r0, asl #12
3094 if (set_zero_bit_copies > 8
3095 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3097 if (generate)
3099 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3100 rtx shift = GEN_INT (set_zero_bit_copies);
3102 emit_constant_insn
3103 (cond,
3104 gen_rtx_SET (VOIDmode, sub,
3105 gen_rtx_NOT (mode,
3106 gen_rtx_LSHIFTRT (mode,
3107 source,
3108 shift))));
3109 emit_constant_insn
3110 (cond,
3111 gen_rtx_SET (VOIDmode, target,
3112 gen_rtx_NOT (mode,
3113 gen_rtx_ASHIFT (mode, sub,
3114 shift))));
3116 return 2;
3119 /* This will never be reached for Thumb2 because orn is a valid
3120 instruction. This is for Thumb1 and the ARM 32 bit cases.
3122 x = y | constant (such that ~constant is a valid constant)
3123 Transform this to
3124 x = ~(~y & ~constant).
3126 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3128 if (generate)
3130 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3131 emit_constant_insn (cond,
3132 gen_rtx_SET (VOIDmode, sub,
3133 gen_rtx_NOT (mode, source)));
3134 source = sub;
3135 if (subtargets)
3136 sub = gen_reg_rtx (mode);
3137 emit_constant_insn (cond,
3138 gen_rtx_SET (VOIDmode, sub,
3139 gen_rtx_AND (mode, source,
3140 GEN_INT (temp1))));
3141 emit_constant_insn (cond,
3142 gen_rtx_SET (VOIDmode, target,
3143 gen_rtx_NOT (mode, sub)));
3145 return 3;
3147 break;
3149 case AND:
3150 /* See if two shifts will do 2 or more insn's worth of work. */
3151 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3153 HOST_WIDE_INT shift_mask = ((0xffffffff
3154 << (32 - clear_sign_bit_copies))
3155 & 0xffffffff);
3157 if ((remainder | shift_mask) != 0xffffffff)
3159 if (generate)
3161 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3162 insns = arm_gen_constant (AND, mode, cond,
3163 remainder | shift_mask,
3164 new_src, source, subtargets, 1);
3165 source = new_src;
3167 else
3169 rtx targ = subtargets ? NULL_RTX : target;
3170 insns = arm_gen_constant (AND, mode, cond,
3171 remainder | shift_mask,
3172 targ, source, subtargets, 0);
3176 if (generate)
3178 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3179 rtx shift = GEN_INT (clear_sign_bit_copies);
3181 emit_insn (gen_ashlsi3 (new_src, source, shift));
3182 emit_insn (gen_lshrsi3 (target, new_src, shift));
3185 return insns + 2;
3188 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3190 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3192 if ((remainder | shift_mask) != 0xffffffff)
3194 if (generate)
3196 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3198 insns = arm_gen_constant (AND, mode, cond,
3199 remainder | shift_mask,
3200 new_src, source, subtargets, 1);
3201 source = new_src;
3203 else
3205 rtx targ = subtargets ? NULL_RTX : target;
3207 insns = arm_gen_constant (AND, mode, cond,
3208 remainder | shift_mask,
3209 targ, source, subtargets, 0);
3213 if (generate)
3215 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3216 rtx shift = GEN_INT (clear_zero_bit_copies);
3218 emit_insn (gen_lshrsi3 (new_src, source, shift));
3219 emit_insn (gen_ashlsi3 (target, new_src, shift));
3222 return insns + 2;
3225 break;
3227 default:
3228 break;
3231 for (i = 0; i < 32; i++)
3232 if (remainder & (1 << i))
3233 num_bits_set++;
3235 if ((code == AND)
3236 || (code != IOR && can_invert && num_bits_set > 16))
3237 remainder ^= 0xffffffff;
3238 else if (code == PLUS && num_bits_set > 16)
3239 remainder = (-remainder) & 0xffffffff;
3241 /* For XOR, if more than half the bits are set and there's a sequence
3242 of more than 8 consecutive ones in the pattern then we can XOR by the
3243 inverted constant and then invert the final result; this may save an
3244 instruction and might also lead to the final mvn being merged with
3245 some other operation. */
3246 else if (code == XOR && num_bits_set > 16
3247 && (count_insns_for_constant (remainder ^ 0xffffffff,
3248 find_best_start
3249 (remainder ^ 0xffffffff))
3250 < count_insns_for_constant (remainder,
3251 find_best_start (remainder))))
3253 remainder ^= 0xffffffff;
3254 final_invert = 1;
3256 else
3258 can_invert = 0;
3259 can_negate = 0;
3262 /* Now try and find a way of doing the job in either two or three
3263 instructions.
3264 We start by looking for the largest block of zeros that are aligned on
3265 a 2-bit boundary, we then fill up the temps, wrapping around to the
3266 top of the word when we drop off the bottom.
3267 In the worst case this code should produce no more than four insns.
3268 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3269 best place to start. */
3271 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3272 the same. */
3274 /* Now start emitting the insns. */
3275 i = find_best_start (remainder);
3278 int end;
3280 if (i <= 0)
3281 i += 32;
3282 if (remainder & (3 << (i - 2)))
3284 end = i - 8;
3285 if (end < 0)
3286 end += 32;
3287 temp1 = remainder & ((0x0ff << end)
3288 | ((i < end) ? (0xff >> (32 - end)) : 0));
3289 remainder &= ~temp1;
3291 if (generate)
3293 rtx new_src, temp1_rtx;
3295 if (code == SET || code == MINUS)
3297 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3298 if (can_invert && code != MINUS)
3299 temp1 = ~temp1;
3301 else
3303 if ((final_invert || remainder) && subtargets)
3304 new_src = gen_reg_rtx (mode);
3305 else
3306 new_src = target;
3307 if (can_invert)
3308 temp1 = ~temp1;
3309 else if (can_negate)
3310 temp1 = -temp1;
3313 temp1 = trunc_int_for_mode (temp1, mode);
3314 temp1_rtx = GEN_INT (temp1);
3316 if (code == SET)
3318 else if (code == MINUS)
3319 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3320 else
3321 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3323 emit_constant_insn (cond,
3324 gen_rtx_SET (VOIDmode, new_src,
3325 temp1_rtx));
3326 source = new_src;
3329 if (code == SET)
3331 can_invert = 0;
3332 code = PLUS;
3334 else if (code == MINUS)
3335 code = PLUS;
3337 insns++;
3338 i -= 8 - step_size;
3340 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3341 shifts. */
3342 i -= step_size;
3344 while (remainder);
3347 if (final_invert)
3349 if (generate)
3350 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3351 gen_rtx_NOT (mode, source)));
3352 insns++;
3355 return insns;
3358 /* Canonicalize a comparison so that we are more likely to recognize it.
3359 This can be done for a few constant compares, where we can make the
3360 immediate value easier to load. */
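/* For instance (illustrative): a comparison such as "x <= 0x3ff" cannot use
   0x3ff (or -0x3ff) as a data-processing immediate, but rewriting it as
   "x < 0x400" needs only 0x400, which is a valid immediate.  */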
3362 enum rtx_code
3363 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3365 enum machine_mode mode;
3366 unsigned HOST_WIDE_INT i, maxval;
3368 mode = GET_MODE (*op0);
3369 if (mode == VOIDmode)
3370 mode = GET_MODE (*op1);
3372 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3374 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3375 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3376 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3377 for GTU/LEU in Thumb mode. */
3378 if (mode == DImode)
3380 rtx tem;
3382 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3383 available. */
3384 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3385 return code;
3387 if (code == GT || code == LE
3388 || (!TARGET_ARM && (code == GTU || code == LEU)))
3390 /* Missing comparison. First try to use an available
3391 comparison. */
3392 if (GET_CODE (*op1) == CONST_INT)
3394 i = INTVAL (*op1);
3395 switch (code)
3397 case GT:
3398 case LE:
3399 if (i != maxval
3400 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3402 *op1 = GEN_INT (i + 1);
3403 return code == GT ? GE : LT;
3405 break;
3406 case GTU:
3407 case LEU:
3408 if (i != ~((unsigned HOST_WIDE_INT) 0)
3409 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3411 *op1 = GEN_INT (i + 1);
3412 return code == GTU ? GEU : LTU;
3414 break;
3415 default:
3416 gcc_unreachable ();
3420 /* If that did not work, reverse the condition. */
3421 tem = *op0;
3422 *op0 = *op1;
3423 *op1 = tem;
3424 return swap_condition (code);
3427 return code;
3430 /* Comparisons smaller than DImode. Only adjust comparisons against
3431 an out-of-range constant. */
3432 if (GET_CODE (*op1) != CONST_INT
3433 || const_ok_for_arm (INTVAL (*op1))
3434 || const_ok_for_arm (- INTVAL (*op1)))
3435 return code;
3437 i = INTVAL (*op1);
3439 switch (code)
3441 case EQ:
3442 case NE:
3443 return code;
3445 case GT:
3446 case LE:
3447 if (i != maxval
3448 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3450 *op1 = GEN_INT (i + 1);
3451 return code == GT ? GE : LT;
3453 break;
3455 case GE:
3456 case LT:
3457 if (i != ~maxval
3458 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3460 *op1 = GEN_INT (i - 1);
3461 return code == GE ? GT : LE;
3463 break;
3465 case GTU:
3466 case LEU:
3467 if (i != ~((unsigned HOST_WIDE_INT) 0)
3468 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3470 *op1 = GEN_INT (i + 1);
3471 return code == GTU ? GEU : LTU;
3473 break;
3475 case GEU:
3476 case LTU:
3477 if (i != 0
3478 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3480 *op1 = GEN_INT (i - 1);
3481 return code == GEU ? GTU : LEU;
3483 break;
3485 default:
3486 gcc_unreachable ();
3489 return code;
3493 /* Define how to find the value returned by a function. */
3495 static rtx
3496 arm_function_value(const_tree type, const_tree func,
3497 bool outgoing ATTRIBUTE_UNUSED)
3499 enum machine_mode mode;
3500 int unsignedp ATTRIBUTE_UNUSED;
3501 rtx r ATTRIBUTE_UNUSED;
3503 mode = TYPE_MODE (type);
3505 if (TARGET_AAPCS_BASED)
3506 return aapcs_allocate_return_reg (mode, type, func);
3508 /* Promote integer types. */
3509 if (INTEGRAL_TYPE_P (type))
3510 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3512 /* Promote small structs returned in a register to full-word size
3513 for big-endian AAPCS. */
3514 if (arm_return_in_msb (type))
3516 HOST_WIDE_INT size = int_size_in_bytes (type);
3517 if (size % UNITS_PER_WORD != 0)
3519 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3520 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3524 return LIBCALL_VALUE (mode);
3527 static int
3528 libcall_eq (const void *p1, const void *p2)
3530 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3533 static hashval_t
3534 libcall_hash (const void *p1)
3536 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3539 static void
3540 add_libcall (htab_t htab, rtx libcall)
3542 *htab_find_slot (htab, libcall, INSERT) = libcall;
3545 static bool
3546 arm_libcall_uses_aapcs_base (const_rtx libcall)
3548 static bool init_done = false;
3549 static htab_t libcall_htab;
3551 if (!init_done)
3553 init_done = true;
3555 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3556 NULL);
3557 add_libcall (libcall_htab,
3558 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3559 add_libcall (libcall_htab,
3560 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3561 add_libcall (libcall_htab,
3562 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3563 add_libcall (libcall_htab,
3564 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3566 add_libcall (libcall_htab,
3567 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3568 add_libcall (libcall_htab,
3569 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3570 add_libcall (libcall_htab,
3571 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3572 add_libcall (libcall_htab,
3573 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3575 add_libcall (libcall_htab,
3576 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3577 add_libcall (libcall_htab,
3578 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3579 add_libcall (libcall_htab,
3580 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3581 add_libcall (libcall_htab,
3582 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3583 add_libcall (libcall_htab,
3584 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3585 add_libcall (libcall_htab,
3586 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3589 return libcall && htab_find (libcall_htab, libcall) != NULL;
3593 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3595 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3596 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3598 /* The following libcalls return their result in integer registers,
3599 even though they return a floating point value. */
3600 if (arm_libcall_uses_aapcs_base (libcall))
3601 return gen_rtx_REG (mode, ARG_REGISTER(1));
3605 return LIBCALL_VALUE (mode);
3608 /* Determine the amount of memory needed to store the possible return
3609 registers of an untyped call. */
3611 arm_apply_result_size (void)
3613 int size = 16;
3615 if (TARGET_32BIT)
3617 if (TARGET_HARD_FLOAT_ABI)
3619 if (TARGET_VFP)
3620 size += 32;
3621 if (TARGET_FPA)
3622 size += 12;
3623 if (TARGET_MAVERICK)
3624 size += 8;
3626 if (TARGET_IWMMXT_ABI)
3627 size += 8;
3630 return size;
3633 /* Decide whether TYPE should be returned in memory (true)
3634 or in a register (false). FNTYPE is the type of the function making
3635 the call. */
3636 static bool
3637 arm_return_in_memory (const_tree type, const_tree fntype)
3639 HOST_WIDE_INT size;
3641 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3643 if (TARGET_AAPCS_BASED)
3645 /* Simple, non-aggregate types (i.e. not including vectors and
3646 complex) are always returned in a register (or registers).
3647 We don't care about which register here, so we can short-cut
3648 some of the detail. */
3649 if (!AGGREGATE_TYPE_P (type)
3650 && TREE_CODE (type) != VECTOR_TYPE
3651 && TREE_CODE (type) != COMPLEX_TYPE)
3652 return false;
3654 /* Any return value that is no larger than one word can be
3655 returned in r0. */
3656 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3657 return false;
3659 /* Check any available co-processors to see if they accept the
3660 type as a register candidate (VFP, for example, can return
3661 some aggregates in consecutive registers). These aren't
3662 available if the call is variadic. */
3663 if (aapcs_select_return_coproc (type, fntype) >= 0)
3664 return false;
3666 /* Vector values should be returned using ARM registers, not
3667 memory (unless they're over 16 bytes, which will break since
3668 we only have four call-clobbered registers to play with). */
3669 if (TREE_CODE (type) == VECTOR_TYPE)
3670 return (size < 0 || size > (4 * UNITS_PER_WORD));
3672 /* The rest go in memory. */
3673 return true;
3676 if (TREE_CODE (type) == VECTOR_TYPE)
3677 return (size < 0 || size > (4 * UNITS_PER_WORD));
 3679 if (!AGGREGATE_TYPE_P (type)
 3680 && (TREE_CODE (type) != VECTOR_TYPE))
3681 /* All simple types are returned in registers. */
3682 return false;
3684 if (arm_abi != ARM_ABI_APCS)
3686 /* ATPCS and later return aggregate types in memory only if they are
3687 larger than a word (or are variable size). */
3688 return (size < 0 || size > UNITS_PER_WORD);
3691 /* For the arm-wince targets we choose to be compatible with Microsoft's
3692 ARM and Thumb compilers, which always return aggregates in memory. */
3693 #ifndef ARM_WINCE
3694 /* All structures/unions bigger than one word are returned in memory.
3695 Also catch the case where int_size_in_bytes returns -1. In this case
3696 the aggregate is either huge or of variable size, and in either case
3697 we will want to return it via memory and not in a register. */
3698 if (size < 0 || size > UNITS_PER_WORD)
3699 return true;
3701 if (TREE_CODE (type) == RECORD_TYPE)
3703 tree field;
3705 /* For a struct the APCS says that we only return in a register
3706 if the type is 'integer like' and every addressable element
3707 has an offset of zero. For practical purposes this means
3708 that the structure can have at most one non bit-field element
3709 and that this element must be the first one in the structure. */
3711 /* Find the first field, ignoring non FIELD_DECL things which will
3712 have been created by C++. */
3713 for (field = TYPE_FIELDS (type);
3714 field && TREE_CODE (field) != FIELD_DECL;
3715 field = DECL_CHAIN (field))
3716 continue;
3718 if (field == NULL)
3719 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3721 /* Check that the first field is valid for returning in a register. */
3723 /* ... Floats are not allowed */
3724 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3725 return true;
3727 /* ... Aggregates that are not themselves valid for returning in
3728 a register are not allowed. */
3729 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3730 return true;
3732 /* Now check the remaining fields, if any. Only bitfields are allowed,
3733 since they are not addressable. */
3734 for (field = DECL_CHAIN (field);
3735 field;
3736 field = DECL_CHAIN (field))
3738 if (TREE_CODE (field) != FIELD_DECL)
3739 continue;
3741 if (!DECL_BIT_FIELD_TYPE (field))
3742 return true;
3745 return false;
3748 if (TREE_CODE (type) == UNION_TYPE)
3750 tree field;
3752 /* Unions can be returned in registers if every element is
3753 integral, or can be returned in an integer register. */
3754 for (field = TYPE_FIELDS (type);
3755 field;
3756 field = DECL_CHAIN (field))
3758 if (TREE_CODE (field) != FIELD_DECL)
3759 continue;
3761 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3762 return true;
3764 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3765 return true;
3768 return false;
3770 #endif /* not ARM_WINCE */
3772 /* Return all other types in memory. */
3773 return true;
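/* A few illustrative cases for the AAPCS branch above (assuming a
   32-bit word):

     int                      -- register (simple scalar)
     struct { char a, b; }    -- register (no larger than one word)
     struct { int a, b; }     -- memory (larger than one word and not
                                 accepted by any co-processor)
     struct { double d[2]; }  -- VFP registers when the AAPCS VFP
                                 variant applies (a homogeneous FP
                                 aggregate), memory otherwise.  */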
3776 /* Indicate whether or not words of a double are in big-endian order. */
3779 arm_float_words_big_endian (void)
3781 if (TARGET_MAVERICK)
3782 return 0;
 3784 /* For FPA, float words are always big-endian. For VFP, float words
3785 follow the memory system mode. */
3787 if (TARGET_FPA)
3789 return 1;
3792 if (TARGET_VFP)
3793 return (TARGET_BIG_END ? 1 : 0);
3795 return 1;
3798 const struct pcs_attribute_arg
3800 const char *arg;
3801 enum arm_pcs value;
3802 } pcs_attribute_args[] =
3804 {"aapcs", ARM_PCS_AAPCS},
3805 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3806 #if 0
3807 /* We could recognize these, but changes would be needed elsewhere
3808 * to implement them. */
3809 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3810 {"atpcs", ARM_PCS_ATPCS},
3811 {"apcs", ARM_PCS_APCS},
3812 #endif
3813 {NULL, ARM_PCS_UNKNOWN}
3816 static enum arm_pcs
3817 arm_pcs_from_attribute (tree attr)
3819 const struct pcs_attribute_arg *ptr;
3820 const char *arg;
3822 /* Get the value of the argument. */
3823 if (TREE_VALUE (attr) == NULL_TREE
3824 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3825 return ARM_PCS_UNKNOWN;
3827 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3829 /* Check it against the list of known arguments. */
3830 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3831 if (streq (arg, ptr->arg))
3832 return ptr->value;
 3834 /* An unrecognized PCS name. */
3835 return ARM_PCS_UNKNOWN;
3838 /* Get the PCS variant to use for this call. TYPE is the function's type
 3839 specification, DECL is the specific declaration. DECL may be null if
3840 the call could be indirect or if this is a library call. */
3841 static enum arm_pcs
3842 arm_get_pcs_model (const_tree type, const_tree decl)
3844 bool user_convention = false;
3845 enum arm_pcs user_pcs = arm_pcs_default;
3846 tree attr;
3848 gcc_assert (type);
3850 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3851 if (attr)
3853 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3854 user_convention = true;
3857 if (TARGET_AAPCS_BASED)
3859 /* Detect varargs functions. These always use the base rules
3860 (no argument is ever a candidate for a co-processor
3861 register). */
3862 bool base_rules = stdarg_p (type);
3864 if (user_convention)
3866 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3867 sorry ("non-AAPCS derived PCS variant");
3868 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3869 error ("variadic functions must use the base AAPCS variant");
3872 if (base_rules)
3873 return ARM_PCS_AAPCS;
3874 else if (user_convention)
3875 return user_pcs;
3876 else if (decl && flag_unit_at_a_time)
3878 /* Local functions never leak outside this compilation unit,
3879 so we are free to use whatever conventions are
3880 appropriate. */
3881 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3882 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3883 if (i && i->local)
3884 return ARM_PCS_AAPCS_LOCAL;
3887 else if (user_convention && user_pcs != arm_pcs_default)
3888 sorry ("PCS variant");
3890 /* For everything else we use the target's default. */
3891 return arm_pcs_default;
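/* Usage sketch for the "pcs" type attribute handled above (given a
   target where VFP argument passing is available):

     double f (double) __attribute__ ((pcs ("aapcs-vfp")));
     double g (double) __attribute__ ((pcs ("aapcs")));

   Calls to f pass and return the double in VFP registers; calls to g
   use the base variant and core registers.  */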
3895 static void
3896 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3897 const_tree fntype ATTRIBUTE_UNUSED,
3898 rtx libcall ATTRIBUTE_UNUSED,
3899 const_tree fndecl ATTRIBUTE_UNUSED)
3901 /* Record the unallocated VFP registers. */
3902 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3903 pcum->aapcs_vfp_reg_alloc = 0;
3906 /* Walk down the type tree of TYPE counting consecutive base elements.
3907 If *MODEP is VOIDmode, then set it to the first valid floating point
3908 type. If a non-floating point type is found, or if a floating point
3909 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3910 otherwise return the count in the sub-tree. */
3911 static int
3912 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3914 enum machine_mode mode;
3915 HOST_WIDE_INT size;
3917 switch (TREE_CODE (type))
3919 case REAL_TYPE:
3920 mode = TYPE_MODE (type);
3921 if (mode != DFmode && mode != SFmode)
3922 return -1;
3924 if (*modep == VOIDmode)
3925 *modep = mode;
3927 if (*modep == mode)
3928 return 1;
3930 break;
3932 case COMPLEX_TYPE:
3933 mode = TYPE_MODE (TREE_TYPE (type));
3934 if (mode != DFmode && mode != SFmode)
3935 return -1;
3937 if (*modep == VOIDmode)
3938 *modep = mode;
3940 if (*modep == mode)
3941 return 2;
3943 break;
3945 case VECTOR_TYPE:
3946 /* Use V2SImode and V4SImode as representatives of all 64-bit
3947 and 128-bit vector types, whether or not those modes are
3948 supported with the present options. */
3949 size = int_size_in_bytes (type);
3950 switch (size)
3952 case 8:
3953 mode = V2SImode;
3954 break;
3955 case 16:
3956 mode = V4SImode;
3957 break;
3958 default:
3959 return -1;
3962 if (*modep == VOIDmode)
3963 *modep = mode;
3965 /* Vector modes are considered to be opaque: two vectors are
3966 equivalent for the purposes of being homogeneous aggregates
3967 if they are the same size. */
3968 if (*modep == mode)
3969 return 1;
3971 break;
3973 case ARRAY_TYPE:
3975 int count;
3976 tree index = TYPE_DOMAIN (type);
3978 /* Can't handle incomplete types. */
3979 if (!COMPLETE_TYPE_P(type))
3980 return -1;
3982 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3983 if (count == -1
3984 || !index
3985 || !TYPE_MAX_VALUE (index)
3986 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3987 || !TYPE_MIN_VALUE (index)
3988 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3989 || count < 0)
3990 return -1;
3992 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3993 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3995 /* There must be no padding. */
3996 if (!host_integerp (TYPE_SIZE (type), 1)
3997 || (tree_low_cst (TYPE_SIZE (type), 1)
3998 != count * GET_MODE_BITSIZE (*modep)))
3999 return -1;
4001 return count;
4004 case RECORD_TYPE:
4006 int count = 0;
4007 int sub_count;
4008 tree field;
4010 /* Can't handle incomplete types. */
4011 if (!COMPLETE_TYPE_P(type))
4012 return -1;
4014 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4016 if (TREE_CODE (field) != FIELD_DECL)
4017 continue;
4019 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4020 if (sub_count < 0)
4021 return -1;
4022 count += sub_count;
4025 /* There must be no padding. */
4026 if (!host_integerp (TYPE_SIZE (type), 1)
4027 || (tree_low_cst (TYPE_SIZE (type), 1)
4028 != count * GET_MODE_BITSIZE (*modep)))
4029 return -1;
4031 return count;
4034 case UNION_TYPE:
4035 case QUAL_UNION_TYPE:
4037 /* These aren't very interesting except in a degenerate case. */
4038 int count = 0;
4039 int sub_count;
4040 tree field;
4042 /* Can't handle incomplete types. */
4043 if (!COMPLETE_TYPE_P(type))
4044 return -1;
4046 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4048 if (TREE_CODE (field) != FIELD_DECL)
4049 continue;
4051 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4052 if (sub_count < 0)
4053 return -1;
4054 count = count > sub_count ? count : sub_count;
4057 /* There must be no padding. */
4058 if (!host_integerp (TYPE_SIZE (type), 1)
4059 || (tree_low_cst (TYPE_SIZE (type), 1)
4060 != count * GET_MODE_BITSIZE (*modep)))
4061 return -1;
4063 return count;
4066 default:
4067 break;
4070 return -1;
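/* Some illustrative inputs and results for the walk above:

     struct { float x, y, z; }      -> 3 elements, *MODEP == SFmode
     struct { double re, im; }      -> 2 elements, *MODEP == DFmode
     _Complex double                -> 2 elements, *MODEP == DFmode
     float v[4]                     -> 4 elements, *MODEP == SFmode
     struct { float f; double d; }  -> -1 (mixed base modes)
     struct { float f; int i; }     -> -1 (non floating-point member)  */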
4073 /* Return true if PCS_VARIANT should use VFP registers. */
4074 static bool
4075 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4077 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4079 static bool seen_thumb1_vfp = false;
4081 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4083 sorry ("Thumb-1 hard-float VFP ABI");
4084 /* sorry() is not immediately fatal, so only display this once. */
4085 seen_thumb1_vfp = true;
4088 return true;
4091 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4092 return false;
 4094 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
 4095 && (TARGET_VFP_DOUBLE || !is_double));
4098 static bool
4099 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4100 enum machine_mode mode, const_tree type,
4101 enum machine_mode *base_mode, int *count)
4103 enum machine_mode new_mode = VOIDmode;
4105 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4106 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4107 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4109 *count = 1;
4110 new_mode = mode;
4112 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4114 *count = 2;
4115 new_mode = (mode == DCmode ? DFmode : SFmode);
4117 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4119 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4121 if (ag_count > 0 && ag_count <= 4)
4122 *count = ag_count;
4123 else
4124 return false;
4126 else
4127 return false;
4130 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4131 return false;
4133 *base_mode = new_mode;
4134 return true;
4137 static bool
4138 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4139 enum machine_mode mode, const_tree type)
4141 int count ATTRIBUTE_UNUSED;
4142 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4144 if (!use_vfp_abi (pcs_variant, false))
4145 return false;
4146 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4147 &ag_mode, &count);
4150 static bool
4151 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4152 const_tree type)
4154 if (!use_vfp_abi (pcum->pcs_variant, false))
4155 return false;
4157 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4158 &pcum->aapcs_vfp_rmode,
4159 &pcum->aapcs_vfp_rcount);
4162 static bool
4163 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4164 const_tree type ATTRIBUTE_UNUSED)
4166 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4167 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4168 int regno;
4170 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4171 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4173 pcum->aapcs_vfp_reg_alloc = mask << regno;
4174 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4176 int i;
4177 int rcount = pcum->aapcs_vfp_rcount;
4178 int rshift = shift;
4179 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4180 rtx par;
4181 if (!TARGET_NEON)
4183 /* Avoid using unsupported vector modes. */
4184 if (rmode == V2SImode)
4185 rmode = DImode;
4186 else if (rmode == V4SImode)
4188 rmode = DImode;
4189 rcount *= 2;
4190 rshift /= 2;
4193 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4194 for (i = 0; i < rcount; i++)
4196 rtx tmp = gen_rtx_REG (rmode,
4197 FIRST_VFP_REGNUM + regno + i * rshift);
4198 tmp = gen_rtx_EXPR_LIST
4199 (VOIDmode, tmp,
4200 GEN_INT (i * GET_MODE_SIZE (rmode)));
4201 XVECEXP (par, 0, i) = tmp;
4204 pcum->aapcs_reg = par;
4206 else
4207 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4208 return true;
4210 return false;
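/* Allocation sketch: for a candidate of DFmode with rcount == 2,
   SHIFT is 2 and MASK covers four S registers, so the loop looks for
   a free s0-s3, then s2-s5, and so on.  Only the registers actually
   taken are cleared from aapcs_vfp_regs_free (see aapcs_vfp_advance),
   so a later SFmode argument can still back-fill a single S register
   left free by earlier double-word allocations.  */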
4213 static rtx
4214 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4215 enum machine_mode mode,
4216 const_tree type ATTRIBUTE_UNUSED)
4218 if (!use_vfp_abi (pcs_variant, false))
 4219 return NULL_RTX;
4221 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4223 int count;
4224 enum machine_mode ag_mode;
4225 int i;
4226 rtx par;
4227 int shift;
4229 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4230 &ag_mode, &count);
4232 if (!TARGET_NEON)
4234 if (ag_mode == V2SImode)
4235 ag_mode = DImode;
4236 else if (ag_mode == V4SImode)
4238 ag_mode = DImode;
4239 count *= 2;
4242 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4243 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4244 for (i = 0; i < count; i++)
4246 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4247 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4248 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4249 XVECEXP (par, 0, i) = tmp;
4252 return par;
4255 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4258 static void
4259 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4260 enum machine_mode mode ATTRIBUTE_UNUSED,
4261 const_tree type ATTRIBUTE_UNUSED)
4263 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4264 pcum->aapcs_vfp_reg_alloc = 0;
4265 return;
4268 #define AAPCS_CP(X) \
4270 aapcs_ ## X ## _cum_init, \
4271 aapcs_ ## X ## _is_call_candidate, \
4272 aapcs_ ## X ## _allocate, \
4273 aapcs_ ## X ## _is_return_candidate, \
4274 aapcs_ ## X ## _allocate_return_reg, \
4275 aapcs_ ## X ## _advance \
4278 /* Table of co-processors that can be used to pass arguments in
 4279 registers. Ideally no argument should be a candidate for more than
4280 one co-processor table entry, but the table is processed in order
4281 and stops after the first match. If that entry then fails to put
4282 the argument into a co-processor register, the argument will go on
4283 the stack. */
4284 static struct
4286 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4287 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4289 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4290 BLKmode) is a candidate for this co-processor's registers; this
4291 function should ignore any position-dependent state in
4292 CUMULATIVE_ARGS and only use call-type dependent information. */
4293 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4295 /* Return true if the argument does get a co-processor register; it
4296 should set aapcs_reg to an RTX of the register allocated as is
4297 required for a return from FUNCTION_ARG. */
4298 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4300 /* Return true if a result of mode MODE (or type TYPE if MODE is
 4301 BLKmode) can be returned in this co-processor's registers. */
4302 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
 4304 /* Allocate and return an RTX element to hold the return value of a
 4305 call; this routine must not fail and will only be called if
4306 is_return_candidate returned true with the same parameters. */
4307 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4309 /* Finish processing this argument and prepare to start processing
4310 the next one. */
4311 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4312 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4314 AAPCS_CP(vfp)
4317 #undef AAPCS_CP
4319 static int
4320 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4321 const_tree type)
4323 int i;
4325 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4326 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4327 return i;
4329 return -1;
4332 static int
4333 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4335 /* We aren't passed a decl, so we can't check that a call is local.
4336 However, it isn't clear that that would be a win anyway, since it
4337 might limit some tail-calling opportunities. */
4338 enum arm_pcs pcs_variant;
4340 if (fntype)
4342 const_tree fndecl = NULL_TREE;
4344 if (TREE_CODE (fntype) == FUNCTION_DECL)
4346 fndecl = fntype;
4347 fntype = TREE_TYPE (fntype);
4350 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4352 else
4353 pcs_variant = arm_pcs_default;
4355 if (pcs_variant != ARM_PCS_AAPCS)
4357 int i;
4359 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4360 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4361 TYPE_MODE (type),
4362 type))
4363 return i;
4365 return -1;
4368 static rtx
4369 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4370 const_tree fntype)
4372 /* We aren't passed a decl, so we can't check that a call is local.
4373 However, it isn't clear that that would be a win anyway, since it
4374 might limit some tail-calling opportunities. */
4375 enum arm_pcs pcs_variant;
4376 int unsignedp ATTRIBUTE_UNUSED;
4378 if (fntype)
4380 const_tree fndecl = NULL_TREE;
4382 if (TREE_CODE (fntype) == FUNCTION_DECL)
4384 fndecl = fntype;
4385 fntype = TREE_TYPE (fntype);
4388 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4390 else
4391 pcs_variant = arm_pcs_default;
4393 /* Promote integer types. */
4394 if (type && INTEGRAL_TYPE_P (type))
4395 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4397 if (pcs_variant != ARM_PCS_AAPCS)
4399 int i;
4401 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4402 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4403 type))
4404 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4405 mode, type);
4408 /* Promotes small structs returned in a register to full-word size
4409 for big-endian AAPCS. */
4410 if (type && arm_return_in_msb (type))
4412 HOST_WIDE_INT size = int_size_in_bytes (type);
4413 if (size % UNITS_PER_WORD != 0)
4415 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4416 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4420 return gen_rtx_REG (mode, R0_REGNUM);
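/* For instance, on a big-endian AAPCS target a 3-byte structure
   returned in a register is widened here to a full SImode value in
   r0, so caller and callee agree on which end of the register holds
   the significant bytes.  */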
4424 aapcs_libcall_value (enum machine_mode mode)
4426 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4429 /* Lay out a function argument using the AAPCS rules. The rule
4430 numbers referred to here are those in the AAPCS. */
4431 static void
4432 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4433 const_tree type, bool named)
4435 int nregs, nregs2;
4436 int ncrn;
4438 /* We only need to do this once per argument. */
4439 if (pcum->aapcs_arg_processed)
4440 return;
4442 pcum->aapcs_arg_processed = true;
4444 /* Special case: if named is false then we are handling an incoming
4445 anonymous argument which is on the stack. */
4446 if (!named)
4447 return;
4449 /* Is this a potential co-processor register candidate? */
4450 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4452 int slot = aapcs_select_call_coproc (pcum, mode, type);
4453 pcum->aapcs_cprc_slot = slot;
4455 /* We don't have to apply any of the rules from part B of the
4456 preparation phase, these are handled elsewhere in the
4457 compiler. */
4459 if (slot >= 0)
4461 /* A Co-processor register candidate goes either in its own
4462 class of registers or on the stack. */
4463 if (!pcum->aapcs_cprc_failed[slot])
4465 /* C1.cp - Try to allocate the argument to co-processor
4466 registers. */
4467 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4468 return;
4470 /* C2.cp - Put the argument on the stack and note that we
4471 can't assign any more candidates in this slot. We also
4472 need to note that we have allocated stack space, so that
4473 we won't later try to split a non-cprc candidate between
4474 core registers and the stack. */
4475 pcum->aapcs_cprc_failed[slot] = true;
4476 pcum->can_split = false;
4479 /* We didn't get a register, so this argument goes on the
4480 stack. */
4481 gcc_assert (pcum->can_split == false);
4482 return;
4486 /* C3 - For double-word aligned arguments, round the NCRN up to the
4487 next even number. */
4488 ncrn = pcum->aapcs_ncrn;
4489 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4490 ncrn++;
4492 nregs = ARM_NUM_REGS2(mode, type);
4494 /* Sigh, this test should really assert that nregs > 0, but a GCC
4495 extension allows empty structs and then gives them empty size; it
4496 then allows such a structure to be passed by value. For some of
4497 the code below we have to pretend that such an argument has
4498 non-zero size so that we 'locate' it correctly either in
4499 registers or on the stack. */
4500 gcc_assert (nregs >= 0);
4502 nregs2 = nregs ? nregs : 1;
4504 /* C4 - Argument fits entirely in core registers. */
4505 if (ncrn + nregs2 <= NUM_ARG_REGS)
4507 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4508 pcum->aapcs_next_ncrn = ncrn + nregs;
4509 return;
4512 /* C5 - Some core registers left and there are no arguments already
4513 on the stack: split this argument between the remaining core
4514 registers and the stack. */
4515 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4517 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4518 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4519 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4520 return;
4523 /* C6 - NCRN is set to 4. */
4524 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
 4526 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4527 return;
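/* Worked example of the rules above for the base variant:

     void f (int a, double b, int c);

   A goes in r0 (NCRN becomes 1).  B needs doubleword alignment, so C3
   rounds NCRN up to 2 and B occupies r2-r3 (NCRN becomes 4).  C then
   fails both C4 and C5 and is placed on the stack; r1 stays unused.  */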
4530 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4531 for a call to a function whose data type is FNTYPE.
4532 For a library call, FNTYPE is NULL. */
4533 void
4534 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4535 rtx libname,
4536 tree fndecl ATTRIBUTE_UNUSED)
 4538 /* Work out which PCS variant applies to this call. */
4539 if (fntype)
4540 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4541 else
4542 pcum->pcs_variant = arm_pcs_default;
4544 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4546 if (arm_libcall_uses_aapcs_base (libname))
4547 pcum->pcs_variant = ARM_PCS_AAPCS;
4549 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4550 pcum->aapcs_reg = NULL_RTX;
4551 pcum->aapcs_partial = 0;
4552 pcum->aapcs_arg_processed = false;
4553 pcum->aapcs_cprc_slot = -1;
4554 pcum->can_split = true;
4556 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4558 int i;
4560 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4562 pcum->aapcs_cprc_failed[i] = false;
4563 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4566 return;
4569 /* Legacy ABIs */
4571 /* On the ARM, the offset starts at 0. */
4572 pcum->nregs = 0;
4573 pcum->iwmmxt_nregs = 0;
4574 pcum->can_split = true;
4576 /* Varargs vectors are treated the same as long long.
4577 named_count avoids having to change the way arm handles 'named' */
4578 pcum->named_count = 0;
4579 pcum->nargs = 0;
4581 if (TARGET_REALLY_IWMMXT && fntype)
4583 tree fn_arg;
4585 for (fn_arg = TYPE_ARG_TYPES (fntype);
4586 fn_arg;
4587 fn_arg = TREE_CHAIN (fn_arg))
4588 pcum->named_count += 1;
4590 if (! pcum->named_count)
4591 pcum->named_count = INT_MAX;
4596 /* Return true if mode/type need doubleword alignment. */
4597 static bool
4598 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4600 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4601 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4605 /* Determine where to put an argument to a function.
4606 Value is zero to push the argument on the stack,
4607 or a hard register in which to store the argument.
4609 MODE is the argument's machine mode.
4610 TYPE is the data type of the argument (as a tree).
4611 This is null for libcalls where that information may
4612 not be available.
4613 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4614 the preceding args and about the function being called.
4615 NAMED is nonzero if this argument is a named parameter
4616 (otherwise it is an extra parameter matching an ellipsis).
4618 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4619 other arguments are passed on the stack. If (NAMED == 0) (which happens
4620 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
 4621 defined), say it is passed on the stack (function_prologue will
 4622 indeed make it pass on the stack if necessary). */
4624 static rtx
4625 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4626 const_tree type, bool named)
4628 int nregs;
4630 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4631 a call insn (op3 of a call_value insn). */
4632 if (mode == VOIDmode)
4633 return const0_rtx;
4635 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4637 aapcs_layout_arg (pcum, mode, type, named);
4638 return pcum->aapcs_reg;
4641 /* Varargs vectors are treated the same as long long.
4642 named_count avoids having to change the way arm handles 'named' */
4643 if (TARGET_IWMMXT_ABI
4644 && arm_vector_mode_supported_p (mode)
4645 && pcum->named_count > pcum->nargs + 1)
4647 if (pcum->iwmmxt_nregs <= 9)
4648 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4649 else
4651 pcum->can_split = false;
4652 return NULL_RTX;
4656 /* Put doubleword aligned quantities in even register pairs. */
4657 if (pcum->nregs & 1
4658 && ARM_DOUBLEWORD_ALIGN
4659 && arm_needs_doubleword_align (mode, type))
4660 pcum->nregs++;
4662 /* Only allow splitting an arg between regs and memory if all preceding
4663 args were allocated to regs. For args passed by reference we only count
4664 the reference pointer. */
4665 if (pcum->can_split)
4666 nregs = 1;
4667 else
4668 nregs = ARM_NUM_REGS2 (mode, type);
4670 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4671 return NULL_RTX;
4673 return gen_rtx_REG (mode, pcum->nregs);
4676 static unsigned int
4677 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4679 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4680 ? DOUBLEWORD_ALIGNMENT
4681 : PARM_BOUNDARY);
4684 static int
4685 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4686 tree type, bool named)
4688 int nregs = pcum->nregs;
4690 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4692 aapcs_layout_arg (pcum, mode, type, named);
4693 return pcum->aapcs_partial;
4696 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4697 return 0;
4699 if (NUM_ARG_REGS > nregs
4700 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4701 && pcum->can_split)
4702 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4704 return 0;
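/* Example for the legacy-ABI path above: with two argument registers
   already used (NREGS == 2), a 16-byte structure that may be split
   gets (4 - 2) * UNITS_PER_WORD == 8 bytes in r2-r3 and the remaining
   8 bytes on the stack.  */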
4707 /* Update the data in PCUM to advance over an argument
4708 of mode MODE and data type TYPE.
4709 (TYPE is null for libcalls where that information may not be available.) */
4711 static void
4712 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4713 const_tree type, bool named)
4715 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4717 aapcs_layout_arg (pcum, mode, type, named);
4719 if (pcum->aapcs_cprc_slot >= 0)
4721 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4722 type);
4723 pcum->aapcs_cprc_slot = -1;
4726 /* Generic stuff. */
4727 pcum->aapcs_arg_processed = false;
4728 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4729 pcum->aapcs_reg = NULL_RTX;
4730 pcum->aapcs_partial = 0;
4732 else
4734 pcum->nargs += 1;
4735 if (arm_vector_mode_supported_p (mode)
4736 && pcum->named_count > pcum->nargs
4737 && TARGET_IWMMXT_ABI)
4738 pcum->iwmmxt_nregs += 1;
4739 else
4740 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4744 /* Variable sized types are passed by reference. This is a GCC
4745 extension to the ARM ABI. */
4747 static bool
4748 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4749 enum machine_mode mode ATTRIBUTE_UNUSED,
4750 const_tree type, bool named ATTRIBUTE_UNUSED)
4752 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4755 /* Encode the current state of the #pragma [no_]long_calls. */
4756 typedef enum
4758 OFF, /* No #pragma [no_]long_calls is in effect. */
4759 LONG, /* #pragma long_calls is in effect. */
4760 SHORT /* #pragma no_long_calls is in effect. */
4761 } arm_pragma_enum;
4763 static arm_pragma_enum arm_pragma_long_calls = OFF;
4765 void
4766 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4768 arm_pragma_long_calls = LONG;
4771 void
4772 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4774 arm_pragma_long_calls = SHORT;
4777 void
4778 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4780 arm_pragma_long_calls = OFF;
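/* Usage sketch for the pragmas above:

     #pragma long_calls
     void far_away (void);       -- calls use the long sequence
     #pragma no_long_calls
     void nearby (void);         -- calls use a plain BL
     #pragma long_calls_off      -- back to the command-line default  */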
4783 /* Handle an attribute requiring a FUNCTION_DECL;
4784 arguments as in struct attribute_spec.handler. */
4785 static tree
4786 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4787 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4789 if (TREE_CODE (*node) != FUNCTION_DECL)
4791 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4792 name);
4793 *no_add_attrs = true;
4796 return NULL_TREE;
4799 /* Handle an "interrupt" or "isr" attribute;
4800 arguments as in struct attribute_spec.handler. */
4801 static tree
4802 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4803 bool *no_add_attrs)
4805 if (DECL_P (*node))
4807 if (TREE_CODE (*node) != FUNCTION_DECL)
4809 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4810 name);
4811 *no_add_attrs = true;
4813 /* FIXME: the argument if any is checked for type attributes;
4814 should it be checked for decl ones? */
4816 else
4818 if (TREE_CODE (*node) == FUNCTION_TYPE
4819 || TREE_CODE (*node) == METHOD_TYPE)
4821 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4823 warning (OPT_Wattributes, "%qE attribute ignored",
4824 name);
4825 *no_add_attrs = true;
4828 else if (TREE_CODE (*node) == POINTER_TYPE
4829 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4830 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4831 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4833 *node = build_variant_type_copy (*node);
4834 TREE_TYPE (*node) = build_type_attribute_variant
4835 (TREE_TYPE (*node),
4836 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4837 *no_add_attrs = true;
4839 else
4841 /* Possibly pass this attribute on from the type to a decl. */
4842 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4843 | (int) ATTR_FLAG_FUNCTION_NEXT
4844 | (int) ATTR_FLAG_ARRAY_NEXT))
4846 *no_add_attrs = true;
4847 return tree_cons (name, args, NULL_TREE);
4849 else
4851 warning (OPT_Wattributes, "%qE attribute ignored",
4852 name);
4857 return NULL_TREE;
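/* Usage sketch for the attribute handled above:

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   The optional argument names the exception type (e.g. "IRQ", "FIQ");
   an unrecognized value is warned about and the attribute dropped.  */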
4860 /* Handle a "pcs" attribute; arguments as in struct
4861 attribute_spec.handler. */
4862 static tree
4863 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4864 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4866 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4868 warning (OPT_Wattributes, "%qE attribute ignored", name);
4869 *no_add_attrs = true;
4871 return NULL_TREE;
4874 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4875 /* Handle the "notshared" attribute. This attribute is another way of
4876 requesting hidden visibility. ARM's compiler supports
4877 "__declspec(notshared)"; we support the same thing via an
4878 attribute. */
4880 static tree
4881 arm_handle_notshared_attribute (tree *node,
4882 tree name ATTRIBUTE_UNUSED,
4883 tree args ATTRIBUTE_UNUSED,
4884 int flags ATTRIBUTE_UNUSED,
4885 bool *no_add_attrs)
4887 tree decl = TYPE_NAME (*node);
4889 if (decl)
4891 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4892 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4893 *no_add_attrs = false;
4895 return NULL_TREE;
4897 #endif
4899 /* Return 0 if the attributes for two types are incompatible, 1 if they
4900 are compatible, and 2 if they are nearly compatible (which causes a
4901 warning to be generated). */
4902 static int
4903 arm_comp_type_attributes (const_tree type1, const_tree type2)
4905 int l1, l2, s1, s2;
4907 /* Check for mismatch of non-default calling convention. */
4908 if (TREE_CODE (type1) != FUNCTION_TYPE)
4909 return 1;
4911 /* Check for mismatched call attributes. */
4912 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4913 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4914 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4915 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4917 /* Only bother to check if an attribute is defined. */
4918 if (l1 | l2 | s1 | s2)
4920 /* If one type has an attribute, the other must have the same attribute. */
4921 if ((l1 != l2) || (s1 != s2))
4922 return 0;
4924 /* Disallow mixed attributes. */
4925 if ((l1 & s2) || (l2 & s1))
4926 return 0;
4929 /* Check for mismatched ISR attribute. */
4930 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4931 if (! l1)
4932 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4933 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4934 if (! l2)
 4935 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4936 if (l1 != l2)
4937 return 0;
4939 return 1;
4942 /* Assigns default attributes to newly defined type. This is used to
4943 set short_call/long_call attributes for function types of
4944 functions defined inside corresponding #pragma scopes. */
4945 static void
4946 arm_set_default_type_attributes (tree type)
4948 /* Add __attribute__ ((long_call)) to all functions, when
4949 inside #pragma long_calls or __attribute__ ((short_call)),
4950 when inside #pragma no_long_calls. */
4951 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4953 tree type_attr_list, attr_name;
4954 type_attr_list = TYPE_ATTRIBUTES (type);
4956 if (arm_pragma_long_calls == LONG)
4957 attr_name = get_identifier ("long_call");
4958 else if (arm_pragma_long_calls == SHORT)
4959 attr_name = get_identifier ("short_call");
4960 else
4961 return;
4963 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4964 TYPE_ATTRIBUTES (type) = type_attr_list;
4968 /* Return true if DECL is known to be linked into section SECTION. */
4970 static bool
4971 arm_function_in_section_p (tree decl, section *section)
4973 /* We can only be certain about functions defined in the same
4974 compilation unit. */
4975 if (!TREE_STATIC (decl))
4976 return false;
4978 /* Make sure that SYMBOL always binds to the definition in this
4979 compilation unit. */
4980 if (!targetm.binds_local_p (decl))
4981 return false;
4983 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4984 if (!DECL_SECTION_NAME (decl))
4986 /* Make sure that we will not create a unique section for DECL. */
4987 if (flag_function_sections || DECL_ONE_ONLY (decl))
4988 return false;
4991 return function_section (decl) == section;
4994 /* Return nonzero if a 32-bit "long_call" should be generated for
4995 a call from the current function to DECL. We generate a long_call
4996 if the function:
 4998 a. has an __attribute__ ((long_call))
4999 or b. is within the scope of a #pragma long_calls
5000 or c. the -mlong-calls command line switch has been specified
5002 However we do not generate a long call if the function:
5004 d. has an __attribute__ ((short_call))
5005 or e. is inside the scope of a #pragma no_long_calls
5006 or f. is defined in the same section as the current function. */
5008 bool
5009 arm_is_long_call_p (tree decl)
5011 tree attrs;
5013 if (!decl)
5014 return TARGET_LONG_CALLS;
5016 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5017 if (lookup_attribute ("short_call", attrs))
5018 return false;
5020 /* For "f", be conservative, and only cater for cases in which the
5021 whole of the current function is placed in the same section. */
5022 if (!flag_reorder_blocks_and_partition
5023 && TREE_CODE (decl) == FUNCTION_DECL
5024 && arm_function_in_section_p (decl, current_function_section ()))
5025 return false;
5027 if (lookup_attribute ("long_call", attrs))
5028 return true;
5030 return TARGET_LONG_CALLS;
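/* Example of cases a and d above:

     void f (void) __attribute__ ((long_call));
     void g (void) __attribute__ ((short_call));

   Calls to f always use the 32-bit long-call sequence; calls to g
   never do, even under -mlong-calls.  */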
5033 /* Return nonzero if it is ok to make a tail-call to DECL. */
5034 static bool
5035 arm_function_ok_for_sibcall (tree decl, tree exp)
5037 unsigned long func_type;
5039 if (cfun->machine->sibcall_blocked)
5040 return false;
5042 /* Never tailcall something for which we have no decl, or if we
5043 are generating code for Thumb-1. */
5044 if (decl == NULL || TARGET_THUMB1)
5045 return false;
5047 /* The PIC register is live on entry to VxWorks PLT entries, so we
5048 must make the call before restoring the PIC register. */
5049 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5050 return false;
5052 /* Cannot tail-call to long calls, since these are out of range of
5053 a branch instruction. */
5054 if (arm_is_long_call_p (decl))
5055 return false;
5057 /* If we are interworking and the function is not declared static
5058 then we can't tail-call it unless we know that it exists in this
5059 compilation unit (since it might be a Thumb routine). */
5060 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5061 return false;
5063 func_type = arm_current_func_type ();
5064 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5065 if (IS_INTERRUPT (func_type))
5066 return false;
5068 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5070 /* Check that the return value locations are the same. For
5071 example that we aren't returning a value from the sibling in
5072 a VFP register but then need to transfer it to a core
5073 register. */
5074 rtx a, b;
5076 a = arm_function_value (TREE_TYPE (exp), decl, false);
5077 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5078 cfun->decl, false);
5079 if (!rtx_equal_p (a, b))
5080 return false;
5083 /* Never tailcall if function may be called with a misaligned SP. */
5084 if (IS_STACKALIGN (func_type))
5085 return false;
5087 /* Everything else is ok. */
5088 return true;
5092 /* Addressing mode support functions. */
5094 /* Return nonzero if X is a legitimate immediate operand when compiling
5095 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5097 legitimate_pic_operand_p (rtx x)
5099 if (GET_CODE (x) == SYMBOL_REF
5100 || (GET_CODE (x) == CONST
5101 && GET_CODE (XEXP (x, 0)) == PLUS
5102 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5103 return 0;
5105 return 1;
5108 /* Record that the current function needs a PIC register. Initialize
5109 cfun->machine->pic_reg if we have not already done so. */
5111 static void
5112 require_pic_register (void)
5114 /* A lot of the logic here is made obscure by the fact that this
5115 routine gets called as part of the rtx cost estimation process.
5116 We don't want those calls to affect any assumptions about the real
5117 function; and further, we can't call entry_of_function() until we
5118 start the real expansion process. */
5119 if (!crtl->uses_pic_offset_table)
5121 gcc_assert (can_create_pseudo_p ());
5122 if (arm_pic_register != INVALID_REGNUM)
5124 if (!cfun->machine->pic_reg)
5125 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5127 /* Play games to avoid marking the function as needing pic
5128 if we are being called as part of the cost-estimation
5129 process. */
5130 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5131 crtl->uses_pic_offset_table = 1;
5133 else
5135 rtx seq, insn;
5137 if (!cfun->machine->pic_reg)
5138 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5140 /* Play games to avoid marking the function as needing pic
5141 if we are being called as part of the cost-estimation
5142 process. */
5143 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5145 crtl->uses_pic_offset_table = 1;
5146 start_sequence ();
5148 arm_load_pic_register (0UL);
5150 seq = get_insns ();
5151 end_sequence ();
5153 for (insn = seq; insn; insn = NEXT_INSN (insn))
5154 if (INSN_P (insn))
5155 INSN_LOCATOR (insn) = prologue_locator;
5157 /* We can be called during expansion of PHI nodes, where
5158 we can't yet emit instructions directly in the final
5159 insn stream. Queue the insns on the entry edge, they will
5160 be committed after everything else is expanded. */
5161 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5168 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5170 if (GET_CODE (orig) == SYMBOL_REF
5171 || GET_CODE (orig) == LABEL_REF)
5173 rtx insn;
5175 if (reg == 0)
5177 gcc_assert (can_create_pseudo_p ());
5178 reg = gen_reg_rtx (Pmode);
5181 /* VxWorks does not impose a fixed gap between segments; the run-time
5182 gap can be different from the object-file gap. We therefore can't
5183 use GOTOFF unless we are absolutely sure that the symbol is in the
5184 same segment as the GOT. Unfortunately, the flexibility of linker
5185 scripts means that we can't be sure of that in general, so assume
5186 that GOTOFF is never valid on VxWorks. */
5187 if ((GET_CODE (orig) == LABEL_REF
 5188 || (GET_CODE (orig) == SYMBOL_REF
 5189 && SYMBOL_REF_LOCAL_P (orig)))
5190 && NEED_GOT_RELOC
5191 && !TARGET_VXWORKS_RTP)
5192 insn = arm_pic_static_addr (orig, reg);
5193 else
5195 rtx pat;
5196 rtx mem;
5198 /* If this function doesn't have a pic register, create one now. */
5199 require_pic_register ();
5201 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5203 /* Make the MEM as close to a constant as possible. */
5204 mem = SET_SRC (pat);
5205 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5206 MEM_READONLY_P (mem) = 1;
5207 MEM_NOTRAP_P (mem) = 1;
5209 insn = emit_insn (pat);
5212 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5213 by loop. */
5214 set_unique_reg_note (insn, REG_EQUAL, orig);
5216 return reg;
5218 else if (GET_CODE (orig) == CONST)
5220 rtx base, offset;
5222 if (GET_CODE (XEXP (orig, 0)) == PLUS
5223 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5224 return orig;
5226 /* Handle the case where we have: const (UNSPEC_TLS). */
5227 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5228 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5229 return orig;
5231 /* Handle the case where we have:
5232 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5233 CONST_INT. */
5234 if (GET_CODE (XEXP (orig, 0)) == PLUS
5235 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5236 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5238 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5239 return orig;
5242 if (reg == 0)
5244 gcc_assert (can_create_pseudo_p ());
5245 reg = gen_reg_rtx (Pmode);
5248 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5250 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5251 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5252 base == reg ? 0 : reg);
5254 if (GET_CODE (offset) == CONST_INT)
5256 /* The base register doesn't really matter, we only want to
5257 test the index for the appropriate mode. */
5258 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5260 gcc_assert (can_create_pseudo_p ());
5261 offset = force_reg (Pmode, offset);
5264 if (GET_CODE (offset) == CONST_INT)
5265 return plus_constant (base, INTVAL (offset));
5268 if (GET_MODE_SIZE (mode) > 4
5269 && (GET_MODE_CLASS (mode) == MODE_INT
5270 || TARGET_SOFT_FLOAT))
5272 emit_insn (gen_addsi3 (reg, base, offset));
5273 return reg;
5276 return gen_rtx_PLUS (Pmode, base, offset);
5279 return orig;
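/* Rough sketch of the common cases handled above: a global symbol is
   loaded through the GOT, in effect

     ldr  reg, [pic_reg, #<GOT offset of the symbol>]

   while a local symbol or label takes the arm_pic_static_addr path
   and is computed PC-relatively, avoiding a GOT entry altogether.  */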
 5283 /* Find a spare register to use during the prologue of a function. */
5285 static int
5286 thumb_find_work_register (unsigned long pushed_regs_mask)
5288 int reg;
5290 /* Check the argument registers first as these are call-used. The
5291 register allocation order means that sometimes r3 might be used
5292 but earlier argument registers might not, so check them all. */
5293 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5294 if (!df_regs_ever_live_p (reg))
5295 return reg;
5297 /* Before going on to check the call-saved registers we can try a couple
5298 more ways of deducing that r3 is available. The first is when we are
5299 pushing anonymous arguments onto the stack and we have less than 4
5300 registers worth of fixed arguments(*). In this case r3 will be part of
5301 the variable argument list and so we can be sure that it will be
5302 pushed right at the start of the function. Hence it will be available
5303 for the rest of the prologue.
5304 (*): ie crtl->args.pretend_args_size is greater than 0. */
5305 if (cfun->machine->uses_anonymous_args
5306 && crtl->args.pretend_args_size > 0)
5307 return LAST_ARG_REGNUM;
5309 /* The other case is when we have fixed arguments but less than 4 registers
5310 worth. In this case r3 might be used in the body of the function, but
5311 it is not being used to convey an argument into the function. In theory
5312 we could just check crtl->args.size to see how many bytes are
5313 being passed in argument registers, but it seems that it is unreliable.
5314 Sometimes it will have the value 0 when in fact arguments are being
5315 passed. (See testcase execute/20021111-1.c for an example). So we also
5316 check the args_info.nregs field as well. The problem with this field is
5317 that it makes no allowances for arguments that are passed to the
5318 function but which are not used. Hence we could miss an opportunity
5319 when a function has an unused argument in r3. But it is better to be
5320 safe than to be sorry. */
5321 if (! cfun->machine->uses_anonymous_args
5322 && crtl->args.size >= 0
5323 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5324 && crtl->args.info.nregs < 4)
5325 return LAST_ARG_REGNUM;
5327 /* Otherwise look for a call-saved register that is going to be pushed. */
5328 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5329 if (pushed_regs_mask & (1 << reg))
5330 return reg;
5332 if (TARGET_THUMB2)
5334 /* Thumb-2 can use high regs. */
5335 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5336 if (pushed_regs_mask & (1 << reg))
5337 return reg;
5339 /* Something went wrong - thumb_compute_save_reg_mask()
5340 should have arranged for a suitable register to be pushed. */
5341 gcc_unreachable ();
5344 static GTY(()) int pic_labelno;
 5346 /* Generate code to load the PIC register. In Thumb mode a spare low
 5347 register is located with thumb_find_work_register when needed. */
5349 void
5350 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5352 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5354 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5355 return;
5357 gcc_assert (flag_pic);
5359 pic_reg = cfun->machine->pic_reg;
5360 if (TARGET_VXWORKS_RTP)
5362 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5363 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5364 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5366 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5368 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5369 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5371 else
5373 /* We use an UNSPEC rather than a LABEL_REF because this label
5374 never appears in the code stream. */
5376 labelno = GEN_INT (pic_labelno++);
5377 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5378 l1 = gen_rtx_CONST (VOIDmode, l1);
5380 /* On the ARM the PC register contains 'dot + 8' at the time of the
5381 addition, on the Thumb it is 'dot + 4'. */
5382 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5383 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5384 UNSPEC_GOTSYM_OFF);
5385 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5387 if (TARGET_32BIT)
5389 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5390 if (TARGET_ARM)
5391 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5392 else
5393 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5395 else /* TARGET_THUMB1 */
5397 if (arm_pic_register != INVALID_REGNUM
5398 && REGNO (pic_reg) > LAST_LO_REGNUM)
5400 /* We will have pushed the pic register, so we should always be
5401 able to find a work register. */
5402 pic_tmp = gen_rtx_REG (SImode,
5403 thumb_find_work_register (saved_regs));
5404 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5405 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5407 else
5408 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5409 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5413 /* Need to emit this whether or not we obey regdecls,
5414 since setjmp/longjmp can cause life info to screw up. */
5415 emit_use (pic_reg);
5418 /* Generate code to load the address of a static var when flag_pic is set. */
5419 static rtx
5420 arm_pic_static_addr (rtx orig, rtx reg)
5422 rtx l1, labelno, offset_rtx, insn;
5424 gcc_assert (flag_pic);
5426 /* We use an UNSPEC rather than a LABEL_REF because this label
5427 never appears in the code stream. */
5428 labelno = GEN_INT (pic_labelno++);
5429 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5430 l1 = gen_rtx_CONST (VOIDmode, l1);
5432 /* On the ARM the PC register contains 'dot + 8' at the time of the
5433 addition, on the Thumb it is 'dot + 4'. */
5434 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5435 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5436 UNSPEC_SYMBOL_OFFSET);
5437 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5439 if (TARGET_32BIT)
5441 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5442 if (TARGET_ARM)
5443 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5444 else
5445 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5447 else /* TARGET_THUMB1 */
5449 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5450 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5453 return insn;
5456 /* Return nonzero if X is valid as an ARM state addressing register. */
5457 static int
5458 arm_address_register_rtx_p (rtx x, int strict_p)
5460 int regno;
5462 if (GET_CODE (x) != REG)
5463 return 0;
5465 regno = REGNO (x);
5467 if (strict_p)
5468 return ARM_REGNO_OK_FOR_BASE_P (regno);
5470 return (regno <= LAST_ARM_REGNUM
5471 || regno >= FIRST_PSEUDO_REGISTER
5472 || regno == FRAME_POINTER_REGNUM
5473 || regno == ARG_POINTER_REGNUM);
5476 /* Return TRUE if this rtx is the difference of a symbol and a label,
5477 and will reduce to a PC-relative relocation in the object file.
5478 Expressions like this can be left alone when generating PIC, rather
5479 than forced through the GOT. */
5480 static int
5481 pcrel_constant_p (rtx x)
5483 if (GET_CODE (x) == MINUS)
5484 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5486 return FALSE;
5489 /* Return true if X will surely end up in an index register after next
5490 splitting pass. */
5491 static bool
5492 will_be_in_index_register (const_rtx x)
5494 /* arm.md: calculate_pic_address will split this into a register. */
5495 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5498 /* Return nonzero if X is a valid ARM state address operand. */
5500 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5501 int strict_p)
5503 bool use_ldrd;
5504 enum rtx_code code = GET_CODE (x);
5506 if (arm_address_register_rtx_p (x, strict_p))
5507 return 1;
5509 use_ldrd = (TARGET_LDRD
5510 && (mode == DImode
5511 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5513 if (code == POST_INC || code == PRE_DEC
5514 || ((code == PRE_INC || code == POST_DEC)
5515 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5516 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5518 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5519 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5520 && GET_CODE (XEXP (x, 1)) == PLUS
5521 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5523 rtx addend = XEXP (XEXP (x, 1), 1);
5525 /* Don't allow ldrd post increment by register because it's hard
5526 to fixup invalid register choices. */
5527 if (use_ldrd
5528 && GET_CODE (x) == POST_MODIFY
5529 && GET_CODE (addend) == REG)
5530 return 0;
5532 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5533 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5536 /* After reload constants split into minipools will have addresses
5537 from a LABEL_REF. */
5538 else if (reload_completed
5539 && (code == LABEL_REF
5540 || (code == CONST
5541 && GET_CODE (XEXP (x, 0)) == PLUS
5542 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5543 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5544 return 1;
5546 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5547 return 0;
5549 else if (code == PLUS)
5551 rtx xop0 = XEXP (x, 0);
5552 rtx xop1 = XEXP (x, 1);
5554 return ((arm_address_register_rtx_p (xop0, strict_p)
5555 && ((GET_CODE(xop1) == CONST_INT
5556 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5557 || (!strict_p && will_be_in_index_register (xop1))))
5558 || (arm_address_register_rtx_p (xop1, strict_p)
5559 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5562 #if 0
5563 /* Reload currently can't handle MINUS, so disable this for now */
5564 else if (GET_CODE (x) == MINUS)
5566 rtx xop0 = XEXP (x, 0);
5567 rtx xop1 = XEXP (x, 1);
5569 return (arm_address_register_rtx_p (xop0, strict_p)
5570 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5572 #endif
5574 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5575 && code == SYMBOL_REF
5576 && CONSTANT_POOL_ADDRESS_P (x)
5577 && ! (flag_pic
5578 && symbol_mentioned_p (get_pool_constant (x))
5579 && ! pcrel_constant_p (get_pool_constant (x))))
5580 return 1;
5582 return 0;
5585 /* Return nonzero if X is a valid Thumb-2 address operand. */
5586 static int
5587 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5589 bool use_ldrd;
5590 enum rtx_code code = GET_CODE (x);
5592 if (arm_address_register_rtx_p (x, strict_p))
5593 return 1;
5595 use_ldrd = (TARGET_LDRD
5596 && (mode == DImode
5597 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5599 if (code == POST_INC || code == PRE_DEC
5600 || ((code == PRE_INC || code == POST_DEC)
5601 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5602 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5604 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5605 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5606 && GET_CODE (XEXP (x, 1)) == PLUS
5607 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5609 /* Thumb-2 only has autoincrement by constant. */
5610 rtx addend = XEXP (XEXP (x, 1), 1);
5611 HOST_WIDE_INT offset;
5613 if (GET_CODE (addend) != CONST_INT)
5614 return 0;
5616 offset = INTVAL(addend);
5617 if (GET_MODE_SIZE (mode) <= 4)
5618 return (offset > -256 && offset < 256);
5620 return (use_ldrd && offset > -1024 && offset < 1024
5621 && (offset & 3) == 0);
5624 /* After reload constants split into minipools will have addresses
5625 from a LABEL_REF. */
5626 else if (reload_completed
5627 && (code == LABEL_REF
5628 || (code == CONST
5629 && GET_CODE (XEXP (x, 0)) == PLUS
5630 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5631 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5632 return 1;
5634 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5635 return 0;
5637 else if (code == PLUS)
5639 rtx xop0 = XEXP (x, 0);
5640 rtx xop1 = XEXP (x, 1);
5642 return ((arm_address_register_rtx_p (xop0, strict_p)
5643 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5644 || (!strict_p && will_be_in_index_register (xop1))))
5645 || (arm_address_register_rtx_p (xop1, strict_p)
5646 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5649 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5650 && code == SYMBOL_REF
5651 && CONSTANT_POOL_ADDRESS_P (x)
5652 && ! (flag_pic
5653 && symbol_mentioned_p (get_pool_constant (x))
5654 && ! pcrel_constant_p (get_pool_constant (x))))
5655 return 1;
5657 return 0;
5660 /* Return nonzero if INDEX is valid for an address index operand in
5661 ARM state. */
5662 static int
5663 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5664 int strict_p)
5666 HOST_WIDE_INT range;
5667 enum rtx_code code = GET_CODE (index);
5669 /* Standard coprocessor addressing modes. */
5670 if (TARGET_HARD_FLOAT
5671 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5672 && (mode == SFmode || mode == DFmode
5673 || (TARGET_MAVERICK && mode == DImode)))
5674 return (code == CONST_INT && INTVAL (index) < 1024
5675 && INTVAL (index) > -1024
5676 && (INTVAL (index) & 3) == 0);
5678 /* For quad modes, we restrict the constant offset to be slightly less
5679 than what the instruction format permits. We do this because for
5680 quad mode moves, we will actually decompose them into two separate
5681 double-mode reads or writes. INDEX must therefore be a valid
5682 (double-mode) offset and so should INDEX+8. */
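  /* For example, with this limit an offset of 1012 is accepted because both
     1012 and 1012 + 8 == 1020 are valid double-mode offsets, whereas 1016
     would be rejected since 1016 + 8 reaches 1024.  */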
5683 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5684 return (code == CONST_INT
5685 && INTVAL (index) < 1016
5686 && INTVAL (index) > -1024
5687 && (INTVAL (index) & 3) == 0);
5689 /* We have no such constraint on double mode offsets, so we permit the
5690 full range of the instruction format. */
5691 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5692 return (code == CONST_INT
5693 && INTVAL (index) < 1024
5694 && INTVAL (index) > -1024
5695 && (INTVAL (index) & 3) == 0);
5697 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5698 return (code == CONST_INT
5699 && INTVAL (index) < 1024
5700 && INTVAL (index) > -1024
5701 && (INTVAL (index) & 3) == 0);
5703 if (arm_address_register_rtx_p (index, strict_p)
5704 && (GET_MODE_SIZE (mode) <= 4))
5705 return 1;
5707 if (mode == DImode || mode == DFmode)
5709 if (code == CONST_INT)
5711 HOST_WIDE_INT val = INTVAL (index);
5713 if (TARGET_LDRD)
5714 return val > -256 && val < 256;
5715 else
5716 return val > -4096 && val < 4092;
5719 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5722 if (GET_MODE_SIZE (mode) <= 4
5723 && ! (arm_arch4
5724 && (mode == HImode
5725 || mode == HFmode
5726 || (mode == QImode && outer == SIGN_EXTEND))))
5728 if (code == MULT)
5730 rtx xiop0 = XEXP (index, 0);
5731 rtx xiop1 = XEXP (index, 1);
5733 return ((arm_address_register_rtx_p (xiop0, strict_p)
5734 && power_of_two_operand (xiop1, SImode))
5735 || (arm_address_register_rtx_p (xiop1, strict_p)
5736 && power_of_two_operand (xiop0, SImode)));
5738 else if (code == LSHIFTRT || code == ASHIFTRT
5739 || code == ASHIFT || code == ROTATERT)
5741 rtx op = XEXP (index, 1);
5743 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5744 && GET_CODE (op) == CONST_INT
5745 && INTVAL (op) > 0
5746 && INTVAL (op) <= 31);
5750 /* For ARM v4 we may be doing a sign-extend operation during the
5751 load. */
5752 if (arm_arch4)
5754 if (mode == HImode
5755 || mode == HFmode
5756 || (outer == SIGN_EXTEND && mode == QImode))
5757 range = 256;
5758 else
5759 range = 4096;
5761 else
5762 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5764 return (code == CONST_INT
5765 && INTVAL (index) < range
5766 && INTVAL (index) > -range);
5769 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
5770    index operand, i.e. 1, 2, 4 or 8. */
5771 static bool
5772 thumb2_index_mul_operand (rtx op)
5774 HOST_WIDE_INT val;
5776 if (GET_CODE(op) != CONST_INT)
5777 return false;
5779 val = INTVAL(op);
5780 return (val == 1 || val == 2 || val == 4 || val == 8);
5783 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5784 static int
5785 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5787 enum rtx_code code = GET_CODE (index);
5789 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5790 /* Standard coprocessor addressing modes. */
5791 if (TARGET_HARD_FLOAT
5792 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5793 && (mode == SFmode || mode == DFmode
5794 || (TARGET_MAVERICK && mode == DImode)))
5795 return (code == CONST_INT && INTVAL (index) < 1024
5796 	   /* Thumb-2 only allows a > -256 index range for its core register
5797 	      loads/stores.  Since we allow SF/DF in core registers, we have
5798 	      to use the intersection of -256~4096 (core) and -1024~1024
5799 	      (coprocessor). */
5800 && INTVAL (index) > -256
5801 && (INTVAL (index) & 3) == 0);
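  /* For example, an offset of -260 would be acceptable to the coprocessor
     form alone, but it is rejected here because the core-register fallback
     cannot reach it.  */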
5803 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5805 /* For DImode assume values will usually live in core regs
5806 and only allow LDRD addressing modes. */
5807 if (!TARGET_LDRD || mode != DImode)
5808 return (code == CONST_INT
5809 && INTVAL (index) < 1024
5810 && INTVAL (index) > -1024
5811 && (INTVAL (index) & 3) == 0);
5814 /* For quad modes, we restrict the constant offset to be slightly less
5815 than what the instruction format permits. We do this because for
5816 quad mode moves, we will actually decompose them into two separate
5817 double-mode reads or writes. INDEX must therefore be a valid
5818 (double-mode) offset and so should INDEX+8. */
5819 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5820 return (code == CONST_INT
5821 && INTVAL (index) < 1016
5822 && INTVAL (index) > -1024
5823 && (INTVAL (index) & 3) == 0);
5825 /* We have no such constraint on double mode offsets, so we permit the
5826 full range of the instruction format. */
5827 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5828 return (code == CONST_INT
5829 && INTVAL (index) < 1024
5830 && INTVAL (index) > -1024
5831 && (INTVAL (index) & 3) == 0);
5833 if (arm_address_register_rtx_p (index, strict_p)
5834 && (GET_MODE_SIZE (mode) <= 4))
5835 return 1;
5837 if (mode == DImode || mode == DFmode)
5839 if (code == CONST_INT)
5841 HOST_WIDE_INT val = INTVAL (index);
5842 /* ??? Can we assume ldrd for thumb2? */
5843 /* Thumb-2 ldrd only has reg+const addressing modes. */
5844 /* ldrd supports offsets of +-1020.
5845 However the ldr fallback does not. */
5846 return val > -256 && val < 256 && (val & 3) == 0;
5848 else
5849 return 0;
5852 if (code == MULT)
5854 rtx xiop0 = XEXP (index, 0);
5855 rtx xiop1 = XEXP (index, 1);
5857 return ((arm_address_register_rtx_p (xiop0, strict_p)
5858 && thumb2_index_mul_operand (xiop1))
5859 || (arm_address_register_rtx_p (xiop1, strict_p)
5860 && thumb2_index_mul_operand (xiop0)));
5862 else if (code == ASHIFT)
5864 rtx op = XEXP (index, 1);
5866 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5867 && GET_CODE (op) == CONST_INT
5868 && INTVAL (op) > 0
5869 && INTVAL (op) <= 3);
5872 return (code == CONST_INT
5873 && INTVAL (index) < 4096
5874 && INTVAL (index) > -256);
5877 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5878 static int
5879 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5881 int regno;
5883 if (GET_CODE (x) != REG)
5884 return 0;
5886 regno = REGNO (x);
5888 if (strict_p)
5889 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5891 return (regno <= LAST_LO_REGNUM
5892 || regno > LAST_VIRTUAL_REGISTER
5893 || regno == FRAME_POINTER_REGNUM
5894 || (GET_MODE_SIZE (mode) >= 4
5895 && (regno == STACK_POINTER_REGNUM
5896 || regno >= FIRST_PSEUDO_REGISTER
5897 || x == hard_frame_pointer_rtx
5898 || x == arg_pointer_rtx)));
5901 /* Return nonzero if x is a legitimate index register. This is the case
5902 for any base register that can access a QImode object. */
5903 inline static int
5904 thumb1_index_register_rtx_p (rtx x, int strict_p)
5906 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5909 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5911 The AP may be eliminated to either the SP or the FP, so we use the
5912 least common denominator, e.g. SImode, and offsets from 0 to 64.
5914 ??? Verify whether the above is the right approach.
5916 ??? Also, the FP may be eliminated to the SP, so perhaps that
5917 needs special handling also.
5919 ??? Look at how the mips16 port solves this problem. It probably uses
5920 better ways to solve some of these problems.
5922 Although it is not incorrect, we don't accept QImode and HImode
5923 addresses based on the frame pointer or arg pointer until the
5924 reload pass starts. This is so that eliminating such addresses
5925 into stack based ones won't produce impossible code. */
5926 static int
5927 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5929 /* ??? Not clear if this is right. Experiment. */
5930 if (GET_MODE_SIZE (mode) < 4
5931 && !(reload_in_progress || reload_completed)
5932 && (reg_mentioned_p (frame_pointer_rtx, x)
5933 || reg_mentioned_p (arg_pointer_rtx, x)
5934 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5935 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5936 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5937 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5938 return 0;
5940 /* Accept any base register. SP only in SImode or larger. */
5941 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5942 return 1;
5944 /* This is PC relative data before arm_reorg runs. */
5945 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5946 && GET_CODE (x) == SYMBOL_REF
5947 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5948 return 1;
5950 /* This is PC relative data after arm_reorg runs. */
5951 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5952 && reload_completed
5953 && (GET_CODE (x) == LABEL_REF
5954 || (GET_CODE (x) == CONST
5955 && GET_CODE (XEXP (x, 0)) == PLUS
5956 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5957 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5958 return 1;
5960 /* Post-inc indexing only supported for SImode and larger. */
5961 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5962 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5963 return 1;
5965 else if (GET_CODE (x) == PLUS)
5967 /* REG+REG address can be any two index registers. */
5968 /* We disallow FRAME+REG addressing since we know that FRAME
5969 will be replaced with STACK, and SP relative addressing only
5970 permits SP+OFFSET. */
5971 if (GET_MODE_SIZE (mode) <= 4
5972 && XEXP (x, 0) != frame_pointer_rtx
5973 && XEXP (x, 1) != frame_pointer_rtx
5974 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5975 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5976 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5977 return 1;
5979 	  /* REG+const has a 5- to 7-bit offset for non-SP registers. */
5980 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5981 || XEXP (x, 0) == arg_pointer_rtx)
5982 && GET_CODE (XEXP (x, 1)) == CONST_INT
5983 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5984 return 1;
5986 	  /* REG+const has a 10-bit offset for SP, but only SImode and
5987 	     larger are supported. */
5988 /* ??? Should probably check for DI/DFmode overflow here
5989 just like GO_IF_LEGITIMATE_OFFSET does. */
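      /* For SImode this accepts SP-relative offsets up to 1020, since
	 1020 + 4 == 1024.  */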
5990 else if (GET_CODE (XEXP (x, 0)) == REG
5991 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5992 && GET_MODE_SIZE (mode) >= 4
5993 && GET_CODE (XEXP (x, 1)) == CONST_INT
5994 && INTVAL (XEXP (x, 1)) >= 0
5995 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5996 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5997 return 1;
5999 else if (GET_CODE (XEXP (x, 0)) == REG
6000 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6001 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6002 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6003 && REGNO (XEXP (x, 0))
6004 <= LAST_VIRTUAL_POINTER_REGISTER))
6005 && GET_MODE_SIZE (mode) >= 4
6006 && GET_CODE (XEXP (x, 1)) == CONST_INT
6007 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6008 return 1;
6011 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6012 && GET_MODE_SIZE (mode) == 4
6013 && GET_CODE (x) == SYMBOL_REF
6014 && CONSTANT_POOL_ADDRESS_P (x)
6015 && ! (flag_pic
6016 && symbol_mentioned_p (get_pool_constant (x))
6017 && ! pcrel_constant_p (get_pool_constant (x))))
6018 return 1;
6020 return 0;
6023 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6024 instruction of mode MODE. */
6026 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6028 switch (GET_MODE_SIZE (mode))
6030 case 1:
6031 return val >= 0 && val < 32;
6033 case 2:
6034 return val >= 0 && val < 64 && (val & 1) == 0;
6036 default:
6037 return (val >= 0
6038 && (val + GET_MODE_SIZE (mode)) <= 128
6039 && (val & 3) == 0);
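  /* So, for example, thumb_legitimate_offset_p (SImode, 124) holds, while an
     offset of 126 fails the alignment test and 128 fails the range test.  */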
6043 bool
6044 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6046 if (TARGET_ARM)
6047 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6048 else if (TARGET_THUMB2)
6049 return thumb2_legitimate_address_p (mode, x, strict_p);
6050 else /* if (TARGET_THUMB1) */
6051 return thumb1_legitimate_address_p (mode, x, strict_p);
6054 /* Build the SYMBOL_REF for __tls_get_addr. */
6056 static GTY(()) rtx tls_get_addr_libfunc;
6058 static rtx
6059 get_tls_get_addr (void)
6061 if (!tls_get_addr_libfunc)
6062 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6063 return tls_get_addr_libfunc;
6066 static rtx
6067 arm_load_tp (rtx target)
6069 if (!target)
6070 target = gen_reg_rtx (SImode);
6072 if (TARGET_HARD_TP)
6074 /* Can return in any reg. */
6075 emit_insn (gen_load_tp_hard (target));
6077 else
6079 /* Always returned in r0. Immediately copy the result into a pseudo,
6080 otherwise other uses of r0 (e.g. setting up function arguments) may
6081 clobber the value. */
6083 rtx tmp;
6085 emit_insn (gen_load_tp_soft ());
6087 tmp = gen_rtx_REG (SImode, 0);
6088 emit_move_insn (target, tmp);
6090 return target;
6093 static rtx
6094 load_tls_operand (rtx x, rtx reg)
6096 rtx tmp;
6098 if (reg == NULL_RTX)
6099 reg = gen_reg_rtx (SImode);
6101 tmp = gen_rtx_CONST (SImode, x);
6103 emit_move_insn (reg, tmp);
6105 return reg;
6108 static rtx
6109 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6111 rtx insns, label, labelno, sum;
6113 start_sequence ();
6115 labelno = GEN_INT (pic_labelno++);
6116 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6117 label = gen_rtx_CONST (VOIDmode, label);
6119 sum = gen_rtx_UNSPEC (Pmode,
6120 gen_rtvec (4, x, GEN_INT (reloc), label,
6121 GEN_INT (TARGET_ARM ? 8 : 4)),
6122 UNSPEC_TLS);
6123 reg = load_tls_operand (sum, reg);
6125 if (TARGET_ARM)
6126 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6127 else if (TARGET_THUMB2)
6128 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6129 else /* TARGET_THUMB1 */
6130 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6132 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
6133 Pmode, 1, reg, Pmode);
6135 insns = get_insns ();
6136 end_sequence ();
6138 return insns;
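/* Expand the TLS symbol reference X into a legitimate address, using REG (if
   non-null) to hold intermediate results.  The sequence emitted depends on
   the TLS access model recorded on the symbol.  */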
6142 legitimize_tls_address (rtx x, rtx reg)
6144 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6145 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6147 switch (model)
6149 case TLS_MODEL_GLOBAL_DYNAMIC:
6150 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6151 dest = gen_reg_rtx (Pmode);
6152 emit_libcall_block (insns, dest, ret, x);
6153 return dest;
6155 case TLS_MODEL_LOCAL_DYNAMIC:
6156 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6158 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6159 share the LDM result with other LD model accesses. */
6160 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6161 UNSPEC_TLS);
6162 dest = gen_reg_rtx (Pmode);
6163 emit_libcall_block (insns, dest, ret, eqv);
6165 /* Load the addend. */
6166 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
6167 UNSPEC_TLS);
6168 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6169 return gen_rtx_PLUS (Pmode, dest, addend);
6171 case TLS_MODEL_INITIAL_EXEC:
6172 labelno = GEN_INT (pic_labelno++);
6173 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6174 label = gen_rtx_CONST (VOIDmode, label);
6175 sum = gen_rtx_UNSPEC (Pmode,
6176 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6177 GEN_INT (TARGET_ARM ? 8 : 4)),
6178 UNSPEC_TLS);
6179 reg = load_tls_operand (sum, reg);
6181 if (TARGET_ARM)
6182 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6183 else if (TARGET_THUMB2)
6184 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6185 else
6187 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6188 emit_move_insn (reg, gen_const_mem (SImode, reg));
6191 tp = arm_load_tp (NULL_RTX);
6193 return gen_rtx_PLUS (Pmode, tp, reg);
6195 case TLS_MODEL_LOCAL_EXEC:
6196 tp = arm_load_tp (NULL_RTX);
6198 reg = gen_rtx_UNSPEC (Pmode,
6199 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6200 UNSPEC_TLS);
6201 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6203 return gen_rtx_PLUS (Pmode, tp, reg);
6205 default:
6206 abort ();
6210 /* Try machine-dependent ways of modifying an illegitimate address
6211 to be legitimate. If we find one, return the new, valid address. */
6213 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6215 if (!TARGET_ARM)
6217 /* TODO: legitimize_address for Thumb2. */
6218 if (TARGET_THUMB2)
6219 return x;
6220 return thumb_legitimize_address (x, orig_x, mode);
6223 if (arm_tls_symbol_p (x))
6224 return legitimize_tls_address (x, NULL_RTX);
6226 if (GET_CODE (x) == PLUS)
6228 rtx xop0 = XEXP (x, 0);
6229 rtx xop1 = XEXP (x, 1);
6231 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6232 xop0 = force_reg (SImode, xop0);
6234 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6235 xop1 = force_reg (SImode, xop1);
6237 if (ARM_BASE_REGISTER_RTX_P (xop0)
6238 && GET_CODE (xop1) == CONST_INT)
6240 HOST_WIDE_INT n, low_n;
6241 rtx base_reg, val;
6242 n = INTVAL (xop1);
6244 /* VFP addressing modes actually allow greater offsets, but for
6245 now we just stick with the lowest common denominator. */
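	  /* For example, with n = 0x12c in DImode we get low_n = 0xc; since
	     that exceeds 4 we carry into the base, giving n = 0x130 and
	     low_n = -4, whose sum is still the original offset.  */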
6246 if (mode == DImode
6247 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6249 low_n = n & 0x0f;
6250 n &= ~0x0f;
6251 if (low_n > 4)
6253 n += 16;
6254 low_n -= 16;
6257 else
6259 low_n = ((mode) == TImode ? 0
6260 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6261 n -= low_n;
6264 base_reg = gen_reg_rtx (SImode);
6265 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6266 emit_move_insn (base_reg, val);
6267 x = plus_constant (base_reg, low_n);
6269 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6270 x = gen_rtx_PLUS (SImode, xop0, xop1);
6273 /* XXX We don't allow MINUS any more -- see comment in
6274 arm_legitimate_address_outer_p (). */
6275 else if (GET_CODE (x) == MINUS)
6277 rtx xop0 = XEXP (x, 0);
6278 rtx xop1 = XEXP (x, 1);
6280 if (CONSTANT_P (xop0))
6281 xop0 = force_reg (SImode, xop0);
6283 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6284 xop1 = force_reg (SImode, xop1);
6286 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6287 x = gen_rtx_MINUS (SImode, xop0, xop1);
6290 	  /* Make sure to take full advantage of the pre-indexed addressing mode
6291 	     with absolute addresses, which often allows the base register to
6292 	     be factored out for multiple adjacent memory references, and may
6293 	     even allow the minipool to be avoided entirely. */
6294 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6296 unsigned int bits;
6297 HOST_WIDE_INT mask, base, index;
6298 rtx base_reg;
6300 	  /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
6301 	     use an 8-bit index.  So let's use a 12-bit index for SImode only and
6302 	     hope that arm_gen_constant will enable ldrb to use more bits. */
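      /* For example, an SImode access to address 0x1234 becomes a base of
	 0x1000 plus an index of 0x234; a base needing many set bits, such as
	 0xfffff000, is instead turned into 0xffffffff with a negative index.  */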
6303 bits = (mode == SImode) ? 12 : 8;
6304 mask = (1 << bits) - 1;
6305 base = INTVAL (x) & ~mask;
6306 index = INTVAL (x) & mask;
6307 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6309 /* It'll most probably be more efficient to generate the base
6310 with more bits set and use a negative index instead. */
6311 base |= mask;
6312 index -= mask;
6314 base_reg = force_reg (SImode, GEN_INT (base));
6315 x = plus_constant (base_reg, index);
6318 if (flag_pic)
6320 /* We need to find and carefully transform any SYMBOL and LABEL
6321 references; so go back to the original address expression. */
6322 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6324 if (new_x != orig_x)
6325 x = new_x;
6328 return x;
6332 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6333 to be legitimate. If we find one, return the new, valid address. */
6335 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6337 if (arm_tls_symbol_p (x))
6338 return legitimize_tls_address (x, NULL_RTX);
6340 if (GET_CODE (x) == PLUS
6341 && GET_CODE (XEXP (x, 1)) == CONST_INT
6342 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6343 || INTVAL (XEXP (x, 1)) < 0))
6345 rtx xop0 = XEXP (x, 0);
6346 rtx xop1 = XEXP (x, 1);
6347 HOST_WIDE_INT offset = INTVAL (xop1);
6349 /* Try and fold the offset into a biasing of the base register and
6350 then offsetting that. Don't do this when optimizing for space
6351 since it can cause too many CSEs. */
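      /* For example, with SImode and offset == 300 this rewrites base + 300
	 as (base + 252) + 48, leaving a residual offset that the memory
	 access can encode directly.  */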
6352 if (optimize_size && offset >= 0
6353 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6355 HOST_WIDE_INT delta;
6357 if (offset >= 256)
6358 delta = offset - (256 - GET_MODE_SIZE (mode));
6359 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6360 delta = 31 * GET_MODE_SIZE (mode);
6361 else
6362 delta = offset & (~31 * GET_MODE_SIZE (mode));
6364 xop0 = force_operand (plus_constant (xop0, offset - delta),
6365 NULL_RTX);
6366 x = plus_constant (xop0, delta);
6368 else if (offset < 0 && offset > -256)
6369 	/* Small negative offsets are best done with a subtract before the
6370 	   dereference; forcing these into a register normally takes two
6371 	   instructions. */
6372 x = force_operand (x, NULL_RTX);
6373 else
6375 /* For the remaining cases, force the constant into a register. */
6376 xop1 = force_reg (SImode, xop1);
6377 x = gen_rtx_PLUS (SImode, xop0, xop1);
6380 else if (GET_CODE (x) == PLUS
6381 && s_register_operand (XEXP (x, 1), SImode)
6382 && !s_register_operand (XEXP (x, 0), SImode))
6384 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6386 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6389 if (flag_pic)
6391 /* We need to find and carefully transform any SYMBOL and LABEL
6392 references; so go back to the original address expression. */
6393 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6395 if (new_x != orig_x)
6396 x = new_x;
6399 return x;
6402 bool
6403 arm_legitimize_reload_address (rtx *p,
6404 enum machine_mode mode,
6405 int opnum, int type,
6406 int ind_levels ATTRIBUTE_UNUSED)
6408 if (GET_CODE (*p) == PLUS
6409 && GET_CODE (XEXP (*p, 0)) == REG
6410 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6411 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6413 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6414 HOST_WIDE_INT low, high;
6416 if (mode == DImode || (mode == DFmode && TARGET_SOFT_FLOAT))
6417 low = ((val & 0xf) ^ 0x8) - 0x8;
6418 else if (TARGET_MAVERICK && TARGET_HARD_FLOAT)
6419 /* Need to be careful, -256 is not a valid offset. */
6420 low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
6421 else if (mode == SImode
6422 || (mode == SFmode && TARGET_SOFT_FLOAT)
6423 || ((mode == HImode || mode == QImode) && ! arm_arch4))
6424 /* Need to be careful, -4096 is not a valid offset. */
6425 low = val >= 0 ? (val & 0xfff) : -((-val) & 0xfff);
6426 else if ((mode == HImode || mode == QImode) && arm_arch4)
6427 /* Need to be careful, -256 is not a valid offset. */
6428 low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
6429 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6430 && TARGET_HARD_FLOAT && TARGET_FPA)
6431 /* Need to be careful, -1024 is not a valid offset. */
6432 low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff);
6433 else
6434 return false;
6436 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6437 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6438 - (unsigned HOST_WIDE_INT) 0x80000000);
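      /* For example, an SImode access at offset 0x1234 is split into
	 high = 0x1000 and low = 0x234.  */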
6439 /* Check for overflow or zero */
6440 if (low == 0 || high == 0 || (high + low != val))
6441 return false;
6443 /* Reload the high part into a base reg; leave the low part
6444 in the mem. */
6445 *p = gen_rtx_PLUS (GET_MODE (*p),
6446 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6447 GEN_INT (high)),
6448 GEN_INT (low));
6449 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6450 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6451 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6452 return true;
6455 return false;
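/* Attempt to rework the address *X_P for reload on Thumb targets.  Return the
   replacement address if a reload was pushed for it, or NULL if the address
   needs no special handling.  */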
6459 thumb_legitimize_reload_address (rtx *x_p,
6460 enum machine_mode mode,
6461 int opnum, int type,
6462 int ind_levels ATTRIBUTE_UNUSED)
6464 rtx x = *x_p;
6466 if (GET_CODE (x) == PLUS
6467 && GET_MODE_SIZE (mode) < 4
6468 && REG_P (XEXP (x, 0))
6469 && XEXP (x, 0) == stack_pointer_rtx
6470 && GET_CODE (XEXP (x, 1)) == CONST_INT
6471 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6473 rtx orig_x = x;
6475 x = copy_rtx (x);
6476 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6477 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6478 return x;
6481 /* If both registers are hi-regs, then it's better to reload the
6482 entire expression rather than each register individually. That
6483 only requires one reload register rather than two. */
6484 if (GET_CODE (x) == PLUS
6485 && REG_P (XEXP (x, 0))
6486 && REG_P (XEXP (x, 1))
6487 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6488 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6490 rtx orig_x = x;
6492 x = copy_rtx (x);
6493 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6494 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6495 return x;
6498 return NULL;
6501 /* Test for various thread-local symbols. */
6503 /* Return TRUE if X is a thread-local symbol. */
6505 static bool
6506 arm_tls_symbol_p (rtx x)
6508 if (! TARGET_HAVE_TLS)
6509 return false;
6511 if (GET_CODE (x) != SYMBOL_REF)
6512 return false;
6514 return SYMBOL_REF_TLS_MODEL (x) != 0;
6517 /* Helper for arm_tls_referenced_p. */
6519 static int
6520 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6522 if (GET_CODE (*x) == SYMBOL_REF)
6523 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6525 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6526 TLS offsets, not real symbol references. */
6527 if (GET_CODE (*x) == UNSPEC
6528 && XINT (*x, 1) == UNSPEC_TLS)
6529 return -1;
6531 return 0;
6534 /* Return TRUE if X contains any TLS symbol references. */
6536 bool
6537 arm_tls_referenced_p (rtx x)
6539 if (! TARGET_HAVE_TLS)
6540 return false;
6542 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6545 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6547 bool
6548 arm_cannot_force_const_mem (rtx x)
6550 rtx base, offset;
6552 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6554 split_const (x, &base, &offset);
6555 if (GET_CODE (base) == SYMBOL_REF
6556 && !offset_within_block_p (base, INTVAL (offset)))
6557 return true;
6559 return arm_tls_referenced_p (x);
6562 #define REG_OR_SUBREG_REG(X) \
6563 (GET_CODE (X) == REG \
6564 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6566 #define REG_OR_SUBREG_RTX(X) \
6567 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6569 static inline int
6570 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6572 enum machine_mode mode = GET_MODE (x);
6573 int total;
6575 switch (code)
6577 case ASHIFT:
6578 case ASHIFTRT:
6579 case LSHIFTRT:
6580 case ROTATERT:
6581 case PLUS:
6582 case MINUS:
6583 case COMPARE:
6584 case NEG:
6585 case NOT:
6586 return COSTS_N_INSNS (1);
6588 case MULT:
6589 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6591 int cycles = 0;
6592 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
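	  /* Charge one extra cycle for every two significant bits of the
	     constant multiplier.  */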
6594 while (i)
6596 i >>= 2;
6597 cycles++;
6599 return COSTS_N_INSNS (2) + cycles;
6601 return COSTS_N_INSNS (1) + 16;
6603 case SET:
6604 return (COSTS_N_INSNS (1)
6605 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6606 + GET_CODE (SET_DEST (x)) == MEM));
6608 case CONST_INT:
6609 if (outer == SET)
6611 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6612 return 0;
6613 if (thumb_shiftable_const (INTVAL (x)))
6614 return COSTS_N_INSNS (2);
6615 return COSTS_N_INSNS (3);
6617 else if ((outer == PLUS || outer == COMPARE)
6618 && INTVAL (x) < 256 && INTVAL (x) > -256)
6619 return 0;
6620 else if ((outer == IOR || outer == XOR || outer == AND)
6621 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6622 return COSTS_N_INSNS (1);
6623 else if (outer == AND)
6625 int i;
6626 /* This duplicates the tests in the andsi3 expander. */
6627 for (i = 9; i <= 31; i++)
6628 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6629 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6630 return COSTS_N_INSNS (2);
6632 else if (outer == ASHIFT || outer == ASHIFTRT
6633 || outer == LSHIFTRT)
6634 return 0;
6635 return COSTS_N_INSNS (2);
6637 case CONST:
6638 case CONST_DOUBLE:
6639 case LABEL_REF:
6640 case SYMBOL_REF:
6641 return COSTS_N_INSNS (3);
6643 case UDIV:
6644 case UMOD:
6645 case DIV:
6646 case MOD:
6647 return 100;
6649 case TRUNCATE:
6650 return 99;
6652 case AND:
6653 case XOR:
6654 case IOR:
6655 /* XXX guess. */
6656 return 8;
6658 case MEM:
6659 /* XXX another guess. */
6660 /* Memory costs quite a lot for the first word, but subsequent words
6661 load at the equivalent of a single insn each. */
6662 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6663 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6664 ? 4 : 0));
6666 case IF_THEN_ELSE:
6667 /* XXX a guess. */
6668 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6669 return 14;
6670 return 2;
6672 case SIGN_EXTEND:
6673 case ZERO_EXTEND:
6674 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6675 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6677 if (mode == SImode)
6678 return total;
6680 if (arm_arch6)
6681 return total + COSTS_N_INSNS (1);
6683 /* Assume a two-shift sequence. Increase the cost slightly so
6684 we prefer actual shifts over an extend operation. */
6685 return total + 1 + COSTS_N_INSNS (2);
6687 default:
6688 return 99;
6692 static inline bool
6693 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6695 enum machine_mode mode = GET_MODE (x);
6696 enum rtx_code subcode;
6697 rtx operand;
6698 enum rtx_code code = GET_CODE (x);
6699 *total = 0;
6701 switch (code)
6703 case MEM:
6704 /* Memory costs quite a lot for the first word, but subsequent words
6705 load at the equivalent of a single insn each. */
6706 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6707 return true;
6709 case DIV:
6710 case MOD:
6711 case UDIV:
6712 case UMOD:
6713 if (TARGET_HARD_FLOAT && mode == SFmode)
6714 *total = COSTS_N_INSNS (2);
6715 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6716 *total = COSTS_N_INSNS (4);
6717 else
6718 *total = COSTS_N_INSNS (20);
6719 return false;
6721 case ROTATE:
6722 if (GET_CODE (XEXP (x, 1)) == REG)
6723 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6724 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6725 *total = rtx_cost (XEXP (x, 1), code, speed);
6727 /* Fall through */
6728 case ROTATERT:
6729 if (mode != SImode)
6731 *total += COSTS_N_INSNS (4);
6732 return true;
6735 /* Fall through */
6736 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6737 *total += rtx_cost (XEXP (x, 0), code, speed);
6738 if (mode == DImode)
6740 *total += COSTS_N_INSNS (3);
6741 return true;
6744 *total += COSTS_N_INSNS (1);
6745 	  /* Increase the cost of complex shifts because they aren't any faster
6746 	     and they reduce dual-issue opportunities. */
6747 if (arm_tune_cortex_a9
6748 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6749 ++*total;
6751 return true;
6753 case MINUS:
6754 if (mode == DImode)
6756 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6757 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6758 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6760 *total += rtx_cost (XEXP (x, 1), code, speed);
6761 return true;
6764 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6765 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6767 *total += rtx_cost (XEXP (x, 0), code, speed);
6768 return true;
6771 return false;
6774 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6776 if (TARGET_HARD_FLOAT
6777 && (mode == SFmode
6778 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6780 *total = COSTS_N_INSNS (1);
6781 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6782 && arm_const_double_rtx (XEXP (x, 0)))
6784 *total += rtx_cost (XEXP (x, 1), code, speed);
6785 return true;
6788 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6789 && arm_const_double_rtx (XEXP (x, 1)))
6791 *total += rtx_cost (XEXP (x, 0), code, speed);
6792 return true;
6795 return false;
6797 *total = COSTS_N_INSNS (20);
6798 return false;
6801 *total = COSTS_N_INSNS (1);
6802 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6803 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6805 *total += rtx_cost (XEXP (x, 1), code, speed);
6806 return true;
6809 subcode = GET_CODE (XEXP (x, 1));
6810 if (subcode == ASHIFT || subcode == ASHIFTRT
6811 || subcode == LSHIFTRT
6812 || subcode == ROTATE || subcode == ROTATERT)
6814 *total += rtx_cost (XEXP (x, 0), code, speed);
6815 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6816 return true;
6819 /* A shift as a part of RSB costs no more than RSB itself. */
6820 if (GET_CODE (XEXP (x, 0)) == MULT
6821 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6823 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6824 *total += rtx_cost (XEXP (x, 1), code, speed);
6825 return true;
6828 if (subcode == MULT
6829 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6831 *total += rtx_cost (XEXP (x, 0), code, speed);
6832 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6833 return true;
6836 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6837 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6839 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6840 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6841 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6842 *total += COSTS_N_INSNS (1);
6844 return true;
6847 /* Fall through */
6849 case PLUS:
6850 if (code == PLUS && arm_arch6 && mode == SImode
6851 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6852 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6854 *total = COSTS_N_INSNS (1);
6855 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6856 speed);
6857 *total += rtx_cost (XEXP (x, 1), code, speed);
6858 return true;
6861 /* MLA: All arguments must be registers. We filter out
6862 multiplication by a power of two, so that we fall down into
6863 the code below. */
6864 if (GET_CODE (XEXP (x, 0)) == MULT
6865 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6867 /* The cost comes from the cost of the multiply. */
6868 return false;
6871 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6873 if (TARGET_HARD_FLOAT
6874 && (mode == SFmode
6875 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6877 *total = COSTS_N_INSNS (1);
6878 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6879 && arm_const_double_rtx (XEXP (x, 1)))
6881 *total += rtx_cost (XEXP (x, 0), code, speed);
6882 return true;
6885 return false;
6888 *total = COSTS_N_INSNS (20);
6889 return false;
6892 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6893 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6895 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6896 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6897 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6898 *total += COSTS_N_INSNS (1);
6899 return true;
6902 /* Fall through */
6904 case AND: case XOR: case IOR:
6906 	  /* Normally the frame registers will be split into reg+const during
6907 reload, so it is a bad idea to combine them with other instructions,
6908 since then they might not be moved outside of loops. As a compromise
6909 we allow integration with ops that have a constant as their second
6910 operand. */
6911 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6912 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6913 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6914 *total = COSTS_N_INSNS (1);
6916 if (mode == DImode)
6918 *total += COSTS_N_INSNS (2);
6919 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6920 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6922 *total += rtx_cost (XEXP (x, 0), code, speed);
6923 return true;
6926 return false;
6929 *total += COSTS_N_INSNS (1);
6930 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6931 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6933 *total += rtx_cost (XEXP (x, 0), code, speed);
6934 return true;
6936 subcode = GET_CODE (XEXP (x, 0));
6937 if (subcode == ASHIFT || subcode == ASHIFTRT
6938 || subcode == LSHIFTRT
6939 || subcode == ROTATE || subcode == ROTATERT)
6941 *total += rtx_cost (XEXP (x, 1), code, speed);
6942 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6943 return true;
6946 if (subcode == MULT
6947 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6949 *total += rtx_cost (XEXP (x, 1), code, speed);
6950 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6951 return true;
6954 if (subcode == UMIN || subcode == UMAX
6955 || subcode == SMIN || subcode == SMAX)
6957 *total = COSTS_N_INSNS (3);
6958 return true;
6961 return false;
6963 case MULT:
6964 /* This should have been handled by the CPU specific routines. */
6965 gcc_unreachable ();
6967 case TRUNCATE:
6968 if (arm_arch3m && mode == SImode
6969 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6970 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6971 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6972 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6973 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6974 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6976 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6977 return true;
6979 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6980 return false;
6982 case NEG:
6983 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6985 if (TARGET_HARD_FLOAT
6986 && (mode == SFmode
6987 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6989 *total = COSTS_N_INSNS (1);
6990 return false;
6992 *total = COSTS_N_INSNS (2);
6993 return false;
6996 /* Fall through */
6997 case NOT:
6998 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6999 if (mode == SImode && code == NOT)
7001 subcode = GET_CODE (XEXP (x, 0));
7002 if (subcode == ASHIFT || subcode == ASHIFTRT
7003 || subcode == LSHIFTRT
7004 || subcode == ROTATE || subcode == ROTATERT
7005 || (subcode == MULT
7006 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7008 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7009 /* Register shifts cost an extra cycle. */
7010 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7011 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7012 subcode, speed);
7013 return true;
7017 return false;
7019 case IF_THEN_ELSE:
7020 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7022 *total = COSTS_N_INSNS (4);
7023 return true;
7026 operand = XEXP (x, 0);
7028 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7029 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7030 && GET_CODE (XEXP (operand, 0)) == REG
7031 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7032 *total += COSTS_N_INSNS (1);
7033 *total += (rtx_cost (XEXP (x, 1), code, speed)
7034 + rtx_cost (XEXP (x, 2), code, speed));
7035 return true;
7037 case NE:
7038 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7040 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7041 return true;
7043 goto scc_insn;
7045 case GE:
7046 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7047 && mode == SImode && XEXP (x, 1) == const0_rtx)
7049 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7050 return true;
7052 goto scc_insn;
7054 case LT:
7055 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7056 && mode == SImode && XEXP (x, 1) == const0_rtx)
7058 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7059 return true;
7061 goto scc_insn;
7063 case EQ:
7064 case GT:
7065 case LE:
7066 case GEU:
7067 case LTU:
7068 case GTU:
7069 case LEU:
7070 case UNORDERED:
7071 case ORDERED:
7072 case UNEQ:
7073 case UNGE:
7074 case UNLT:
7075 case UNGT:
7076 case UNLE:
7077 scc_insn:
7078 	  /* SCC insns.  If the comparison has already been performed, they
7079 	     cost 2 instructions.  Otherwise they need an additional comparison
7080 	     before them.  */
7081 *total = COSTS_N_INSNS (2);
7082 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7084 return true;
7087 /* Fall through */
7088 case COMPARE:
7089 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7091 *total = 0;
7092 return true;
7095 *total += COSTS_N_INSNS (1);
7096 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7097 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7099 *total += rtx_cost (XEXP (x, 0), code, speed);
7100 return true;
7103 subcode = GET_CODE (XEXP (x, 0));
7104 if (subcode == ASHIFT || subcode == ASHIFTRT
7105 || subcode == LSHIFTRT
7106 || subcode == ROTATE || subcode == ROTATERT)
7108 *total += rtx_cost (XEXP (x, 1), code, speed);
7109 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7110 return true;
7113 if (subcode == MULT
7114 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7116 *total += rtx_cost (XEXP (x, 1), code, speed);
7117 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7118 return true;
7121 return false;
7123 case UMIN:
7124 case UMAX:
7125 case SMIN:
7126 case SMAX:
7127 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7128 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7129 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7130 *total += rtx_cost (XEXP (x, 1), code, speed);
7131 return true;
7133 case ABS:
7134 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7136 if (TARGET_HARD_FLOAT
7137 && (mode == SFmode
7138 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7140 *total = COSTS_N_INSNS (1);
7141 return false;
7143 *total = COSTS_N_INSNS (20);
7144 return false;
7146 *total = COSTS_N_INSNS (1);
7147 if (mode == DImode)
7148 *total += COSTS_N_INSNS (3);
7149 return false;
7151 case SIGN_EXTEND:
7152 case ZERO_EXTEND:
7153 *total = 0;
7154 if (GET_MODE_CLASS (mode) == MODE_INT)
7156 rtx op = XEXP (x, 0);
7157 enum machine_mode opmode = GET_MODE (op);
7159 if (mode == DImode)
7160 *total += COSTS_N_INSNS (1);
7162 if (opmode != SImode)
7164 if (MEM_P (op))
7166 /* If !arm_arch4, we use one of the extendhisi2_mem
7167 or movhi_bytes patterns for HImode. For a QImode
7168 sign extension, we first zero-extend from memory
7169 and then perform a shift sequence. */
7170 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7171 *total += COSTS_N_INSNS (2);
7173 else if (arm_arch6)
7174 *total += COSTS_N_INSNS (1);
7176 /* We don't have the necessary insn, so we need to perform some
7177 other operation. */
7178 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7179 /* An and with constant 255. */
7180 *total += COSTS_N_INSNS (1);
7181 else
7182 /* A shift sequence. Increase costs slightly to avoid
7183 combining two shifts into an extend operation. */
7184 *total += COSTS_N_INSNS (2) + 1;
7187 return false;
7190 switch (GET_MODE (XEXP (x, 0)))
7192 case V8QImode:
7193 case V4HImode:
7194 case V2SImode:
7195 case V4QImode:
7196 case V2HImode:
7197 *total = COSTS_N_INSNS (1);
7198 return false;
7200 default:
7201 gcc_unreachable ();
7203 gcc_unreachable ();
7205 case ZERO_EXTRACT:
7206 case SIGN_EXTRACT:
7207 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7208 return true;
7210 case CONST_INT:
7211 if (const_ok_for_arm (INTVAL (x))
7212 || const_ok_for_arm (~INTVAL (x)))
7213 *total = COSTS_N_INSNS (1);
7214 else
7215 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7216 INTVAL (x), NULL_RTX,
7217 NULL_RTX, 0, 0));
7218 return true;
7220 case CONST:
7221 case LABEL_REF:
7222 case SYMBOL_REF:
7223 *total = COSTS_N_INSNS (3);
7224 return true;
7226 case HIGH:
7227 *total = COSTS_N_INSNS (1);
7228 return true;
7230 case LO_SUM:
7231 *total = COSTS_N_INSNS (1);
7232 *total += rtx_cost (XEXP (x, 0), code, speed);
7233 return true;
7235 case CONST_DOUBLE:
7236 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7237 && (mode == SFmode || !TARGET_VFP_SINGLE))
7238 *total = COSTS_N_INSNS (1);
7239 else
7240 *total = COSTS_N_INSNS (4);
7241 return true;
7243 default:
7244 *total = COSTS_N_INSNS (4);
7245 return false;
7249 /* Estimates the size cost of Thumb-1 instructions.
7250    For now most of the code is copied from thumb1_rtx_costs.  We will need
7251    finer-grained tuning when we have more related test cases. */
7252 static inline int
7253 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7255 enum machine_mode mode = GET_MODE (x);
7257 switch (code)
7259 case ASHIFT:
7260 case ASHIFTRT:
7261 case LSHIFTRT:
7262 case ROTATERT:
7263 case PLUS:
7264 case MINUS:
7265 case COMPARE:
7266 case NEG:
7267 case NOT:
7268 return COSTS_N_INSNS (1);
7270 case MULT:
7271 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7273 	  /* The Thumb-1 mul instruction can't operate on a constant; we must
7274 	     load it into a register first. */
7275 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7276 return COSTS_N_INSNS (1) + const_size;
7278 return COSTS_N_INSNS (1);
7280 case SET:
7281 return (COSTS_N_INSNS (1)
7282 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7283 + GET_CODE (SET_DEST (x)) == MEM));
7285 case CONST_INT:
7286 if (outer == SET)
7288 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7289 return COSTS_N_INSNS (1);
7290 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7291 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7292 return COSTS_N_INSNS (2);
7293 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7294 if (thumb_shiftable_const (INTVAL (x)))
7295 return COSTS_N_INSNS (2);
7296 return COSTS_N_INSNS (3);
7298 else if ((outer == PLUS || outer == COMPARE)
7299 && INTVAL (x) < 256 && INTVAL (x) > -256)
7300 return 0;
7301 else if ((outer == IOR || outer == XOR || outer == AND)
7302 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7303 return COSTS_N_INSNS (1);
7304 else if (outer == AND)
7306 int i;
7307 /* This duplicates the tests in the andsi3 expander. */
7308 for (i = 9; i <= 31; i++)
7309 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7310 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7311 return COSTS_N_INSNS (2);
7313 else if (outer == ASHIFT || outer == ASHIFTRT
7314 || outer == LSHIFTRT)
7315 return 0;
7316 return COSTS_N_INSNS (2);
7318 case CONST:
7319 case CONST_DOUBLE:
7320 case LABEL_REF:
7321 case SYMBOL_REF:
7322 return COSTS_N_INSNS (3);
7324 case UDIV:
7325 case UMOD:
7326 case DIV:
7327 case MOD:
7328 return 100;
7330 case TRUNCATE:
7331 return 99;
7333 case AND:
7334 case XOR:
7335 case IOR:
7336 /* XXX guess. */
7337 return 8;
7339 case MEM:
7340 /* XXX another guess. */
7341 /* Memory costs quite a lot for the first word, but subsequent words
7342 load at the equivalent of a single insn each. */
7343 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7344 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7345 ? 4 : 0));
7347 case IF_THEN_ELSE:
7348 /* XXX a guess. */
7349 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7350 return 14;
7351 return 2;
7353 case ZERO_EXTEND:
7354 /* XXX still guessing. */
7355 switch (GET_MODE (XEXP (x, 0)))
7357 case QImode:
7358 return (1 + (mode == DImode ? 4 : 0)
7359 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7361 case HImode:
7362 return (4 + (mode == DImode ? 4 : 0)
7363 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7365 case SImode:
7366 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7368 default:
7369 return 99;
7372 default:
7373 return 99;
7377 /* RTX costs when optimizing for size. */
7378 static bool
7379 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7380 int *total)
7382 enum machine_mode mode = GET_MODE (x);
7383 if (TARGET_THUMB1)
7385 *total = thumb1_size_rtx_costs (x, code, outer_code);
7386 return true;
7389 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7390 switch (code)
7392 case MEM:
7393 	  /* A memory access costs 1 insn if the mode is small or the address is
7394 	     a single register; otherwise it costs one insn per word. */
7395 if (REG_P (XEXP (x, 0)))
7396 *total = COSTS_N_INSNS (1);
7397 else if (flag_pic
7398 && GET_CODE (XEXP (x, 0)) == PLUS
7399 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7400 /* This will be split into two instructions.
7401 See arm.md:calculate_pic_address. */
7402 *total = COSTS_N_INSNS (2);
7403 else
7404 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7405 return true;
7407 case DIV:
7408 case MOD:
7409 case UDIV:
7410 case UMOD:
7411 /* Needs a libcall, so it costs about this. */
7412 *total = COSTS_N_INSNS (2);
7413 return false;
7415 case ROTATE:
7416 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7418 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7419 return true;
7421 /* Fall through */
7422 case ROTATERT:
7423 case ASHIFT:
7424 case LSHIFTRT:
7425 case ASHIFTRT:
7426 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7428 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7429 return true;
7431 else if (mode == SImode)
7433 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7434 /* Slightly disparage register shifts, but not by much. */
7435 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7436 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7437 return true;
7440 /* Needs a libcall. */
7441 *total = COSTS_N_INSNS (2);
7442 return false;
7444 case MINUS:
7445 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7446 && (mode == SFmode || !TARGET_VFP_SINGLE))
7448 *total = COSTS_N_INSNS (1);
7449 return false;
7452 if (mode == SImode)
7454 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7455 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7457 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7458 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7459 || subcode1 == ROTATE || subcode1 == ROTATERT
7460 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7461 || subcode1 == ASHIFTRT)
7463 /* It's just the cost of the two operands. */
7464 *total = 0;
7465 return false;
7468 *total = COSTS_N_INSNS (1);
7469 return false;
7472 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7473 return false;
7475 case PLUS:
7476 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7477 && (mode == SFmode || !TARGET_VFP_SINGLE))
7479 *total = COSTS_N_INSNS (1);
7480 return false;
7483 /* A shift as a part of ADD costs nothing. */
7484 if (GET_CODE (XEXP (x, 0)) == MULT
7485 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7487 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7488 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7489 *total += rtx_cost (XEXP (x, 1), code, false);
7490 return true;
7493 /* Fall through */
7494 case AND: case XOR: case IOR:
7495 if (mode == SImode)
7497 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7499 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7500 || subcode == LSHIFTRT || subcode == ASHIFTRT
7501 || (code == AND && subcode == NOT))
7503 /* It's just the cost of the two operands. */
7504 *total = 0;
7505 return false;
7509 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7510 return false;
7512 case MULT:
7513 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7514 return false;
7516 case NEG:
7517 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7518 && (mode == SFmode || !TARGET_VFP_SINGLE))
7520 *total = COSTS_N_INSNS (1);
7521 return false;
7524 /* Fall through */
7525 case NOT:
7526 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7528 return false;
7530 case IF_THEN_ELSE:
7531 *total = 0;
7532 return false;
7534 case COMPARE:
7535 if (cc_register (XEXP (x, 0), VOIDmode))
7536 * total = 0;
7537 else
7538 *total = COSTS_N_INSNS (1);
7539 return false;
7541 case ABS:
7542 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7543 && (mode == SFmode || !TARGET_VFP_SINGLE))
7544 *total = COSTS_N_INSNS (1);
7545 else
7546 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7547 return false;
7549 case SIGN_EXTEND:
7550 case ZERO_EXTEND:
7551 return arm_rtx_costs_1 (x, outer_code, total, 0);
7553 case CONST_INT:
7554 if (const_ok_for_arm (INTVAL (x)))
7555 /* A multiplication by a constant requires another instruction
7556 to load the constant to a register. */
7557 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7558 ? 1 : 0);
7559 else if (const_ok_for_arm (~INTVAL (x)))
7560 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7561 else if (const_ok_for_arm (-INTVAL (x)))
7563 if (outer_code == COMPARE || outer_code == PLUS
7564 || outer_code == MINUS)
7565 *total = 0;
7566 else
7567 *total = COSTS_N_INSNS (1);
7569 else
7570 *total = COSTS_N_INSNS (2);
7571 return true;
7573 case CONST:
7574 case LABEL_REF:
7575 case SYMBOL_REF:
7576 *total = COSTS_N_INSNS (2);
7577 return true;
7579 case CONST_DOUBLE:
7580 *total = COSTS_N_INSNS (4);
7581 return true;
7583 case HIGH:
7584 case LO_SUM:
7585 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7586 cost of these slightly. */
7587 *total = COSTS_N_INSNS (1) + 1;
7588 return true;
7590 default:
7591 if (mode != VOIDmode)
7592 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7593 else
7594 	*total = COSTS_N_INSNS (4); /* Who knows? */
7595 return false;
7599 /* RTX costs: dispatch to the size-based or the tuning-specific routine. */
7600 static bool
7601 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7602 bool speed)
7604 if (!speed)
7605 return arm_size_rtx_costs (x, (enum rtx_code) code,
7606 (enum rtx_code) outer_code, total);
7607 else
7608 return current_tune->rtx_costs (x, (enum rtx_code) code,
7609 (enum rtx_code) outer_code,
7610 total, speed);
7613 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7614 supported on any "slowmul" cores, so it can be ignored. */
7616 static bool
7617 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7618 int *total, bool speed)
7620 enum machine_mode mode = GET_MODE (x);
7622 if (TARGET_THUMB)
7624 *total = thumb1_rtx_costs (x, code, outer_code);
7625 return true;
7628 switch (code)
7630 case MULT:
7631 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7632 || mode == DImode)
7634 *total = COSTS_N_INSNS (20);
7635 return false;
7638 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7640 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7641 & (unsigned HOST_WIDE_INT) 0xffffffff);
7642 int cost, const_ok = const_ok_for_arm (i);
7643 int j, booth_unit_size;
7645 /* Tune as appropriate. */
7646 cost = const_ok ? 4 : 8;
7647 booth_unit_size = 2;
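	  /* Charge one extra cycle for each booth_unit_size chunk of
	     significant multiplier bits.  */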
7648 for (j = 0; i && j < 32; j += booth_unit_size)
7650 i >>= booth_unit_size;
7651 cost++;
7654 *total = COSTS_N_INSNS (cost);
7655 *total += rtx_cost (XEXP (x, 0), code, speed);
7656 return true;
7659 *total = COSTS_N_INSNS (20);
7660 return false;
7662 default:
7663       return arm_rtx_costs_1 (x, outer_code, total, speed);
7668 /* RTX cost for cores with a fast multiply unit (M variants). */
7670 static bool
7671 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7672 int *total, bool speed)
7674 enum machine_mode mode = GET_MODE (x);
7676 if (TARGET_THUMB1)
7678 *total = thumb1_rtx_costs (x, code, outer_code);
7679 return true;
7682 /* ??? should thumb2 use different costs? */
7683 switch (code)
7685 case MULT:
7686 /* There is no point basing this on the tuning, since it is always the
7687 fast variant if it exists at all. */
7688 if (mode == DImode
7689 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7690 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7691 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7693 *total = COSTS_N_INSNS(2);
7694 return false;
7698 if (mode == DImode)
7700 *total = COSTS_N_INSNS (5);
7701 return false;
7704 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7706 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7707 & (unsigned HOST_WIDE_INT) 0xffffffff);
7708 int cost, const_ok = const_ok_for_arm (i);
7709 int j, booth_unit_size;
7711 /* Tune as appropriate. */
7712 cost = const_ok ? 4 : 8;
7713 booth_unit_size = 8;
7714 for (j = 0; i && j < 32; j += booth_unit_size)
7716 i >>= booth_unit_size;
7717 cost++;
7720 *total = COSTS_N_INSNS (cost);
7721 return false;
7724 if (mode == SImode)
7726 *total = COSTS_N_INSNS (4);
7727 return false;
7730 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7732 if (TARGET_HARD_FLOAT
7733 && (mode == SFmode
7734 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7736 *total = COSTS_N_INSNS (1);
7737 return false;
7741 /* Requires a library call. */
7742 *total = COSTS_N_INSNS (20);
7743 return false;
7745 default:
7746 return arm_rtx_costs_1 (x, outer_code, total, speed);
7751 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7752 so it can be ignored. */
7754 static bool
7755 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7756 int *total, bool speed)
7758 enum machine_mode mode = GET_MODE (x);
7760 if (TARGET_THUMB)
7762 *total = thumb1_rtx_costs (x, code, outer_code);
7763 return true;
7766 switch (code)
7768 case COMPARE:
7769 if (GET_CODE (XEXP (x, 0)) != MULT)
7770 return arm_rtx_costs_1 (x, outer_code, total, speed);
7772 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7773 will stall until the multiplication is complete. */
7774 *total = COSTS_N_INSNS (3);
7775 return false;
7777 case MULT:
7778 /* There is no point basing this on the tuning, since it is always the
7779 fast variant if it exists at all. */
7780 if (mode == DImode
7781 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7782 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7783 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7785 *total = COSTS_N_INSNS (2);
7786 return false;
7790 if (mode == DImode)
7792 *total = COSTS_N_INSNS (5);
7793 return false;
7796 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7798 /* If operand 1 is a constant we can more accurately
7799 calculate the cost of the multiply. The multiplier can
7800 retire 15 bits on the first cycle and a further 12 on the
7801 second. We do, of course, have to load the constant into
7802 a register first. */
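   /* Illustrative values for the cost computed below (examples added for
      clarity, not from the original source): 0x4000 sets no bits in either
      mask, so the cost stays 1; 0x12345 sets bits in 0xffff8000 only, so
      the cost is 2; 0x12345678 sets bits in both masks, so the cost is 3.  */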
7803 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7804 /* There's a general overhead of one cycle. */
7805 int cost = 1;
7806 unsigned HOST_WIDE_INT masked_const;
7808 if (i & 0x80000000)
7809 i = ~i;
7811 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7813 masked_const = i & 0xffff8000;
7814 if (masked_const != 0)
7816 cost++;
7817 masked_const = i & 0xf8000000;
7818 if (masked_const != 0)
7819 cost++;
7821 *total = COSTS_N_INSNS (cost);
7822 return false;
7825 if (mode == SImode)
7827 *total = COSTS_N_INSNS (3);
7828 return false;
7831 /* Requires a library call. */
7832 *total = COSTS_N_INSNS (20);
7833 return false;
7835 default:
7836 return arm_rtx_costs_1 (x, outer_code, total, speed);
7841 /* RTX costs for 9e (and later) cores. */
7843 static bool
7844 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7845 int *total, bool speed)
7847 enum machine_mode mode = GET_MODE (x);
7849 if (TARGET_THUMB1)
7851 switch (code)
7853 case MULT:
7854 *total = COSTS_N_INSNS (3);
7855 return true;
7857 default:
7858 *total = thumb1_rtx_costs (x, code, outer_code);
7859 return true;
7863 switch (code)
7865 case MULT:
7866 /* There is no point basing this on the tuning, since it is always the
7867 fast variant if it exists at all. */
7868 if (mode == DImode
7869 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7870 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7871 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7873 *total = COSTS_N_INSNS (2);
7874 return false;
7878 if (mode == DImode)
7880 *total = COSTS_N_INSNS (5);
7881 return false;
7884 if (mode == SImode)
7886 *total = COSTS_N_INSNS (2);
7887 return false;
7890 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7892 if (TARGET_HARD_FLOAT
7893 && (mode == SFmode
7894 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7896 *total = COSTS_N_INSNS (1);
7897 return false;
7901 *total = COSTS_N_INSNS (20);
7902 return false;
7904 default:
7905 return arm_rtx_costs_1 (x, outer_code, total, speed);
7908 /* All address computations that can be done are free, but rtx cost returns
7909 the same for practically all of them. So we weight the different types
7910 of address here in the order (most pref first):
7911 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
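/* A reading of the weights returned below (illustrative summary only):
   pre/post increment or decrement costs 0, register plus constant 2, a sum
   involving another arithmetic operation (e.g. a shifted index) 3, any other
   register sum 4, a bare register 6, and a MEM, LABEL_REF or SYMBOL_REF 10.  */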
7912 static inline int
7913 arm_arm_address_cost (rtx x)
7915 enum rtx_code c = GET_CODE (x);
7917 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7918 return 0;
7919 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7920 return 10;
7922 if (c == PLUS)
7924 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7925 return 2;
7927 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7928 return 3;
7930 return 4;
7933 return 6;
7936 static inline int
7937 arm_thumb_address_cost (rtx x)
7939 enum rtx_code c = GET_CODE (x);
7941 if (c == REG)
7942 return 1;
7943 if (c == PLUS
7944 && GET_CODE (XEXP (x, 0)) == REG
7945 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7946 return 1;
7948 return 2;
7951 static int
7952 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7954 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7957 /* Adjust cost hook for XScale. */
7958 static bool
7959 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7961 /* Some true dependencies can have a higher cost depending
7962 on precisely how certain input operands are used. */
7963 if (REG_NOTE_KIND(link) == 0
7964 && recog_memoized (insn) >= 0
7965 && recog_memoized (dep) >= 0)
7967 int shift_opnum = get_attr_shift (insn);
7968 enum attr_type attr_type = get_attr_type (dep);
7970 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7971 operand for INSN. If we have a shifted input operand and the
7972 instruction we depend on is another ALU instruction, then we may
7973 have to account for an additional stall. */
7974 if (shift_opnum != 0
7975 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7977 rtx shifted_operand;
7978 int opno;
7980 /* Get the shifted operand. */
7981 extract_insn (insn);
7982 shifted_operand = recog_data.operand[shift_opnum];
7984 /* Iterate over all the operands in DEP. If we write an operand
7985 that overlaps with SHIFTED_OPERAND, then we have to increase the
7986 cost of this dependency. */
7987 extract_insn (dep);
7988 preprocess_constraints ();
7989 for (opno = 0; opno < recog_data.n_operands; opno++)
7991 /* We can ignore strict inputs. */
7992 if (recog_data.operand_type[opno] == OP_IN)
7993 continue;
7995 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7996 shifted_operand))
7998 *cost = 2;
7999 return false;
8004 return true;
8007 /* Adjust cost hook for Cortex A9. */
8008 static bool
8009 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8011 switch (REG_NOTE_KIND (link))
8013 case REG_DEP_ANTI:
8014 *cost = 0;
8015 return false;
8017 case REG_DEP_TRUE:
8018 case REG_DEP_OUTPUT:
8019 if (recog_memoized (insn) >= 0
8020 && recog_memoized (dep) >= 0)
8022 if (GET_CODE (PATTERN (insn)) == SET)
8024 if (GET_MODE_CLASS
8025 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8026 || GET_MODE_CLASS
8027 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8029 enum attr_type attr_type_insn = get_attr_type (insn);
8030 enum attr_type attr_type_dep = get_attr_type (dep);
8032 /* By default all dependencies of the form
8033 s0 = s0 <op> s1
8034 s0 = s0 <op> s2
8035 have an extra latency of 1 cycle because
8036 of the input and output dependency in this
8037 case. However, this gets modeled as a true
8038 dependency and hence all these checks. */
8039 if (REG_P (SET_DEST (PATTERN (insn)))
8040 && REG_P (SET_DEST (PATTERN (dep)))
8041 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8042 SET_DEST (PATTERN (dep))))
8044 /* FMACS is a special case where the dependent
8045 instruction can be issued 3 cycles before
8046 the normal latency in case of an output
8047 dependency. */
8048 if ((attr_type_insn == TYPE_FMACS
8049 || attr_type_insn == TYPE_FMACD)
8050 && (attr_type_dep == TYPE_FMACS
8051 || attr_type_dep == TYPE_FMACD))
8053 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8054 *cost = insn_default_latency (dep) - 3;
8055 else
8056 *cost = insn_default_latency (dep);
8057 return false;
8059 else
8061 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8062 *cost = insn_default_latency (dep) + 1;
8063 else
8064 *cost = insn_default_latency (dep);
8066 return false;
8071 break;
8073 default:
8074 gcc_unreachable ();
8077 return true;
8080 /* Adjust cost hook for FA726TE. */
8081 static bool
8082 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8084 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
8085 followed by a predicated one) has a penalty of 3. */
8086 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8087 && recog_memoized (insn) >= 0
8088 && recog_memoized (dep) >= 0
8089 && get_attr_conds (dep) == CONDS_SET)
8091 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8092 if (get_attr_conds (insn) == CONDS_USE
8093 && get_attr_type (insn) != TYPE_BRANCH)
8095 *cost = 3;
8096 return false;
8099 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8100 || get_attr_conds (insn) == CONDS_USE)
8102 *cost = 0;
8103 return false;
8107 return true;
8110 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8111 It corrects the value of COST based on the relationship between
8112 INSN and DEP through the dependence LINK. It returns the new
8113 value. There is a per-core adjust_cost hook to adjust scheduler costs
8114 and the per-core hook can choose to completely override the generic
8115 adjust_cost function. Only put bits of code into arm_adjust_cost that
8116 are common across all cores. */
8117 static int
8118 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8120 rtx i_pat, d_pat;
8122 /* When generating Thumb-1 code, we want to place flag-setting operations
8123 close to a conditional branch which depends on them, so that we can
8124 omit the comparison. */
8125 if (TARGET_THUMB1
8126 && REG_NOTE_KIND (link) == 0
8127 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8128 && recog_memoized (dep) >= 0
8129 && get_attr_conds (dep) == CONDS_SET)
8130 return 0;
8132 if (current_tune->sched_adjust_cost != NULL)
8134 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8135 return cost;
8138 /* XXX This is not strictly true for the FPA. */
8139 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8140 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8141 return 0;
8143 /* Call insns don't incur a stall, even if they follow a load. */
8144 if (REG_NOTE_KIND (link) == 0
8145 && GET_CODE (insn) == CALL_INSN)
8146 return 1;
8148 if ((i_pat = single_set (insn)) != NULL
8149 && GET_CODE (SET_SRC (i_pat)) == MEM
8150 && (d_pat = single_set (dep)) != NULL
8151 && GET_CODE (SET_DEST (d_pat)) == MEM)
8153 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8154 /* This is a load after a store; there is no conflict if the load reads
8155 from a cached area. Assume that loads from the stack, and from the
8156 constant pool are cached, and that others will miss. This is a
8157 hack. */
8159 if ((GET_CODE (src_mem) == SYMBOL_REF
8160 && CONSTANT_POOL_ADDRESS_P (src_mem))
8161 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8162 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8163 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8164 return 1;
8167 return cost;
8170 static int fp_consts_inited = 0;
8172 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8173 static const char * const strings_fp[8] =
8175 "0", "1", "2", "3",
8176 "4", "5", "0.5", "10"
8179 static REAL_VALUE_TYPE values_fp[8];
8181 static void
8182 init_fp_table (void)
8184 int i;
8185 REAL_VALUE_TYPE r;
8187 if (TARGET_VFP)
8188 fp_consts_inited = 1;
8189 else
8190 fp_consts_inited = 8;
8192 for (i = 0; i < fp_consts_inited; i++)
8194 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8195 values_fp[i] = r;
8199 /* Return TRUE if rtx X is a valid immediate FP constant. */
8201 arm_const_double_rtx (rtx x)
8203 REAL_VALUE_TYPE r;
8204 int i;
8206 if (!fp_consts_inited)
8207 init_fp_table ();
8209 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8210 if (REAL_VALUE_MINUS_ZERO (r))
8211 return 0;
8213 for (i = 0; i < fp_consts_inited; i++)
8214 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8215 return 1;
8217 return 0;
8220 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8222 neg_const_double_rtx_ok_for_fpa (rtx x)
8224 REAL_VALUE_TYPE r;
8225 int i;
8227 if (!fp_consts_inited)
8228 init_fp_table ();
8230 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8231 r = real_value_negate (&r);
8232 if (REAL_VALUE_MINUS_ZERO (r))
8233 return 0;
8235 for (i = 0; i < 8; i++)
8236 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8237 return 1;
8239 return 0;
8243 /* VFPv3 has a fairly wide range of representable immediates, formed from
8244 "quarter-precision" floating-point values. These can be evaluated using this
8245 formula (with ^ for exponentiation):
8247 -1^s * n * 2^-r
8249 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8250 16 <= n <= 31 and 0 <= r <= 7.
8252 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8254 - A (most-significant) is the sign bit.
8255 - BCD are the exponent (encoded as r XOR 3).
8256 - EFGH are the mantissa (encoded as n - 16).
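   As an illustrative check of this encoding (worked example only):
   1.0 = -1^0 * 16 * 2^-4, so s = 0, n = 16 and r = 4, giving A = 0,
   BCD = (4 XOR 3) = 7 and EFGH = 16 - 16 = 0, i.e. the 8-bit pattern
   0111 0000.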
8259 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8260 fconst[sd] instruction, or -1 if X isn't suitable. */
8261 static int
8262 vfp3_const_double_index (rtx x)
8264 REAL_VALUE_TYPE r, m;
8265 int sign, exponent;
8266 unsigned HOST_WIDE_INT mantissa, mant_hi;
8267 unsigned HOST_WIDE_INT mask;
8268 HOST_WIDE_INT m1, m2;
8269 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8271 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8272 return -1;
8274 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8276 /* We can't represent these things, so detect them first. */
8277 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8278 return -1;
8280 /* Extract sign, exponent and mantissa. */
8281 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8282 r = real_value_abs (&r);
8283 exponent = REAL_EXP (&r);
8284 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8285 highest (sign) bit, with a fixed binary point at bit point_pos.
8286 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8287 bits for the mantissa, this may fail (low bits would be lost). */
8288 real_ldexp (&m, &r, point_pos - exponent);
8289 REAL_VALUE_TO_INT (&m1, &m2, m);
8290 mantissa = m1;
8291 mant_hi = m2;
8293 /* If there are bits set in the low part of the mantissa, we can't
8294 represent this value. */
8295 if (mantissa != 0)
8296 return -1;
8298 /* Now make it so that mantissa contains the most-significant bits, and move
8299 the point_pos to indicate that the least-significant bits have been
8300 discarded. */
8301 point_pos -= HOST_BITS_PER_WIDE_INT;
8302 mantissa = mant_hi;
8304 /* We can permit four significant bits of mantissa only, plus a high bit
8305 which is always 1. */
8306 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8307 if ((mantissa & mask) != 0)
8308 return -1;
8310 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8311 mantissa >>= point_pos - 5;
8313 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8314 floating-point immediate zero with Neon using an integer-zero load, but
8315 that case is handled elsewhere.) */
8316 if (mantissa == 0)
8317 return -1;
8319 gcc_assert (mantissa >= 16 && mantissa <= 31);
8321 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8322 normalized significands are in the range [1, 2). (Our mantissa is shifted
8323 left 4 places at this point relative to normalized IEEE754 values). GCC
8324 internally uses [0.5, 1) (see real.c), so the exponent returned from
8325 REAL_EXP must be altered. */
8326 exponent = 5 - exponent;
8328 if (exponent < 0 || exponent > 7)
8329 return -1;
8331 /* Sign, mantissa and exponent are now in the correct form to plug into the
8332 formula described in the comment above. */
8333 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8336 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8338 vfp3_const_double_rtx (rtx x)
8340 if (!TARGET_VFP3)
8341 return 0;
8343 return vfp3_const_double_index (x) != -1;
8346 /* Recognize immediates which can be used in various Neon instructions. Legal
8347 immediates are described by the following table (for VMVN variants, the
8348 bitwise inverse of the constant shown is recognized. In either case, VMOV
8349 is output and the correct instruction to use for a given constant is chosen
8350 by the assembler). The constant shown is replicated across all elements of
8351 the destination vector.
8353 insn elems variant constant (binary)
8354 ---- ----- ------- -----------------
8355 vmov i32 0 00000000 00000000 00000000 abcdefgh
8356 vmov i32 1 00000000 00000000 abcdefgh 00000000
8357 vmov i32 2 00000000 abcdefgh 00000000 00000000
8358 vmov i32 3 abcdefgh 00000000 00000000 00000000
8359 vmov i16 4 00000000 abcdefgh
8360 vmov i16 5 abcdefgh 00000000
8361 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8362 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8363 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8364 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8365 vmvn i16 10 00000000 abcdefgh
8366 vmvn i16 11 abcdefgh 00000000
8367 vmov i32 12 00000000 00000000 abcdefgh 11111111
8368 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8369 vmov i32 14 00000000 abcdefgh 11111111 11111111
8370 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8371 vmov i8 16 abcdefgh
8372 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8373 eeeeeeee ffffffff gggggggg hhhhhhhh
8374 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8376 For case 18, B = !b. Representable values are exactly those accepted by
8377 vfp3_const_double_index, but are output as floating-point numbers rather
8378 than indices.
8380 Variants 0-5 (inclusive) may also be used as immediates for the second
8381 operand of VORR/VBIC instructions.
8383 The INVERSE argument causes the bitwise inverse of the given operand to be
8384 recognized instead (used for recognizing legal immediates for the VAND/VORN
8385 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8386 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8387 output, rather than the real insns vbic/vorr).
8389 INVERSE makes no difference to the recognition of float vectors.
8391 The return value is the variant of immediate as shown in the above table, or
8392 -1 if the given value doesn't match any of the listed patterns.
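   For example (illustrative only), a V4SImode vector with every element
   equal to 0x0000004d matches variant 0 (vmov.i32 with abcdefgh = 0x4d),
   while a vector of 0xffffffb2 matches variant 6, since 0xffffffb2 is the
   bitwise inverse of the constant shown in that row.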
8394 static int
8395 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8396 rtx *modconst, int *elementwidth)
8398 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8399 matches = 1; \
8400 for (i = 0; i < idx; i += (STRIDE)) \
8401 if (!(TEST)) \
8402 matches = 0; \
8403 if (matches) \
8405 immtype = (CLASS); \
8406 elsize = (ELSIZE); \
8407 break; \
8410 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8411 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8412 unsigned char bytes[16];
8413 int immtype = -1, matches;
8414 unsigned int invmask = inverse ? 0xff : 0;
8416 /* Vectors of float constants. */
8417 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8419 rtx el0 = CONST_VECTOR_ELT (op, 0);
8420 REAL_VALUE_TYPE r0;
8422 if (!vfp3_const_double_rtx (el0))
8423 return -1;
8425 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8427 for (i = 1; i < n_elts; i++)
8429 rtx elt = CONST_VECTOR_ELT (op, i);
8430 REAL_VALUE_TYPE re;
8432 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8434 if (!REAL_VALUES_EQUAL (r0, re))
8435 return -1;
8438 if (modconst)
8439 *modconst = CONST_VECTOR_ELT (op, 0);
8441 if (elementwidth)
8442 *elementwidth = 0;
8444 return 18;
8447 /* Splat vector constant out into a byte vector. */
8448 for (i = 0; i < n_elts; i++)
8450 rtx el = CONST_VECTOR_ELT (op, i);
8451 unsigned HOST_WIDE_INT elpart;
8452 unsigned int part, parts;
8454 if (GET_CODE (el) == CONST_INT)
8456 elpart = INTVAL (el);
8457 parts = 1;
8459 else if (GET_CODE (el) == CONST_DOUBLE)
8461 elpart = CONST_DOUBLE_LOW (el);
8462 parts = 2;
8464 else
8465 gcc_unreachable ();
8467 for (part = 0; part < parts; part++)
8469 unsigned int byte;
8470 for (byte = 0; byte < innersize; byte++)
8472 bytes[idx++] = (elpart & 0xff) ^ invmask;
8473 elpart >>= BITS_PER_UNIT;
8475 if (GET_CODE (el) == CONST_DOUBLE)
8476 elpart = CONST_DOUBLE_HIGH (el);
8480 /* Sanity check. */
8481 gcc_assert (idx == GET_MODE_SIZE (mode));
8485 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8486 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8488 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8489 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8491 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8492 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8494 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8495 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8497 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8499 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8501 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8502 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8504 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8505 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8507 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8508 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8510 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8511 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8513 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8515 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8517 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8518 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8520 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8521 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8523 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8524 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8526 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8527 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8529 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8531 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8532 && bytes[i] == bytes[(i + 8) % idx]);
8534 while (0);
8536 if (immtype == -1)
8537 return -1;
8539 if (elementwidth)
8540 *elementwidth = elsize;
8542 if (modconst)
8544 unsigned HOST_WIDE_INT imm = 0;
8546 /* Un-invert bytes of recognized vector, if necessary. */
8547 if (invmask != 0)
8548 for (i = 0; i < idx; i++)
8549 bytes[i] ^= invmask;
8551 if (immtype == 17)
8553 /* FIXME: Broken on 32-bit H_W_I hosts. */
8554 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8556 for (i = 0; i < 8; i++)
8557 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8558 << (i * BITS_PER_UNIT);
8560 *modconst = GEN_INT (imm);
8562 else
8564 unsigned HOST_WIDE_INT imm = 0;
8566 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8567 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8569 *modconst = GEN_INT (imm);
8573 return immtype;
8574 #undef CHECK
8577 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8578 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8579 float elements), and a modified constant (whatever should be output for a
8580 VMOV) in *MODCONST. */
8583 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8584 rtx *modconst, int *elementwidth)
8586 rtx tmpconst;
8587 int tmpwidth;
8588 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8590 if (retval == -1)
8591 return 0;
8593 if (modconst)
8594 *modconst = tmpconst;
8596 if (elementwidth)
8597 *elementwidth = tmpwidth;
8599 return 1;
8602 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8603 the immediate is valid, write a constant suitable for using as an operand
8604 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8605 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8608 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8609 rtx *modconst, int *elementwidth)
8611 rtx tmpconst;
8612 int tmpwidth;
8613 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8615 if (retval < 0 || retval > 5)
8616 return 0;
8618 if (modconst)
8619 *modconst = tmpconst;
8621 if (elementwidth)
8622 *elementwidth = tmpwidth;
8624 return 1;
8627 /* Return a string suitable for output of Neon immediate logic operation
8628 MNEM. */
8630 char *
8631 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8632 int inverse, int quad)
8634 int width, is_valid;
8635 static char templ[40];
8637 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8639 gcc_assert (is_valid != 0);
8641 if (quad)
8642 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8643 else
8644 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8646 return templ;
8649 /* Output a sequence of pairwise operations to implement a reduction.
8650 NOTE: We do "too much work" here, because pairwise operations work on two
8651 registers' worth of operands in one go. Unfortunately we don't think we can
8652 exploit those extra calculations to do the full operation in fewer steps.
8653 Although all vector elements of the result but the first are ignored, we
8654 actually calculate the same result in each of the elements. An alternative
8655 such as initially loading a vector with zero to use as each of the second
8656 operands would use up an additional register and take an extra instruction,
8657 for no particular gain. */
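/* An illustrative run (not from the original source): summing a four-element
   vector takes two steps.  The first pairwise addition of {a, b, c, d} with
   itself gives {a+b, c+d, a+b, c+d}; the second gives a+b+c+d in every
   element, of which the caller only uses element 0.  */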
8659 void
8660 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8661 rtx (*reduc) (rtx, rtx, rtx))
8663 enum machine_mode inner = GET_MODE_INNER (mode);
8664 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8665 rtx tmpsum = op1;
8667 for (i = parts / 2; i >= 1; i /= 2)
8669 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8670 emit_insn (reduc (dest, tmpsum, tmpsum));
8671 tmpsum = dest;
8675 /* If VALS is a vector constant that can be loaded into a register
8676 using VDUP, generate instructions to do so and return an RTX to
8677 assign to the register. Otherwise return NULL_RTX. */
8679 static rtx
8680 neon_vdup_constant (rtx vals)
8682 enum machine_mode mode = GET_MODE (vals);
8683 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8684 int n_elts = GET_MODE_NUNITS (mode);
8685 bool all_same = true;
8686 rtx x;
8687 int i;
8689 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8690 return NULL_RTX;
8692 for (i = 0; i < n_elts; ++i)
8694 x = XVECEXP (vals, 0, i);
8695 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8696 all_same = false;
8699 if (!all_same)
8700 /* The elements are not all the same. We could handle repeating
8701 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8702 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8703 vdup.i16). */
8704 return NULL_RTX;
8706 /* We can load this constant by using VDUP and a constant in a
8707 single ARM register. This will be cheaper than a vector
8708 load. */
8710 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8711 return gen_rtx_VEC_DUPLICATE (mode, x);
8714 /* Generate code to load VALS, which is a PARALLEL containing only
8715 constants (for vec_init) or CONST_VECTOR, efficiently into a
8716 register. Returns an RTX to copy into the register, or NULL_RTX
8717 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8720 neon_make_constant (rtx vals)
8722 enum machine_mode mode = GET_MODE (vals);
8723 rtx target;
8724 rtx const_vec = NULL_RTX;
8725 int n_elts = GET_MODE_NUNITS (mode);
8726 int n_const = 0;
8727 int i;
8729 if (GET_CODE (vals) == CONST_VECTOR)
8730 const_vec = vals;
8731 else if (GET_CODE (vals) == PARALLEL)
8733 /* A CONST_VECTOR must contain only CONST_INTs and
8734 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8735 Only store valid constants in a CONST_VECTOR. */
8736 for (i = 0; i < n_elts; ++i)
8738 rtx x = XVECEXP (vals, 0, i);
8739 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8740 n_const++;
8742 if (n_const == n_elts)
8743 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8745 else
8746 gcc_unreachable ();
8748 if (const_vec != NULL
8749 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8750 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8751 return const_vec;
8752 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8753 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8754 pipeline cycle; creating the constant takes one or two ARM
8755 pipeline cycles. */
8756 return target;
8757 else if (const_vec != NULL_RTX)
8758 /* Load from constant pool. On Cortex-A8 this takes two cycles
8759 (for either double or quad vectors). We can not take advantage
8760 of single-cycle VLD1 because we need a PC-relative addressing
8761 mode. */
8762 return const_vec;
8763 else
8764 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8765 We can not construct an initializer. */
8766 return NULL_RTX;
8769 /* Initialize vector TARGET to VALS. */
8771 void
8772 neon_expand_vector_init (rtx target, rtx vals)
8774 enum machine_mode mode = GET_MODE (target);
8775 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8776 int n_elts = GET_MODE_NUNITS (mode);
8777 int n_var = 0, one_var = -1;
8778 bool all_same = true;
8779 rtx x, mem;
8780 int i;
8782 for (i = 0; i < n_elts; ++i)
8784 x = XVECEXP (vals, 0, i);
8785 if (!CONSTANT_P (x))
8786 ++n_var, one_var = i;
8788 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8789 all_same = false;
8792 if (n_var == 0)
8794 rtx constant = neon_make_constant (vals);
8795 if (constant != NULL_RTX)
8797 emit_move_insn (target, constant);
8798 return;
8802 /* Splat a single non-constant element if we can. */
8803 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8805 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8806 emit_insn (gen_rtx_SET (VOIDmode, target,
8807 gen_rtx_VEC_DUPLICATE (mode, x)));
8808 return;
8811 /* One field is non-constant. Load constant then overwrite varying
8812 field. This is more efficient than using the stack. */
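   /* Illustrative example (not part of the original source): for the
      V4SImode initializer {x, 1, 2, 3}, the code below first builds the
      constant vector {1, 1, 2, 3} and then inserts x into lane 0 with the
      appropriate vset_lane pattern.  */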
8813 if (n_var == 1)
8815 rtx copy = copy_rtx (vals);
8816 rtx index = GEN_INT (one_var);
8818 /* Load constant part of vector, substitute neighboring value for
8819 varying element. */
8820 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8821 neon_expand_vector_init (target, copy);
8823 /* Insert variable. */
8824 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8825 switch (mode)
8827 case V8QImode:
8828 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8829 break;
8830 case V16QImode:
8831 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8832 break;
8833 case V4HImode:
8834 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8835 break;
8836 case V8HImode:
8837 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8838 break;
8839 case V2SImode:
8840 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8841 break;
8842 case V4SImode:
8843 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8844 break;
8845 case V2SFmode:
8846 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8847 break;
8848 case V4SFmode:
8849 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8850 break;
8851 case V2DImode:
8852 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8853 break;
8854 default:
8855 gcc_unreachable ();
8857 return;
8860 /* Construct the vector in memory one field at a time
8861 and load the whole vector. */
8862 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8863 for (i = 0; i < n_elts; i++)
8864 emit_move_insn (adjust_address_nv (mem, inner_mode,
8865 i * GET_MODE_SIZE (inner_mode)),
8866 XVECEXP (vals, 0, i));
8867 emit_move_insn (target, mem);
8870 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8871 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8872 reported source locations are bogus. */
8874 static void
8875 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8876 const char *err)
8878 HOST_WIDE_INT lane;
8880 gcc_assert (GET_CODE (operand) == CONST_INT);
8882 lane = INTVAL (operand);
8884 if (lane < low || lane >= high)
8885 error (err);
8888 /* Bounds-check lanes. */
8890 void
8891 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8893 bounds_check (operand, low, high, "lane out of range");
8896 /* Bounds-check constants. */
8898 void
8899 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8901 bounds_check (operand, low, high, "constant out of range");
8904 HOST_WIDE_INT
8905 neon_element_bits (enum machine_mode mode)
8907 if (mode == DImode)
8908 return GET_MODE_BITSIZE (mode);
8909 else
8910 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8914 /* Predicates for `match_operand' and `match_operator'. */
8916 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8918 cirrus_memory_offset (rtx op)
8920 /* Reject eliminable registers. */
8921 if (! (reload_in_progress || reload_completed)
8922 && ( reg_mentioned_p (frame_pointer_rtx, op)
8923 || reg_mentioned_p (arg_pointer_rtx, op)
8924 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8925 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8926 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8927 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8928 return 0;
8930 if (GET_CODE (op) == MEM)
8932 rtx ind;
8934 ind = XEXP (op, 0);
8936 /* Match: (mem (reg)). */
8937 if (GET_CODE (ind) == REG)
8938 return 1;
8940 /* Match:
8941 (mem (plus (reg)
8942 (const))). */
8943 if (GET_CODE (ind) == PLUS
8944 && GET_CODE (XEXP (ind, 0)) == REG
8945 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8946 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8947 return 1;
8950 return 0;
8953 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8954 WB is true if full writeback address modes are allowed and is false
8955 if limited writeback address modes (POST_INC and PRE_DEC) are
8956 allowed. */
8959 arm_coproc_mem_operand (rtx op, bool wb)
8961 rtx ind;
8963 /* Reject eliminable registers. */
8964 if (! (reload_in_progress || reload_completed)
8965 && ( reg_mentioned_p (frame_pointer_rtx, op)
8966 || reg_mentioned_p (arg_pointer_rtx, op)
8967 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8968 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8969 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8970 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8971 return FALSE;
8973 /* Constants are converted into offsets from labels. */
8974 if (GET_CODE (op) != MEM)
8975 return FALSE;
8977 ind = XEXP (op, 0);
8979 if (reload_completed
8980 && (GET_CODE (ind) == LABEL_REF
8981 || (GET_CODE (ind) == CONST
8982 && GET_CODE (XEXP (ind, 0)) == PLUS
8983 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8984 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8985 return TRUE;
8987 /* Match: (mem (reg)). */
8988 if (GET_CODE (ind) == REG)
8989 return arm_address_register_rtx_p (ind, 0);
8991 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8992 acceptable in any case (subject to verification by
8993 arm_address_register_rtx_p). We need WB to be true to accept
8994 PRE_INC and POST_DEC. */
8995 if (GET_CODE (ind) == POST_INC
8996 || GET_CODE (ind) == PRE_DEC
8997 || (wb
8998 && (GET_CODE (ind) == PRE_INC
8999 || GET_CODE (ind) == POST_DEC)))
9000 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9002 if (wb
9003 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9004 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9005 && GET_CODE (XEXP (ind, 1)) == PLUS
9006 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9007 ind = XEXP (ind, 1);
9009 /* Match:
9010 (plus (reg)
9011 (const)). */
9012 if (GET_CODE (ind) == PLUS
9013 && GET_CODE (XEXP (ind, 0)) == REG
9014 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9015 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9016 && INTVAL (XEXP (ind, 1)) > -1024
9017 && INTVAL (XEXP (ind, 1)) < 1024
9018 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9019 return TRUE;
9021 return FALSE;
9024 /* Return TRUE if OP is a memory operand which we can load or store a vector
9025 to/from. TYPE is one of the following values:
9026 0 - Vector load/store (vldr)
9027 1 - Core registers (ldm)
9028 2 - Element/structure loads (vld1)
9031 neon_vector_mem_operand (rtx op, int type)
9033 rtx ind;
9035 /* Reject eliminable registers. */
9036 if (! (reload_in_progress || reload_completed)
9037 && ( reg_mentioned_p (frame_pointer_rtx, op)
9038 || reg_mentioned_p (arg_pointer_rtx, op)
9039 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9040 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9041 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9042 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9043 return FALSE;
9045 /* Constants are converted into offsets from labels. */
9046 if (GET_CODE (op) != MEM)
9047 return FALSE;
9049 ind = XEXP (op, 0);
9051 if (reload_completed
9052 && (GET_CODE (ind) == LABEL_REF
9053 || (GET_CODE (ind) == CONST
9054 && GET_CODE (XEXP (ind, 0)) == PLUS
9055 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9056 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9057 return TRUE;
9059 /* Match: (mem (reg)). */
9060 if (GET_CODE (ind) == REG)
9061 return arm_address_register_rtx_p (ind, 0);
9063 /* Allow post-increment with Neon registers. */
9064 if ((type != 1 && GET_CODE (ind) == POST_INC)
9065 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9066 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9068 /* FIXME: vld1 allows register post-modify. */
9070 /* Match:
9071 (plus (reg)
9072 (const)). */
9073 if (type == 0
9074 && GET_CODE (ind) == PLUS
9075 && GET_CODE (XEXP (ind, 0)) == REG
9076 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9077 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9078 && INTVAL (XEXP (ind, 1)) > -1024
9079 && INTVAL (XEXP (ind, 1)) < 1016
9080 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9081 return TRUE;
9083 return FALSE;
9086 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9087 type. */
9089 neon_struct_mem_operand (rtx op)
9091 rtx ind;
9093 /* Reject eliminable registers. */
9094 if (! (reload_in_progress || reload_completed)
9095 && ( reg_mentioned_p (frame_pointer_rtx, op)
9096 || reg_mentioned_p (arg_pointer_rtx, op)
9097 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9098 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9099 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9100 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9101 return FALSE;
9103 /* Constants are converted into offsets from labels. */
9104 if (GET_CODE (op) != MEM)
9105 return FALSE;
9107 ind = XEXP (op, 0);
9109 if (reload_completed
9110 && (GET_CODE (ind) == LABEL_REF
9111 || (GET_CODE (ind) == CONST
9112 && GET_CODE (XEXP (ind, 0)) == PLUS
9113 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9114 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9115 return TRUE;
9117 /* Match: (mem (reg)). */
9118 if (GET_CODE (ind) == REG)
9119 return arm_address_register_rtx_p (ind, 0);
9121 return FALSE;
9124 /* Return true if X is a register that will be eliminated later on. */
9126 arm_eliminable_register (rtx x)
9128 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9129 || REGNO (x) == ARG_POINTER_REGNUM
9130 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9131 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9134 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
9135 coprocessor registers. Otherwise return NO_REGS. */
9137 enum reg_class
9138 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9140 if (mode == HFmode)
9142 if (!TARGET_NEON_FP16)
9143 return GENERAL_REGS;
9144 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9145 return NO_REGS;
9146 return GENERAL_REGS;
9149 if (TARGET_NEON
9150 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9151 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9152 && neon_vector_mem_operand (x, 0))
9153 return NO_REGS;
9155 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9156 return NO_REGS;
9158 return GENERAL_REGS;
9161 /* Values which must be returned in the most-significant end of the return
9162 register. */
9164 static bool
9165 arm_return_in_msb (const_tree valtype)
9167 return (TARGET_AAPCS_BASED
9168 && BYTES_BIG_ENDIAN
9169 && (AGGREGATE_TYPE_P (valtype)
9170 || TREE_CODE (valtype) == COMPLEX_TYPE));
9173 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9174 Used by the Cirrus Maverick code, which has to work around
9175 a hardware bug triggered by such instructions. */
9176 static bool
9177 arm_memory_load_p (rtx insn)
9179 rtx body, lhs, rhs;
9181 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9182 return false;
9184 body = PATTERN (insn);
9186 if (GET_CODE (body) != SET)
9187 return false;
9189 lhs = XEXP (body, 0);
9190 rhs = XEXP (body, 1);
9192 lhs = REG_OR_SUBREG_RTX (lhs);
9194 /* If the destination is not a general purpose
9195 register we do not have to worry. */
9196 if (GET_CODE (lhs) != REG
9197 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9198 return false;
9200 /* As well as loads from memory we also have to react
9201 to loads of invalid constants which will be turned
9202 into loads from the minipool. */
9203 return (GET_CODE (rhs) == MEM
9204 || GET_CODE (rhs) == SYMBOL_REF
9205 || note_invalid_constants (insn, -1, false));
9208 /* Return TRUE if INSN is a Cirrus instruction. */
9209 static bool
9210 arm_cirrus_insn_p (rtx insn)
9212 enum attr_cirrus attr;
9214 /* get_attr cannot accept USE or CLOBBER. */
9215 if (!insn
9216 || GET_CODE (insn) != INSN
9217 || GET_CODE (PATTERN (insn)) == USE
9218 || GET_CODE (PATTERN (insn)) == CLOBBER)
9219 return 0;
9221 attr = get_attr_cirrus (insn);
9223 return attr != CIRRUS_NOT;
9226 /* Cirrus reorg for invalid instruction combinations. */
9227 static void
9228 cirrus_reorg (rtx first)
9230 enum attr_cirrus attr;
9231 rtx body = PATTERN (first);
9232 rtx t;
9233 int nops;
9235 /* Any branch must be followed by 2 non-Cirrus instructions. */
9236 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9238 nops = 0;
9239 t = next_nonnote_insn (first);
9241 if (arm_cirrus_insn_p (t))
9242 ++ nops;
9244 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9245 ++ nops;
9247 while (nops --)
9248 emit_insn_after (gen_nop (), first);
9250 return;
9253 /* (float (blah)) is in parallel with a clobber. */
9254 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9255 body = XVECEXP (body, 0, 0);
9257 if (GET_CODE (body) == SET)
9259 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9261 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9262 be followed by a non-Cirrus insn. */
9263 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9265 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9266 emit_insn_after (gen_nop (), first);
9268 return;
9270 else if (arm_memory_load_p (first))
9272 unsigned int arm_regno;
9274 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9275 ldr/cfmv64hr combination where the Rd field is the same
9276 in both instructions must be split with a non-Cirrus
9277 insn. Example:
9279 ldr r0, blah
9281 cfmvsr mvf0, r0. */
9283 /* Get Arm register number for ldr insn. */
9284 if (GET_CODE (lhs) == REG)
9285 arm_regno = REGNO (lhs);
9286 else
9288 gcc_assert (GET_CODE (rhs) == REG);
9289 arm_regno = REGNO (rhs);
9292 /* Next insn. */
9293 first = next_nonnote_insn (first);
9295 if (! arm_cirrus_insn_p (first))
9296 return;
9298 body = PATTERN (first);
9300 /* (float (blah)) is in parallel with a clobber. */
9301 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9302 body = XVECEXP (body, 0, 0);
9304 if (GET_CODE (body) == FLOAT)
9305 body = XEXP (body, 0);
9307 if (get_attr_cirrus (first) == CIRRUS_MOVE
9308 && GET_CODE (XEXP (body, 1)) == REG
9309 && arm_regno == REGNO (XEXP (body, 1)))
9310 emit_insn_after (gen_nop (), first);
9312 return;
9316 /* get_attr cannot accept USE or CLOBBER. */
9317 if (!first
9318 || GET_CODE (first) != INSN
9319 || GET_CODE (PATTERN (first)) == USE
9320 || GET_CODE (PATTERN (first)) == CLOBBER)
9321 return;
9323 attr = get_attr_cirrus (first);
9325 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9326 must be followed by a non-coprocessor instruction. */
9327 if (attr == CIRRUS_COMPARE)
9329 nops = 0;
9331 t = next_nonnote_insn (first);
9333 if (arm_cirrus_insn_p (t))
9334 ++ nops;
9336 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9337 ++ nops;
9339 while (nops --)
9340 emit_insn_after (gen_nop (), first);
9342 return;
9346 /* Return TRUE if X references a SYMBOL_REF. */
9348 symbol_mentioned_p (rtx x)
9350 const char * fmt;
9351 int i;
9353 if (GET_CODE (x) == SYMBOL_REF)
9354 return 1;
9356 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9357 are constant offsets, not symbols. */
9358 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9359 return 0;
9361 fmt = GET_RTX_FORMAT (GET_CODE (x));
9363 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9365 if (fmt[i] == 'E')
9367 int j;
9369 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9370 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9371 return 1;
9373 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9374 return 1;
9377 return 0;
9380 /* Return TRUE if X references a LABEL_REF. */
9382 label_mentioned_p (rtx x)
9384 const char * fmt;
9385 int i;
9387 if (GET_CODE (x) == LABEL_REF)
9388 return 1;
9390 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9391 instruction, but they are constant offsets, not symbols. */
9392 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9393 return 0;
9395 fmt = GET_RTX_FORMAT (GET_CODE (x));
9396 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9398 if (fmt[i] == 'E')
9400 int j;
9402 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9403 if (label_mentioned_p (XVECEXP (x, i, j)))
9404 return 1;
9406 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9407 return 1;
9410 return 0;
9414 tls_mentioned_p (rtx x)
9416 switch (GET_CODE (x))
9418 case CONST:
9419 return tls_mentioned_p (XEXP (x, 0));
9421 case UNSPEC:
9422 if (XINT (x, 1) == UNSPEC_TLS)
9423 return 1;
9425 default:
9426 return 0;
9430 /* Must not copy any rtx that uses a pc-relative address. */
9432 static int
9433 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9435 if (GET_CODE (*x) == UNSPEC
9436 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9437 return 1;
9438 return 0;
9441 static bool
9442 arm_cannot_copy_insn_p (rtx insn)
9444 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9447 enum rtx_code
9448 minmax_code (rtx x)
9450 enum rtx_code code = GET_CODE (x);
9452 switch (code)
9454 case SMAX:
9455 return GE;
9456 case SMIN:
9457 return LE;
9458 case UMIN:
9459 return LEU;
9460 case UMAX:
9461 return GEU;
9462 default:
9463 gcc_unreachable ();
9467 /* Return 1 if memory locations are adjacent. */
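/* Illustrative example (not from the original source): accesses at
   [r3, #4] and [r3, #8] share a base register and differ by exactly 4, so
   they are candidates for merging (subject to the ld-sched and offset
   restrictions checked below); [r3, #4] with [r3, #12], or accesses off
   different base registers, are not.  */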
9469 adjacent_mem_locations (rtx a, rtx b)
9471 /* We don't guarantee to preserve the order of these memory refs. */
9472 if (volatile_refs_p (a) || volatile_refs_p (b))
9473 return 0;
9475 if ((GET_CODE (XEXP (a, 0)) == REG
9476 || (GET_CODE (XEXP (a, 0)) == PLUS
9477 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9478 && (GET_CODE (XEXP (b, 0)) == REG
9479 || (GET_CODE (XEXP (b, 0)) == PLUS
9480 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9482 HOST_WIDE_INT val0 = 0, val1 = 0;
9483 rtx reg0, reg1;
9484 int val_diff;
9486 if (GET_CODE (XEXP (a, 0)) == PLUS)
9488 reg0 = XEXP (XEXP (a, 0), 0);
9489 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9491 else
9492 reg0 = XEXP (a, 0);
9494 if (GET_CODE (XEXP (b, 0)) == PLUS)
9496 reg1 = XEXP (XEXP (b, 0), 0);
9497 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9499 else
9500 reg1 = XEXP (b, 0);
9502 /* Don't accept any offset that will require multiple
9503 instructions to handle, since this would cause the
9504 arith_adjacentmem pattern to output an overlong sequence. */
9505 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9506 return 0;
9508 /* Don't allow an eliminable register: register elimination can make
9509 the offset too large. */
9510 if (arm_eliminable_register (reg0))
9511 return 0;
9513 val_diff = val1 - val0;
9515 if (arm_ld_sched)
9517 /* If the target has load delay slots, then there's no benefit
9518 to using an ldm instruction unless the offset is zero and
9519 we are optimizing for size. */
9520 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9521 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9522 && (val_diff == 4 || val_diff == -4));
9525 return ((REGNO (reg0) == REGNO (reg1))
9526 && (val_diff == 4 || val_diff == -4));
9529 return 0;
9532 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9533 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9534 instruction. ADD_OFFSET is nonzero if the base address register needs
9535 to be modified with an add instruction before we can use it. */
9537 static bool
9538 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9539 int nops, HOST_WIDE_INT add_offset)
9541 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9542 if the offset isn't small enough. The reason 2 ldrs are faster
9543 is because these ARMs are able to do more than one cache access
9544 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9545 whilst the ARM8 has a double bandwidth cache. This means that
9546 these cores can do both an instruction fetch and a data fetch in
9547 a single cycle, so the trick of calculating the address into a
9548 scratch register (one of the result regs) and then doing a load
9549 multiple actually becomes slower (and no smaller in code size).
9550 That is the transformation
9552 ldr rd1, [rbase + offset]
9553 ldr rd2, [rbase + offset + 4]
9555 to
9557 add rd1, rbase, offset
9558 ldmia rd1, {rd1, rd2}
9560 produces worse code -- '3 cycles + any stalls on rd2' instead of
9561 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9562 access per cycle, the first sequence could never complete in less
9563 than 6 cycles, whereas the ldm sequence would only take 5 and
9564 would make better use of sequential accesses if not hitting the
9565 cache.
9567 We cheat here and test 'arm_ld_sched' which we currently know to
9568 only be true for the ARM8, ARM9 and StrongARM. If this ever
9569 changes, then the test below needs to be reworked. */
9570 if (nops == 2 && arm_ld_sched && add_offset != 0)
9571 return false;
9573 /* XScale has load-store double instructions, but they have stricter
9574 alignment requirements than load-store multiple, so we cannot
9575 use them.
9577 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9578 the pipeline until completion.
9580 NREGS CYCLES
9581   1     3
9582   2     4
9583   3     5
9584   4     6
9586 An ldr instruction takes 1-3 cycles, but does not block the
9587 pipeline.
9589 NREGS CYCLES
9590 1 1-3
9591 2 2-6
9592 3 3-9
9593 4 4-12
9595 Best case ldr will always win. However, the more ldr instructions
9596 we issue, the less likely we are to be able to schedule them well.
9597 Using ldr instructions also increases code size.
9599 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9600 for counts of 3 or 4 regs. */
9601 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9602 return false;
9603 return true;
9606 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9607 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9608 an array ORDER which describes the sequence in which to access the offsets
9609 so that they form an ascending order. In this sequence, each
9610 offset must be larger by exactly 4 than the previous one. ORDER[0]
9611 must have been filled in with the lowest offset by the caller.
9612 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9613 we use to verify that ORDER produces an ascending order of registers.
9614 Return true if it was possible to construct such an order, false if
9615 not. */
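/* Illustrative example (not from the original source): for
   UNSORTED_OFFSETS = {8, 4, 12, 0} the caller sets ORDER[0] = 3 (offset 0);
   the loop below then finds offsets 4, 8 and 12 in turn, producing
   ORDER = {3, 1, 0, 2}.  Offsets such as {0, 4, 12} fail, because no entry
   is exactly 4 above 4.  */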
9617 static bool
9618 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9619 int *unsorted_regs)
9621 int i;
9622 for (i = 1; i < nops; i++)
9624 int j;
9626 order[i] = order[i - 1];
9627 for (j = 0; j < nops; j++)
9628 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9630 /* We must find exactly one offset that is higher than the
9631 previous one by 4. */
9632 if (order[i] != order[i - 1])
9633 return false;
9634 order[i] = j;
9636 if (order[i] == order[i - 1])
9637 return false;
9638 /* The register numbers must be ascending. */
9639 if (unsorted_regs != NULL
9640 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9641 return false;
9643 return true;
9646 /* Used to determine in a peephole whether a sequence of load
9647 instructions can be changed into a load-multiple instruction.
9648 NOPS is the number of separate load instructions we are examining. The
9649 first NOPS entries in OPERANDS are the destination registers, the
9650 next NOPS entries are memory operands. If this function is
9651 successful, *BASE is set to the common base register of the memory
9652 accesses; *LOAD_OFFSET is set to the first memory location's offset
9653 from that base register.
9654 REGS is an array filled in with the destination register numbers.
9655 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9656 insn numbers to an ascending order of loads. If CHECK_REGS is true,
9657 the sequence of registers in REGS matches the loads from ascending memory
9658 locations, and the function verifies that the register numbers are
9659 themselves ascending. If CHECK_REGS is false, the register numbers
9660 are stored in the order they are found in the operands. */
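/* Illustrative example (a sketch, not from the original source): for the
   four loads
       ldr r0, [r4]        ldr r1, [r4, #4]
       ldr r2, [r4, #8]    ldr r3, [r4, #12]
   the function returns ldm_case 1 (ldmia), sets *BASE to the register
   number of r4, *LOAD_OFFSET to 0 and fills REGS with {0, 1, 2, 3}.  */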
9661 static int
9662 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9663 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9665 int unsorted_regs[MAX_LDM_STM_OPS];
9666 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9667 int order[MAX_LDM_STM_OPS];
9668 rtx base_reg_rtx = NULL;
9669 int base_reg = -1;
9670 int i, ldm_case;
9672 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9673 easily extended if required. */
9674 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9676 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9678 /* Loop over the operands and check that the memory references are
9679 suitable (i.e. immediate offsets from the same base register). At
9680 the same time, extract the target register, and the memory
9681 offsets. */
9682 for (i = 0; i < nops; i++)
9684 rtx reg;
9685 rtx offset;
9687 /* Convert a subreg of a mem into the mem itself. */
9688 if (GET_CODE (operands[nops + i]) == SUBREG)
9689 operands[nops + i] = alter_subreg (operands + (nops + i));
9691 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9693 /* Don't reorder volatile memory references; it doesn't seem worth
9694 looking for the case where the order is ok anyway. */
9695 if (MEM_VOLATILE_P (operands[nops + i]))
9696 return 0;
9698 offset = const0_rtx;
9700 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9701 || (GET_CODE (reg) == SUBREG
9702 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9703 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9704 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9705 == REG)
9706 || (GET_CODE (reg) == SUBREG
9707 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9708 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9709 == CONST_INT)))
9711 if (i == 0)
9713 base_reg = REGNO (reg);
9714 base_reg_rtx = reg;
9715 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9716 return 0;
9718 else if (base_reg != (int) REGNO (reg))
9719 /* Not addressed from the same base register. */
9720 return 0;
9722 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9723 ? REGNO (operands[i])
9724 : REGNO (SUBREG_REG (operands[i])));
9726 /* If it isn't an integer register, or if it overwrites the
9727 base register but isn't the last insn in the list, then
9728 we can't do this. */
9729 if (unsorted_regs[i] < 0
9730 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9731 || unsorted_regs[i] > 14
9732 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9733 return 0;
9735 unsorted_offsets[i] = INTVAL (offset);
9736 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9737 order[0] = i;
9739 else
9740 /* Not a suitable memory address. */
9741 return 0;
9744 /* All the useful information has now been extracted from the
9745 operands into unsorted_regs and unsorted_offsets; additionally,
9746 order[0] has been set to the lowest offset in the list. Sort
9747 the offsets into order, verifying that they are adjacent, and
9748 check that the register numbers are ascending. */
9749 if (!compute_offset_order (nops, unsorted_offsets, order,
9750 check_regs ? unsorted_regs : NULL))
9751 return 0;
9753 if (saved_order)
9754 memcpy (saved_order, order, sizeof order);
9756 if (base)
9758 *base = base_reg;
9760 for (i = 0; i < nops; i++)
9761 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9763 *load_offset = unsorted_offsets[order[0]];
9766 if (TARGET_THUMB1
9767 && !peep2_reg_dead_p (nops, base_reg_rtx))
9768 return 0;
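   /* Classify which load-multiple addressing mode the offsets correspond to.
      As a worked example with four loads: offsets 0,4,8,12 from the base
      select ldmia; 4,8,12,16 select ldmib; -12,-8,-4,0 select ldmda; and
      -16,-12,-8,-4 select ldmdb.  Other adjacent runs fall to case 5 when
      the initial offset is an immediate the add instruction can take, and
      the base register is adjusted first.  */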
9770 if (unsorted_offsets[order[0]] == 0)
9771 ldm_case = 1; /* ldmia */
9772 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9773 ldm_case = 2; /* ldmib */
9774 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9775 ldm_case = 3; /* ldmda */
9776 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9777 ldm_case = 4; /* ldmdb */
9778 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9779 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9780 ldm_case = 5;
9781 else
9782 return 0;
9784 if (!multiple_operation_profitable_p (false, nops,
9785 ldm_case == 5
9786 ? unsorted_offsets[order[0]] : 0))
9787 return 0;
9789 return ldm_case;
9792 /* Used to determine in a peephole whether a sequence of store instructions can
9793 be changed into a store-multiple instruction.
9794 NOPS is the number of separate store instructions we are examining.
9795 NOPS_TOTAL is the total number of instructions recognized by the peephole
9796 pattern.
9797 The first NOPS entries in OPERANDS are the source registers, the next
9798 NOPS entries are memory operands. If this function is successful, *BASE is
9799 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9800 to the first memory location's offset from that base register. REGS is an
9801 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9802 likewise filled with the corresponding rtx's.
9803 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9804 numbers to an ascending order of stores.
9805 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9806 from ascending memory locations, and the function verifies that the register
9807 numbers are themselves ascending. If CHECK_REGS is false, the register
9808 numbers are stored in the order they are found in the operands. */
9809 static int
9810 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9811 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9812 HOST_WIDE_INT *load_offset, bool check_regs)
9814 int unsorted_regs[MAX_LDM_STM_OPS];
9815 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9816 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9817 int order[MAX_LDM_STM_OPS];
9818 int base_reg = -1;
9819 rtx base_reg_rtx = NULL;
9820 int i, stm_case;
9822 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9823 easily extended if required. */
9824 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9826 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9828 /* Loop over the operands and check that the memory references are
9829 suitable (i.e. immediate offsets from the same base register). At
9830 the same time, extract the target register, and the memory
9831 offsets. */
9832 for (i = 0; i < nops; i++)
9834 rtx reg;
9835 rtx offset;
9837 /* Convert a subreg of a mem into the mem itself. */
9838 if (GET_CODE (operands[nops + i]) == SUBREG)
9839 operands[nops + i] = alter_subreg (operands + (nops + i));
9841 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9843 /* Don't reorder volatile memory references; it doesn't seem worth
9844 looking for the case where the order is ok anyway. */
9845 if (MEM_VOLATILE_P (operands[nops + i]))
9846 return 0;
9848 offset = const0_rtx;
9850 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9851 || (GET_CODE (reg) == SUBREG
9852 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9853 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9854 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9855 == REG)
9856 || (GET_CODE (reg) == SUBREG
9857 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9858 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9859 == CONST_INT)))
9861 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9862 ? operands[i] : SUBREG_REG (operands[i]));
9863 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9865 if (i == 0)
9867 base_reg = REGNO (reg);
9868 base_reg_rtx = reg;
9869 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9870 return 0;
9872 else if (base_reg != (int) REGNO (reg))
9873 /* Not addressed from the same base register. */
9874 return 0;
9876 /* If it isn't an integer register, then we can't do this. */
9877 if (unsorted_regs[i] < 0
9878 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9879 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9880 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9881 || unsorted_regs[i] > 14)
9882 return 0;
9884 unsorted_offsets[i] = INTVAL (offset);
9885 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9886 order[0] = i;
9888 else
9889 /* Not a suitable memory address. */
9890 return 0;
9893 /* All the useful information has now been extracted from the
9894 operands into unsorted_regs and unsorted_offsets; additionally,
9895 order[0] has been set to the lowest offset in the list. Sort
9896 the offsets into order, verifying that they are adjacent, and
9897 check that the register numbers are ascending. */
9898 if (!compute_offset_order (nops, unsorted_offsets, order,
9899 check_regs ? unsorted_regs : NULL))
9900 return 0;
9902 if (saved_order)
9903 memcpy (saved_order, order, sizeof order);
9905 if (base)
9907 *base = base_reg;
9909 for (i = 0; i < nops; i++)
9911 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9912 if (reg_rtxs)
9913 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9916 *load_offset = unsorted_offsets[order[0]];
9919 if (TARGET_THUMB1
9920 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9921 return 0;
9923 if (unsorted_offsets[order[0]] == 0)
9924 stm_case = 1; /* stmia */
9925 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9926 stm_case = 2; /* stmib */
9927 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9928 stm_case = 3; /* stmda */
9929 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9930 stm_case = 4; /* stmdb */
9931 else
9932 return 0;
9934 if (!multiple_operation_profitable_p (false, nops, 0))
9935 return 0;
9937 return stm_case;
9940 /* Routines for use in generating RTL. */
9942 /* Generate a load-multiple instruction. COUNT is the number of loads in
9943 the instruction; REGS and MEMS are arrays containing the operands.
9944 BASEREG is the base register to be used in addressing the memory operands.
9945 WBACK_OFFSET, if nonzero, is the amount by which the instruction should
9946 update the base register. */
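/* As a sketch of the RTL shape produced (not a literal dump): with
   COUNT == 2, REGS == {0, 1} and a nonzero WBACK_OFFSET the function builds

     (parallel [(set basereg (plus basereg (const_int WBACK_OFFSET)))
                (set (reg:SI 0) mems[0])
                (set (reg:SI 1) mems[1])])

   while with WBACK_OFFSET == 0 only the register sets appear.  */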
9948 static rtx
9949 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9950 HOST_WIDE_INT wback_offset)
9952 int i = 0, j;
9953 rtx result;
9955 if (!multiple_operation_profitable_p (false, count, 0))
9957 rtx seq;
9959 start_sequence ();
9961 for (i = 0; i < count; i++)
9962 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9964 if (wback_offset != 0)
9965 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9967 seq = get_insns ();
9968 end_sequence ();
9970 return seq;
9973 result = gen_rtx_PARALLEL (VOIDmode,
9974 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9975 if (wback_offset != 0)
9977 XVECEXP (result, 0, 0)
9978 = gen_rtx_SET (VOIDmode, basereg,
9979 plus_constant (basereg, wback_offset));
9980 i = 1;
9981 count++;
9984 for (j = 0; i < count; i++, j++)
9985 XVECEXP (result, 0, i)
9986 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9988 return result;
9991 /* Generate a store-multiple instruction. COUNT is the number of stores in
9992 the instruction; REGS and MEMS are arrays containing the operands.
9993 BASEREG is the base register to be used in addressing the memory operands.
9994 WBACK_OFFSET, if nonzero, is the amount by which the instruction should
9995 update the base register. */
9997 static rtx
9998 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9999 HOST_WIDE_INT wback_offset)
10001 int i = 0, j;
10002 rtx result;
10004 if (GET_CODE (basereg) == PLUS)
10005 basereg = XEXP (basereg, 0);
10007 if (!multiple_operation_profitable_p (false, count, 0))
10009 rtx seq;
10011 start_sequence ();
10013 for (i = 0; i < count; i++)
10014 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10016 if (wback_offset != 0)
10017 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10019 seq = get_insns ();
10020 end_sequence ();
10022 return seq;
10025 result = gen_rtx_PARALLEL (VOIDmode,
10026 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10027 if (wback_offset != 0)
10029 XVECEXP (result, 0, 0)
10030 = gen_rtx_SET (VOIDmode, basereg,
10031 plus_constant (basereg, wback_offset));
10032 i = 1;
10033 count++;
10036 for (j = 0; i < count; i++, j++)
10037 XVECEXP (result, 0, i)
10038 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10040 return result;
10043 /* Generate either a load-multiple or a store-multiple instruction. This
10044 function can be used in situations where we can start with a single MEM
10045 rtx and adjust its address upwards.
10046 COUNT is the number of operations in the instruction, not counting a
10047 possible update of the base register. REGS is an array containing the
10048 register operands.
10049 BASEREG is the base register to be used in addressing the memory operands,
10050 which are constructed from BASEMEM.
10051 WRITE_BACK specifies whether the generated instruction should include an
10052 update of the base register.
10053 OFFSETP is used to pass an offset to and from this function; this offset
10054 is not used when constructing the address (instead BASEMEM should have an
10055 appropriate offset in its address); it is used only for setting
10056 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
10058 static rtx
10059 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10060 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10062 rtx mems[MAX_LDM_STM_OPS];
10063 HOST_WIDE_INT offset = *offsetp;
10064 int i;
10066 gcc_assert (count <= MAX_LDM_STM_OPS);
10068 if (GET_CODE (basereg) == PLUS)
10069 basereg = XEXP (basereg, 0);
10071 for (i = 0; i < count; i++)
10073 rtx addr = plus_constant (basereg, i * 4);
10074 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10075 offset += 4;
10078 if (write_back)
10079 *offsetp = offset;
10081 if (is_load)
10082 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10083 write_back ? 4 * count : 0);
10084 else
10085 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10086 write_back ? 4 * count : 0);
10090 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10091 rtx basemem, HOST_WIDE_INT *offsetp)
10093 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10094 offsetp);
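/* A minimal usage sketch (register numbers and count chosen arbitrarily,
   BASEREG and BASEMEM being whatever the caller already has): to load three
   consecutive words starting at the address in BASEMEM into r0..r2 and
   record that the running offset has advanced,

     int regs[3] = {0, 1, 2};
     HOST_WIDE_INT off = 0;
     emit_insn (arm_gen_load_multiple (regs, 3, basereg, TRUE, basemem, &off));

   leaves OFF equal to 12, which is what arm_gen_movmemqi below relies on
   when it walks a block copy in four-word chunks.  */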
10098 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10099 rtx basemem, HOST_WIDE_INT *offsetp)
10101 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10102 offsetp);
10105 /* Called from a peephole2 expander to turn a sequence of loads into an
10106 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10107 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10108 is true if we can reorder the registers because their subsequent uses
10109 are commutative.
10110 Returns true iff we could generate a new instruction. */
10112 bool
10113 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10115 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10116 rtx mems[MAX_LDM_STM_OPS];
10117 int i, j, base_reg;
10118 rtx base_reg_rtx;
10119 HOST_WIDE_INT offset;
10120 int write_back = FALSE;
10121 int ldm_case;
10122 rtx addr;
10124 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10125 &base_reg, &offset, !sort_regs);
10127 if (ldm_case == 0)
10128 return false;
10130 if (sort_regs)
10131 for (i = 0; i < nops - 1; i++)
10132 for (j = i + 1; j < nops; j++)
10133 if (regs[i] > regs[j])
10135 int t = regs[i];
10136 regs[i] = regs[j];
10137 regs[j] = t;
10139 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10141 if (TARGET_THUMB1)
10143 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10144 gcc_assert (ldm_case == 1 || ldm_case == 5);
10145 write_back = TRUE;
10148 if (ldm_case == 5)
10150 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10151 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10152 offset = 0;
10153 if (!TARGET_THUMB1)
10155 base_reg = regs[0];
10156 base_reg_rtx = newbase;
10160 for (i = 0; i < nops; i++)
10162 addr = plus_constant (base_reg_rtx, offset + i * 4);
10163 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10164 SImode, addr, 0);
10166 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10167 write_back ? offset + i * 4 : 0));
10168 return true;
10171 /* Called from a peephole2 expander to turn a sequence of stores into an
10172 STM instruction. OPERANDS are the operands found by the peephole matcher;
10173 NOPS indicates how many separate stores we are trying to combine.
10174 Returns true iff we could generate a new instruction. */
10176 bool
10177 gen_stm_seq (rtx *operands, int nops)
10179 int i;
10180 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10181 rtx mems[MAX_LDM_STM_OPS];
10182 int base_reg;
10183 rtx base_reg_rtx;
10184 HOST_WIDE_INT offset;
10185 int write_back = FALSE;
10186 int stm_case;
10187 rtx addr;
10188 bool base_reg_dies;
10190 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10191 mem_order, &base_reg, &offset, true);
10193 if (stm_case == 0)
10194 return false;
10196 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10198 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10199 if (TARGET_THUMB1)
10201 gcc_assert (base_reg_dies);
10202 write_back = TRUE;
10205 if (stm_case == 5)
10207 gcc_assert (base_reg_dies);
10208 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10209 offset = 0;
10212 addr = plus_constant (base_reg_rtx, offset);
10214 for (i = 0; i < nops; i++)
10216 addr = plus_constant (base_reg_rtx, offset + i * 4);
10217 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10218 SImode, addr, 0);
10220 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10221 write_back ? offset + i * 4 : 0));
10222 return true;
10225 /* Called from a peephole2 expander to turn a sequence of stores that are
10226 preceded by constant loads into an STM instruction. OPERANDS are the
10227 operands found by the peephole matcher; NOPS indicates how many
10228 separate stores we are trying to combine; there are 2 * NOPS
10229 instructions in the peephole.
10230 Returns true iff we could generate a new instruction. */
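/* For instance (a schematic example, not taken from a testcase): if the
   matched stores both take their value from r0, written with #1 before the
   first store and with #2 before the second, the same register appears
   twice in REGS.  A free register is then allocated for one of the values,
   the constants are re-emitted as moves into the chosen registers in
   ascending-memory order, and the stores collapse into a single

       stmia r4, {r0, r1}

   (assuming r4 is the common base and r1 was the free register found).  */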
10232 bool
10233 gen_const_stm_seq (rtx *operands, int nops)
10235 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10236 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10237 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10238 rtx mems[MAX_LDM_STM_OPS];
10239 int base_reg;
10240 rtx base_reg_rtx;
10241 HOST_WIDE_INT offset;
10242 int write_back = FALSE;
10243 int stm_case;
10244 rtx addr;
10245 bool base_reg_dies;
10246 int i, j;
10247 HARD_REG_SET allocated;
10249 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10250 mem_order, &base_reg, &offset, false);
10252 if (stm_case == 0)
10253 return false;
10255 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10257 /* If the same register is used more than once, try to find a free
10258 register. */
10259 CLEAR_HARD_REG_SET (allocated);
10260 for (i = 0; i < nops; i++)
10262 for (j = i + 1; j < nops; j++)
10263 if (regs[i] == regs[j])
10265 rtx t = peep2_find_free_register (0, nops * 2,
10266 TARGET_THUMB1 ? "l" : "r",
10267 SImode, &allocated);
10268 if (t == NULL_RTX)
10269 return false;
10270 reg_rtxs[i] = t;
10271 regs[i] = REGNO (t);
10275 /* Compute an ordering that maps the register numbers to an ascending
10276 sequence. */
10277 reg_order[0] = 0;
10278 for (i = 0; i < nops; i++)
10279 if (regs[i] < regs[reg_order[0]])
10280 reg_order[0] = i;
10282 for (i = 1; i < nops; i++)
10284 int this_order = reg_order[i - 1];
10285 for (j = 0; j < nops; j++)
10286 if (regs[j] > regs[reg_order[i - 1]]
10287 && (this_order == reg_order[i - 1]
10288 || regs[j] < regs[this_order]))
10289 this_order = j;
10290 reg_order[i] = this_order;
10293 /* Ensure that registers that must be live after the instruction end
10294 up with the correct value. */
10295 for (i = 0; i < nops; i++)
10297 int this_order = reg_order[i];
10298 if ((this_order != mem_order[i]
10299 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10300 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10301 return false;
10304 /* Load the constants. */
10305 for (i = 0; i < nops; i++)
10307 rtx op = operands[2 * nops + mem_order[i]];
10308 sorted_regs[i] = regs[reg_order[i]];
10309 emit_move_insn (reg_rtxs[reg_order[i]], op);
10312 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10314 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10315 if (TARGET_THUMB1)
10317 gcc_assert (base_reg_dies);
10318 write_back = TRUE;
10321 if (stm_case == 5)
10323 gcc_assert (base_reg_dies);
10324 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10325 offset = 0;
10328 addr = plus_constant (base_reg_rtx, offset);
10330 for (i = 0; i < nops; i++)
10332 addr = plus_constant (base_reg_rtx, offset + i * 4);
10333 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10334 SImode, addr, 0);
10336 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10337 write_back ? offset + i * 4 : 0));
10338 return true;
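/* Expand a movmemqi pattern.  Operands[2] is the byte count (a CONST_INT of
   at most 64) and operands[3] the alignment, which must be a multiple of
   four; larger or less aligned copies are rejected by returning 0.  The bulk
   of the copy is done with load/store-multiple in up to four-word chunks,
   and any trailing one to three bytes are stored individually.  Returns
   nonzero on success.  */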
10342 arm_gen_movmemqi (rtx *operands)
10344 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10345 HOST_WIDE_INT srcoffset, dstoffset;
10346 int i;
10347 rtx src, dst, srcbase, dstbase;
10348 rtx part_bytes_reg = NULL;
10349 rtx mem;
10351 if (GET_CODE (operands[2]) != CONST_INT
10352 || GET_CODE (operands[3]) != CONST_INT
10353 || INTVAL (operands[2]) > 64
10354 || INTVAL (operands[3]) & 3)
10355 return 0;
10357 dstbase = operands[0];
10358 srcbase = operands[1];
10360 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10361 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10363 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10364 out_words_to_go = INTVAL (operands[2]) / 4;
10365 last_bytes = INTVAL (operands[2]) & 3;
10366 dstoffset = srcoffset = 0;
10368 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10369 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10371 for (i = 0; in_words_to_go >= 2; i+=4)
10373 if (in_words_to_go > 4)
10374 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10375 TRUE, srcbase, &srcoffset));
10376 else
10377 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10378 src, FALSE, srcbase,
10379 &srcoffset));
10381 if (out_words_to_go)
10383 if (out_words_to_go > 4)
10384 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10385 TRUE, dstbase, &dstoffset));
10386 else if (out_words_to_go != 1)
10387 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10388 out_words_to_go, dst,
10389 (last_bytes == 0
10390 ? FALSE : TRUE),
10391 dstbase, &dstoffset));
10392 else
10394 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10395 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10396 if (last_bytes != 0)
10398 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10399 dstoffset += 4;
10404 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10405 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10408 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10409 if (out_words_to_go)
10411 rtx sreg;
10413 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10414 sreg = copy_to_reg (mem);
10416 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10417 emit_move_insn (mem, sreg);
10418 in_words_to_go--;
10420 gcc_assert (!in_words_to_go); /* Sanity check */
10423 if (in_words_to_go)
10425 gcc_assert (in_words_to_go > 0);
10427 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10428 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10431 gcc_assert (!last_bytes || part_bytes_reg);
10433 if (BYTES_BIG_ENDIAN && last_bytes)
10435 rtx tmp = gen_reg_rtx (SImode);
10437 /* The bytes we want are in the top end of the word. */
10438 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10439 GEN_INT (8 * (4 - last_bytes))));
10440 part_bytes_reg = tmp;
10442 while (last_bytes)
10444 mem = adjust_automodify_address (dstbase, QImode,
10445 plus_constant (dst, last_bytes - 1),
10446 dstoffset + last_bytes - 1);
10447 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10449 if (--last_bytes)
10451 tmp = gen_reg_rtx (SImode);
10452 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10453 part_bytes_reg = tmp;
10458 else
10460 if (last_bytes > 1)
10462 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10463 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10464 last_bytes -= 2;
10465 if (last_bytes)
10467 rtx tmp = gen_reg_rtx (SImode);
10468 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10469 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10470 part_bytes_reg = tmp;
10471 dstoffset += 2;
10475 if (last_bytes)
10477 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10478 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10482 return 1;
10485 /* Select a dominance comparison mode if possible for a test of the general
10486 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10487 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10488 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10489 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10490 In all cases OP will be either EQ or NE, but we don't need to know which
10491 here. If we are unable to support a dominance comparison we return
10492 CCmode. This will then fail to match for the RTL expressions that
10493 generate this call. */
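/* As a small worked example (condition codes chosen arbitrarily): for
   DOM_CC_X_OR_Y with X == (lt a b) and Y == (le c d), LT dominates LE, so
   CC_DLEmode is returned; had the operands arrived the other way round, the
   two conditions would first be swapped, since LE does not dominate LT.  */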
10494 enum machine_mode
10495 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10497 enum rtx_code cond1, cond2;
10498 int swapped = 0;
10500 /* Currently we will probably get the wrong result if the individual
10501 comparisons are not simple. This also ensures that it is safe to
10502 reverse a comparison if necessary. */
10503 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10504 != CCmode)
10505 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10506 != CCmode))
10507 return CCmode;
10509 /* The if_then_else variant of this tests the second condition if the
10510 first passes, but is true if the first fails. Reverse the first
10511 condition to get a true "inclusive-or" expression. */
10512 if (cond_or == DOM_CC_NX_OR_Y)
10513 cond1 = reverse_condition (cond1);
10515 /* If the comparisons are not equal, and one doesn't dominate the other,
10516 then we can't do this. */
10517 if (cond1 != cond2
10518 && !comparison_dominates_p (cond1, cond2)
10519 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10520 return CCmode;
10522 if (swapped)
10524 enum rtx_code temp = cond1;
10525 cond1 = cond2;
10526 cond2 = temp;
10529 switch (cond1)
10531 case EQ:
10532 if (cond_or == DOM_CC_X_AND_Y)
10533 return CC_DEQmode;
10535 switch (cond2)
10537 case EQ: return CC_DEQmode;
10538 case LE: return CC_DLEmode;
10539 case LEU: return CC_DLEUmode;
10540 case GE: return CC_DGEmode;
10541 case GEU: return CC_DGEUmode;
10542 default: gcc_unreachable ();
10545 case LT:
10546 if (cond_or == DOM_CC_X_AND_Y)
10547 return CC_DLTmode;
10549 switch (cond2)
10551 case LT:
10552 return CC_DLTmode;
10553 case LE:
10554 return CC_DLEmode;
10555 case NE:
10556 return CC_DNEmode;
10557 default:
10558 gcc_unreachable ();
10561 case GT:
10562 if (cond_or == DOM_CC_X_AND_Y)
10563 return CC_DGTmode;
10565 switch (cond2)
10567 case GT:
10568 return CC_DGTmode;
10569 case GE:
10570 return CC_DGEmode;
10571 case NE:
10572 return CC_DNEmode;
10573 default:
10574 gcc_unreachable ();
10577 case LTU:
10578 if (cond_or == DOM_CC_X_AND_Y)
10579 return CC_DLTUmode;
10581 switch (cond2)
10583 case LTU:
10584 return CC_DLTUmode;
10585 case LEU:
10586 return CC_DLEUmode;
10587 case NE:
10588 return CC_DNEmode;
10589 default:
10590 gcc_unreachable ();
10593 case GTU:
10594 if (cond_or == DOM_CC_X_AND_Y)
10595 return CC_DGTUmode;
10597 switch (cond2)
10599 case GTU:
10600 return CC_DGTUmode;
10601 case GEU:
10602 return CC_DGEUmode;
10603 case NE:
10604 return CC_DNEmode;
10605 default:
10606 gcc_unreachable ();
10609 /* The remaining cases only occur when both comparisons are the
10610 same. */
10611 case NE:
10612 gcc_assert (cond1 == cond2);
10613 return CC_DNEmode;
10615 case LE:
10616 gcc_assert (cond1 == cond2);
10617 return CC_DLEmode;
10619 case GE:
10620 gcc_assert (cond1 == cond2);
10621 return CC_DGEmode;
10623 case LEU:
10624 gcc_assert (cond1 == cond2);
10625 return CC_DLEUmode;
10627 case GEU:
10628 gcc_assert (cond1 == cond2);
10629 return CC_DGEUmode;
10631 default:
10632 gcc_unreachable ();
10636 enum machine_mode
10637 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10639 /* All floating point compares return CCFP if it is an equality
10640 comparison, and CCFPE otherwise. */
10641 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10643 switch (op)
10645 case EQ:
10646 case NE:
10647 case UNORDERED:
10648 case ORDERED:
10649 case UNLT:
10650 case UNLE:
10651 case UNGT:
10652 case UNGE:
10653 case UNEQ:
10654 case LTGT:
10655 return CCFPmode;
10657 case LT:
10658 case LE:
10659 case GT:
10660 case GE:
10661 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10662 return CCFPmode;
10663 return CCFPEmode;
10665 default:
10666 gcc_unreachable ();
10670 /* A compare with a shifted operand. Because of canonicalization, the
10671 comparison will have to be swapped when we emit the assembler. */
10672 if (GET_MODE (y) == SImode
10673 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10674 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10675 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10676 || GET_CODE (x) == ROTATERT))
10677 return CC_SWPmode;
10679 /* This operation is performed swapped, but since we only rely on the Z
10680 flag we don't need an additional mode. */
10681 if (GET_MODE (y) == SImode
10682 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10683 && GET_CODE (x) == NEG
10684 && (op == EQ || op == NE))
10685 return CC_Zmode;
10687 /* This is a special case that is used by combine to allow a
10688 comparison of a shifted byte load to be split into a zero-extend
10689 followed by a comparison of the shifted integer (only valid for
10690 equalities and unsigned inequalities). */
10691 if (GET_MODE (x) == SImode
10692 && GET_CODE (x) == ASHIFT
10693 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10694 && GET_CODE (XEXP (x, 0)) == SUBREG
10695 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10696 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10697 && (op == EQ || op == NE
10698 || op == GEU || op == GTU || op == LTU || op == LEU)
10699 && GET_CODE (y) == CONST_INT)
10700 return CC_Zmode;
10702 /* A construct for a conditional compare: if the false arm contains
10703 0, then both conditions must be true, otherwise either condition
10704 must be true. Not all conditions are possible, so CCmode is
10705 returned if it can't be done. */
10706 if (GET_CODE (x) == IF_THEN_ELSE
10707 && (XEXP (x, 2) == const0_rtx
10708 || XEXP (x, 2) == const1_rtx)
10709 && COMPARISON_P (XEXP (x, 0))
10710 && COMPARISON_P (XEXP (x, 1)))
10711 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10712 INTVAL (XEXP (x, 2)));
10714 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10715 if (GET_CODE (x) == AND
10716 && (op == EQ || op == NE)
10717 && COMPARISON_P (XEXP (x, 0))
10718 && COMPARISON_P (XEXP (x, 1)))
10719 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10720 DOM_CC_X_AND_Y);
10722 if (GET_CODE (x) == IOR
10723 && (op == EQ || op == NE)
10724 && COMPARISON_P (XEXP (x, 0))
10725 && COMPARISON_P (XEXP (x, 1)))
10726 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10727 DOM_CC_X_OR_Y);
10729 /* An operation (on Thumb) where we want to test for a single bit.
10730 This is done by shifting that bit up into the top bit of a
10731 scratch register; we can then branch on the sign bit. */
10732 if (TARGET_THUMB1
10733 && GET_MODE (x) == SImode
10734 && (op == EQ || op == NE)
10735 && GET_CODE (x) == ZERO_EXTRACT
10736 && XEXP (x, 1) == const1_rtx)
10737 return CC_Nmode;
10739 /* For an operation that sets the condition codes as a side effect, the
10740 V flag is not set correctly, so we can only use comparisons where
10741 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10742 instead.) */
10743 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10744 if (GET_MODE (x) == SImode
10745 && y == const0_rtx
10746 && (op == EQ || op == NE || op == LT || op == GE)
10747 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10748 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10749 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10750 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10751 || GET_CODE (x) == LSHIFTRT
10752 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10753 || GET_CODE (x) == ROTATERT
10754 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10755 return CC_NOOVmode;
10757 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10758 return CC_Zmode;
10760 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10761 && GET_CODE (x) == PLUS
10762 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10763 return CC_Cmode;
10765 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10767 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10768 available. */
10769 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10770 return CCmode;
10772 switch (op)
10774 case EQ:
10775 case NE:
10776 /* A DImode comparison against zero can be implemented by
10777 or'ing the two halves together. */
10778 if (y == const0_rtx)
10779 return CC_Zmode;
10781 /* We can do an equality test in three Thumb instructions. */
10782 if (!TARGET_ARM)
10783 return CC_Zmode;
10785 /* FALLTHROUGH */
10787 case LTU:
10788 case LEU:
10789 case GTU:
10790 case GEU:
10791 /* DImode unsigned comparisons can be implemented by cmp +
10792 cmpeq without a scratch register. Not worth doing in
10793 Thumb-2. */
10794 if (TARGET_ARM)
10795 return CC_CZmode;
10797 /* FALLTHROUGH */
10799 case LT:
10800 case LE:
10801 case GT:
10802 case GE:
10803 /* DImode signed and unsigned comparisons can be implemented
10804 by cmp + sbcs with a scratch register, but that does not
10805 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10806 gcc_assert (op != EQ && op != NE);
10807 return CC_NCVmode;
10809 default:
10810 gcc_unreachable ();
10814 return CCmode;
10817 /* X and Y are two things to compare using CODE. Emit the compare insn and
10818 return the rtx for the CC register in the proper mode. */
10821 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10823 enum machine_mode mode;
10824 rtx cc_reg;
10825 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10827 /* We might have X as a constant, Y as a register because of the predicates
10828 used for cmpdi. If so, force X to a register here. */
10829 if (dimode_comparison && !REG_P (x))
10830 x = force_reg (DImode, x);
10832 mode = SELECT_CC_MODE (code, x, y);
10833 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10835 if (dimode_comparison
10836 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10837 && mode != CC_CZmode)
10839 rtx clobber, set;
10841 /* To compare two non-zero values for equality, XOR them and
10842 then compare against zero. Not used for ARM mode; there
10843 CC_CZmode is cheaper. */
10844 if (mode == CC_Zmode && y != const0_rtx)
10846 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10847 y = const0_rtx;
10849 /* A scratch register is required. */
10850 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10851 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10852 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10854 else
10855 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10857 return cc_reg;
10860 /* Generate a sequence of insns that computes the correct return
10861 address mask for the physical architecture that the program
10862 is running on. */
10864 arm_gen_return_addr_mask (void)
10866 rtx reg = gen_reg_rtx (Pmode);
10868 emit_insn (gen_return_addr_mask (reg));
10869 return reg;
10872 void
10873 arm_reload_in_hi (rtx *operands)
10875 rtx ref = operands[1];
10876 rtx base, scratch;
10877 HOST_WIDE_INT offset = 0;
10879 if (GET_CODE (ref) == SUBREG)
10881 offset = SUBREG_BYTE (ref);
10882 ref = SUBREG_REG (ref);
10885 if (GET_CODE (ref) == REG)
10887 /* We have a pseudo which has been spilt onto the stack; there
10888 are two cases here: the first where there is a simple
10889 stack-slot replacement and a second where the stack-slot is
10890 out of range, or is used as a subreg. */
10891 if (reg_equiv_mem[REGNO (ref)])
10893 ref = reg_equiv_mem[REGNO (ref)];
10894 base = find_replacement (&XEXP (ref, 0));
10896 else
10897 /* The slot is out of range, or was dressed up in a SUBREG. */
10898 base = reg_equiv_address[REGNO (ref)];
10900 else
10901 base = find_replacement (&XEXP (ref, 0));
10903 /* Handle the case where the address is too complex to be offset by 1. */
10904 if (GET_CODE (base) == MINUS
10905 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10907 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10909 emit_set_insn (base_plus, base);
10910 base = base_plus;
10912 else if (GET_CODE (base) == PLUS)
10914 /* The addend must be CONST_INT, or we would have dealt with it above. */
10915 HOST_WIDE_INT hi, lo;
10917 offset += INTVAL (XEXP (base, 1));
10918 base = XEXP (base, 0);
10920 /* Rework the address into a legal sequence of insns. */
10921 /* Valid range for lo is -4095 -> 4095 */
10922 lo = (offset >= 0
10923 ? (offset & 0xfff)
10924 : -((-offset) & 0xfff));
10926 /* Corner case: if lo is the max offset then we would be out of range
10927 once we have added the additional 1 below, so bump the msb into the
10928 pre-loading insn(s). */
10929 if (lo == 4095)
10930 lo &= 0x7ff;
10932 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10933 ^ (HOST_WIDE_INT) 0x80000000)
10934 - (HOST_WIDE_INT) 0x80000000);
10936 gcc_assert (hi + lo == offset);
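      /* A worked example (values chosen arbitrarily): offset == 0x2ffe gives
	 lo == 0xffe and hi == 0x2000, while offset == -0x2ffe gives
	 lo == -0xffe and hi == -0x2000; in both cases hi + lo == offset and
	 lo stays within the +/-4095 range usable as a load offset.  */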
10938 if (hi != 0)
10940 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10942 /* Get the base address; addsi3 knows how to handle constants
10943 that require more than one insn. */
10944 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10945 base = base_plus;
10946 offset = lo;
10950 /* Operands[2] may overlap operands[0] (though it won't overlap
10951 operands[1]); that is why we asked for a DImode reg -- so we can
10952 use the half that does not overlap. */
10953 if (REGNO (operands[2]) == REGNO (operands[0]))
10954 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10955 else
10956 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10958 emit_insn (gen_zero_extendqisi2 (scratch,
10959 gen_rtx_MEM (QImode,
10960 plus_constant (base,
10961 offset))));
10962 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10963 gen_rtx_MEM (QImode,
10964 plus_constant (base,
10965 offset + 1))));
10966 if (!BYTES_BIG_ENDIAN)
10967 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10968 gen_rtx_IOR (SImode,
10969 gen_rtx_ASHIFT
10970 (SImode,
10971 gen_rtx_SUBREG (SImode, operands[0], 0),
10972 GEN_INT (8)),
10973 scratch));
10974 else
10975 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10976 gen_rtx_IOR (SImode,
10977 gen_rtx_ASHIFT (SImode, scratch,
10978 GEN_INT (8)),
10979 gen_rtx_SUBREG (SImode, operands[0], 0)));
10982 /* Handle storing a half-word to memory during reload by synthesizing it as
10983 two byte stores. Take care not to clobber the input values until after we
10984 have moved them somewhere safe. This code assumes that if the DImode
10985 scratch in operands[2] overlaps either the input value or output address
10986 in some way, then that value must die in this insn (we absolutely need
10987 two scratch registers for some corner cases). */
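/* For a little-endian target the shape of the emitted sequence (a sketch,
   not a literal dump) for storing OUTVAL to [BASE, #OFF] is

       strb  outval, [base, #off]
       lsr   scratch, outval, #8
       strb  scratch, [base, #off+1]

   with the two byte stores going to swapped addresses for big-endian.  */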
10988 void
10989 arm_reload_out_hi (rtx *operands)
10991 rtx ref = operands[0];
10992 rtx outval = operands[1];
10993 rtx base, scratch;
10994 HOST_WIDE_INT offset = 0;
10996 if (GET_CODE (ref) == SUBREG)
10998 offset = SUBREG_BYTE (ref);
10999 ref = SUBREG_REG (ref);
11002 if (GET_CODE (ref) == REG)
11004 /* We have a pseudo which has been spilt onto the stack; there
11005 are two cases here: the first where there is a simple
11006 stack-slot replacement and a second where the stack-slot is
11007 out of range, or is used as a subreg. */
11008 if (reg_equiv_mem[REGNO (ref)])
11010 ref = reg_equiv_mem[REGNO (ref)];
11011 base = find_replacement (&XEXP (ref, 0));
11013 else
11014 /* The slot is out of range, or was dressed up in a SUBREG. */
11015 base = reg_equiv_address[REGNO (ref)];
11017 else
11018 base = find_replacement (&XEXP (ref, 0));
11020 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11022 /* Handle the case where the address is too complex to be offset by 1. */
11023 if (GET_CODE (base) == MINUS
11024 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11026 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11028 /* Be careful not to destroy OUTVAL. */
11029 if (reg_overlap_mentioned_p (base_plus, outval))
11031 /* Updating base_plus might destroy outval; see if we can
11032 swap the scratch and base_plus. */
11033 if (!reg_overlap_mentioned_p (scratch, outval))
11035 rtx tmp = scratch;
11036 scratch = base_plus;
11037 base_plus = tmp;
11039 else
11041 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11043 /* Be conservative and copy OUTVAL into the scratch now;
11044 this should only be necessary if outval is a subreg
11045 of something larger than a word. */
11046 /* XXX Might this clobber base? I can't see how it can,
11047 since scratch is known to overlap with OUTVAL, and
11048 must be wider than a word. */
11049 emit_insn (gen_movhi (scratch_hi, outval));
11050 outval = scratch_hi;
11054 emit_set_insn (base_plus, base);
11055 base = base_plus;
11057 else if (GET_CODE (base) == PLUS)
11059 /* The addend must be CONST_INT, or we would have dealt with it above. */
11060 HOST_WIDE_INT hi, lo;
11062 offset += INTVAL (XEXP (base, 1));
11063 base = XEXP (base, 0);
11065 /* Rework the address into a legal sequence of insns. */
11066 /* Valid range for lo is -4095 -> 4095 */
11067 lo = (offset >= 0
11068 ? (offset & 0xfff)
11069 : -((-offset) & 0xfff));
11071 /* Corner case: if lo is the max offset then we would be out of range
11072 once we have added the additional 1 below, so bump the msb into the
11073 pre-loading insn(s). */
11074 if (lo == 4095)
11075 lo &= 0x7ff;
11077 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11078 ^ (HOST_WIDE_INT) 0x80000000)
11079 - (HOST_WIDE_INT) 0x80000000);
11081 gcc_assert (hi + lo == offset);
11083 if (hi != 0)
11085 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11087 /* Be careful not to destroy OUTVAL. */
11088 if (reg_overlap_mentioned_p (base_plus, outval))
11090 /* Updating base_plus might destroy outval; see if we
11091 can swap the scratch and base_plus. */
11092 if (!reg_overlap_mentioned_p (scratch, outval))
11094 rtx tmp = scratch;
11095 scratch = base_plus;
11096 base_plus = tmp;
11098 else
11100 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11102 /* Be conservative and copy outval into scratch now;
11103 this should only be necessary if outval is a
11104 subreg of something larger than a word. */
11105 /* XXX Might this clobber base? I can't see how it
11106 can, since scratch is known to overlap with
11107 outval. */
11108 emit_insn (gen_movhi (scratch_hi, outval));
11109 outval = scratch_hi;
11113 /* Get the base address; addsi3 knows how to handle constants
11114 that require more than one insn. */
11115 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11116 base = base_plus;
11117 offset = lo;
11121 if (BYTES_BIG_ENDIAN)
11123 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11124 plus_constant (base, offset + 1)),
11125 gen_lowpart (QImode, outval)));
11126 emit_insn (gen_lshrsi3 (scratch,
11127 gen_rtx_SUBREG (SImode, outval, 0),
11128 GEN_INT (8)));
11129 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11130 gen_lowpart (QImode, scratch)));
11132 else
11134 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11135 gen_lowpart (QImode, outval)));
11136 emit_insn (gen_lshrsi3 (scratch,
11137 gen_rtx_SUBREG (SImode, outval, 0),
11138 GEN_INT (8)));
11139 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11140 plus_constant (base, offset + 1)),
11141 gen_lowpart (QImode, scratch)));
11145 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11146 (padded to the size of a word) should be passed in a register. */
11148 static bool
11149 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11151 if (TARGET_AAPCS_BASED)
11152 return must_pass_in_stack_var_size (mode, type);
11153 else
11154 return must_pass_in_stack_var_size_or_pad (mode, type);
11158 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11159 Return true if an argument passed on the stack should be padded upwards,
11160 i.e. if the least-significant byte has useful data.
11161 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11162 aggregate types are placed in the lowest memory address. */
11164 bool
11165 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11167 if (!TARGET_AAPCS_BASED)
11168 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11170 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11171 return false;
11173 return true;
11177 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11178 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11179 byte of the register has useful data, and return the opposite if the
11180 most significant byte does.
11181 For AAPCS, small aggregates and small complex types are always padded
11182 upwards. */
11184 bool
11185 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11186 tree type, int first ATTRIBUTE_UNUSED)
11188 if (TARGET_AAPCS_BASED
11189 && BYTES_BIG_ENDIAN
11190 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11191 && int_size_in_bytes (type) <= 4)
11192 return true;
11194 /* Otherwise, use default padding. */
11195 return !BYTES_BIG_ENDIAN;
11199 /* Print a symbolic form of X to the debug file, F. */
11200 static void
11201 arm_print_value (FILE *f, rtx x)
11203 switch (GET_CODE (x))
11205 case CONST_INT:
11206 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11207 return;
11209 case CONST_DOUBLE:
11210 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11211 return;
11213 case CONST_VECTOR:
11215 int i;
11217 fprintf (f, "<");
11218 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11220 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11221 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11222 fputc (',', f);
11224 fprintf (f, ">");
11226 return;
11228 case CONST_STRING:
11229 fprintf (f, "\"%s\"", XSTR (x, 0));
11230 return;
11232 case SYMBOL_REF:
11233 fprintf (f, "`%s'", XSTR (x, 0));
11234 return;
11236 case LABEL_REF:
11237 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11238 return;
11240 case CONST:
11241 arm_print_value (f, XEXP (x, 0));
11242 return;
11244 case PLUS:
11245 arm_print_value (f, XEXP (x, 0));
11246 fprintf (f, "+");
11247 arm_print_value (f, XEXP (x, 1));
11248 return;
11250 case PC:
11251 fprintf (f, "pc");
11252 return;
11254 default:
11255 fprintf (f, "????");
11256 return;
11260 /* Routines for manipulation of the constant pool. */
11262 /* Arm instructions cannot load a large constant directly into a
11263 register; they have to come from a pc relative load. The constant
11264 must therefore be placed in the addressable range of the pc
11265 relative load. Depending on the precise pc relative load
11266 instruction the range is somewhere between 256 bytes and 4k. This
11267 means that we often have to dump a constant inside a function, and
11268 generate code to branch around it.
11270 It is important to minimize this, since the branches will slow
11271 things down and make the code larger.
11273 Normally we can hide the table after an existing unconditional
11274 branch so that there is no interruption of the flow, but in the
11275 worst case the code looks like this:
11277 ldr rn, L1
11279 b L2
11280 align
11281 L1: .long value
11285 ldr rn, L3
11287 b L4
11288 align
11289 L3: .long value
11293 We fix this by performing a scan after scheduling, which notices
11294 which instructions need to have their operands fetched from the
11295 constant table and builds the table.
11297 The algorithm starts by building a table of all the constants that
11298 need fixing up and all the natural barriers in the function (places
11299 where a constant table can be dropped without breaking the flow).
11300 For each fixup we note how far the pc-relative replacement will be
11301 able to reach and the offset of the instruction into the function.
11303 Having built the table we then group the fixes together to form
11304 tables that are as large as possible (subject to addressing
11305 constraints) and emit each table of constants after the last
11306 barrier that is within range of all the instructions in the group.
11307 If a group does not contain a barrier, then we forcibly create one
11308 by inserting a jump instruction into the flow. Once the table has
11309 been inserted, the insns are then modified to reference the
11310 relevant entry in the pool.
11312 Possible enhancements to the algorithm (not implemented) are:
11314 1) For some processors and object formats, there may be benefit in
11315 aligning the pools to the start of cache lines; this alignment
11316 would need to be taken into account when calculating addressability
11317 of a pool. */
11319 /* These typedefs are located at the start of this file, so that
11320 they can be used in the prototypes there. This comment is to
11321 remind readers of that fact so that the following structures
11322 can be understood more easily.
11324 typedef struct minipool_node Mnode;
11325 typedef struct minipool_fixup Mfix; */
11327 struct minipool_node
11329 /* Doubly linked chain of entries. */
11330 Mnode * next;
11331 Mnode * prev;
11332 /* The maximum offset into the code at which this entry can be placed. While
11333 pushing fixes for forward references, all entries are sorted in order
11334 of increasing max_address. */
11335 HOST_WIDE_INT max_address;
11336 /* Similarly for an entry inserted for a backwards ref. */
11337 HOST_WIDE_INT min_address;
11338 /* The number of fixes referencing this entry. This can become zero
11339 if we "unpush" an entry. In this case we ignore the entry when we
11340 come to emit the code. */
11341 int refcount;
11342 /* The offset from the start of the minipool. */
11343 HOST_WIDE_INT offset;
11344 /* The value in the table. */
11345 rtx value;
11346 /* The mode of value. */
11347 enum machine_mode mode;
11348 /* The size of the value. With iWMMXt enabled
11349 sizes > 4 also imply an alignment of 8 bytes. */
11350 int fix_size;
11353 struct minipool_fixup
11355 Mfix * next;
11356 rtx insn;
11357 HOST_WIDE_INT address;
11358 rtx * loc;
11359 enum machine_mode mode;
11360 int fix_size;
11361 rtx value;
11362 Mnode * minipool;
11363 HOST_WIDE_INT forwards;
11364 HOST_WIDE_INT backwards;
11367 /* Fixes less than a word need padding out to a word boundary. */
11368 #define MINIPOOL_FIX_SIZE(mode) \
11369 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
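/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 (padded up from 2), while
   MINIPOOL_FIX_SIZE (DImode) stays at 8.  */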
11371 static Mnode * minipool_vector_head;
11372 static Mnode * minipool_vector_tail;
11373 static rtx minipool_vector_label;
11374 static int minipool_pad;
11376 /* The linked list of all minipool fixes required for this function. */
11377 Mfix * minipool_fix_head;
11378 Mfix * minipool_fix_tail;
11379 /* The fix entry for the current minipool, once it has been placed. */
11380 Mfix * minipool_barrier;
11382 /* Determines if INSN is the start of a jump table. Returns the end
11383 of the TABLE or NULL_RTX. */
11384 static rtx
11385 is_jump_table (rtx insn)
11387 rtx table;
11389 if (GET_CODE (insn) == JUMP_INSN
11390 && JUMP_LABEL (insn) != NULL
11391 && ((table = next_real_insn (JUMP_LABEL (insn)))
11392 == next_real_insn (insn))
11393 && table != NULL
11394 && GET_CODE (table) == JUMP_INSN
11395 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11396 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11397 return table;
11399 return NULL_RTX;
11402 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11403 #define JUMP_TABLES_IN_TEXT_SECTION 0
11404 #endif
11406 static HOST_WIDE_INT
11407 get_jump_table_size (rtx insn)
11409 /* ADDR_VECs only take room if read-only data goes into the text
11410 section. */
11411 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11413 rtx body = PATTERN (insn);
11414 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11415 HOST_WIDE_INT size;
11416 HOST_WIDE_INT modesize;
11418 modesize = GET_MODE_SIZE (GET_MODE (body));
11419 size = modesize * XVECLEN (body, elt);
11420 switch (modesize)
11422 case 1:
11423 /* Round up size of TBB table to a halfword boundary. */
11424 size = (size + 1) & ~(HOST_WIDE_INT)1;
11425 break;
11426 case 2:
11427 /* No padding necessary for TBH. */
11428 break;
11429 case 4:
11430 /* Add two bytes for alignment on Thumb. */
11431 if (TARGET_THUMB)
11432 size += 2;
11433 break;
11434 default:
11435 gcc_unreachable ();
11437 return size;
11440 return 0;
11443 /* Move a minipool fix MP from its current location to before MAX_MP.
11444 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11445 constraints may need updating. */
11446 static Mnode *
11447 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11448 HOST_WIDE_INT max_address)
11450 /* The code below assumes these are different. */
11451 gcc_assert (mp != max_mp);
11453 if (max_mp == NULL)
11455 if (max_address < mp->max_address)
11456 mp->max_address = max_address;
11458 else
11460 if (max_address > max_mp->max_address - mp->fix_size)
11461 mp->max_address = max_mp->max_address - mp->fix_size;
11462 else
11463 mp->max_address = max_address;
11465 /* Unlink MP from its current position. Since max_mp is non-null,
11466 mp->prev must be non-null. */
11467 mp->prev->next = mp->next;
11468 if (mp->next != NULL)
11469 mp->next->prev = mp->prev;
11470 else
11471 minipool_vector_tail = mp->prev;
11473 /* Re-insert it before MAX_MP. */
11474 mp->next = max_mp;
11475 mp->prev = max_mp->prev;
11476 max_mp->prev = mp;
11478 if (mp->prev != NULL)
11479 mp->prev->next = mp;
11480 else
11481 minipool_vector_head = mp;
11484 /* Save the new entry. */
11485 max_mp = mp;
11487 /* Scan over the preceding entries and adjust their addresses as
11488 required. */
11489 while (mp->prev != NULL
11490 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11492 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11493 mp = mp->prev;
11496 return max_mp;
11499 /* Add a constant to the minipool for a forward reference. Returns the
11500 node added or NULL if the constant will not fit in this pool. */
11501 static Mnode *
11502 add_minipool_forward_ref (Mfix *fix)
11504 /* If set, max_mp is the first pool_entry that has a lower
11505 constraint than the one we are trying to add. */
11506 Mnode * max_mp = NULL;
11507 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11508 Mnode * mp;
11510 /* If the minipool starts before the end of FIX->INSN then this FIX
11511 cannot be placed into the current pool. Furthermore, adding the
11512 new constant pool entry may cause the pool to start FIX_SIZE bytes
11513 earlier. */
11514 if (minipool_vector_head &&
11515 (fix->address + get_attr_length (fix->insn)
11516 >= minipool_vector_head->max_address - fix->fix_size))
11517 return NULL;
11519 /* Scan the pool to see if a constant with the same value has
11520 already been added. While we are doing this, also note the
11521 location where we must insert the constant if it doesn't already
11522 exist. */
11523 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11525 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11526 && fix->mode == mp->mode
11527 && (GET_CODE (fix->value) != CODE_LABEL
11528 || (CODE_LABEL_NUMBER (fix->value)
11529 == CODE_LABEL_NUMBER (mp->value)))
11530 && rtx_equal_p (fix->value, mp->value))
11532 /* More than one fix references this entry. */
11533 mp->refcount++;
11534 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11537 /* Note the insertion point if necessary. */
11538 if (max_mp == NULL
11539 && mp->max_address > max_address)
11540 max_mp = mp;
11542 /* If we are inserting an 8-byte aligned quantity and
11543 we have not already found an insertion point, then
11544 make sure that all such 8-byte aligned quantities are
11545 placed at the start of the pool. */
11546 if (ARM_DOUBLEWORD_ALIGN
11547 && max_mp == NULL
11548 && fix->fix_size >= 8
11549 && mp->fix_size < 8)
11551 max_mp = mp;
11552 max_address = mp->max_address;
11556 /* The value is not currently in the minipool, so we need to create
11557 a new entry for it. If MAX_MP is NULL, the entry will be put on
11558 the end of the list since the placement is less constrained than
11559 any existing entry. Otherwise, we insert the new fix before
11560 MAX_MP and, if necessary, adjust the constraints on the other
11561 entries. */
11562 mp = XNEW (Mnode);
11563 mp->fix_size = fix->fix_size;
11564 mp->mode = fix->mode;
11565 mp->value = fix->value;
11566 mp->refcount = 1;
11567 /* Not yet required for a backwards ref. */
11568 mp->min_address = -65536;
11570 if (max_mp == NULL)
11572 mp->max_address = max_address;
11573 mp->next = NULL;
11574 mp->prev = minipool_vector_tail;
11576 if (mp->prev == NULL)
11578 minipool_vector_head = mp;
11579 minipool_vector_label = gen_label_rtx ();
11581 else
11582 mp->prev->next = mp;
11584 minipool_vector_tail = mp;
11586 else
11588 if (max_address > max_mp->max_address - mp->fix_size)
11589 mp->max_address = max_mp->max_address - mp->fix_size;
11590 else
11591 mp->max_address = max_address;
11593 mp->next = max_mp;
11594 mp->prev = max_mp->prev;
11595 max_mp->prev = mp;
11596 if (mp->prev != NULL)
11597 mp->prev->next = mp;
11598 else
11599 minipool_vector_head = mp;
11602 /* Save the new entry. */
11603 max_mp = mp;
11605 /* Scan over the preceding entries and adjust their addresses as
11606 required. */
11607 while (mp->prev != NULL
11608 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11610 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11611 mp = mp->prev;
11614 return max_mp;
11617 static Mnode *
11618 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11619 HOST_WIDE_INT min_address)
11621 HOST_WIDE_INT offset;
11623 /* The code below assumes these are different. */
11624 gcc_assert (mp != min_mp);
11626 if (min_mp == NULL)
11628 if (min_address > mp->min_address)
11629 mp->min_address = min_address;
11631 else
11633 /* We will adjust this below if it is too loose. */
11634 mp->min_address = min_address;
11636 /* Unlink MP from its current position. Since min_mp is non-null,
11637 mp->next must be non-null. */
11638 mp->next->prev = mp->prev;
11639 if (mp->prev != NULL)
11640 mp->prev->next = mp->next;
11641 else
11642 minipool_vector_head = mp->next;
11644 /* Reinsert it after MIN_MP. */
11645 mp->prev = min_mp;
11646 mp->next = min_mp->next;
11647 min_mp->next = mp;
11648 if (mp->next != NULL)
11649 mp->next->prev = mp;
11650 else
11651 minipool_vector_tail = mp;
11654 min_mp = mp;
11656 offset = 0;
11657 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11659 mp->offset = offset;
11660 if (mp->refcount > 0)
11661 offset += mp->fix_size;
11663 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11664 mp->next->min_address = mp->min_address + mp->fix_size;
11667 return min_mp;
11670 /* Add a constant to the minipool for a backward reference. Returns the
11671 node added or NULL if the constant will not fit in this pool.
11673    Note that the code for inserting a backwards reference can be
11674    somewhat confusing because the calculated offsets for each fix do
11675    not take into account the size of the pool (which is still under
11676    construction).  */
11677 static Mnode *
11678 add_minipool_backward_ref (Mfix *fix)
11680 /* If set, min_mp is the last pool_entry that has a lower constraint
11681 than the one we are trying to add. */
11682 Mnode *min_mp = NULL;
11683 /* This can be negative, since it is only a constraint. */
11684 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11685 Mnode *mp;
11687 /* If we can't reach the current pool from this insn, or if we can't
11688 insert this entry at the end of the pool without pushing other
11689 fixes out of range, then we don't try. This ensures that we
11690 can't fail later on. */
11691 if (min_address >= minipool_barrier->address
11692 || (minipool_vector_tail->min_address + fix->fix_size
11693 >= minipool_barrier->address))
11694 return NULL;
11696 /* Scan the pool to see if a constant with the same value has
11697 already been added. While we are doing this, also note the
11698 location where we must insert the constant if it doesn't already
11699 exist. */
11700 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11702 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11703 && fix->mode == mp->mode
11704 && (GET_CODE (fix->value) != CODE_LABEL
11705 || (CODE_LABEL_NUMBER (fix->value)
11706 == CODE_LABEL_NUMBER (mp->value)))
11707 && rtx_equal_p (fix->value, mp->value)
11708 /* Check that there is enough slack to move this entry to the
11709 end of the table (this is conservative). */
11710 && (mp->max_address
11711 > (minipool_barrier->address
11712 + minipool_vector_tail->offset
11713 + minipool_vector_tail->fix_size)))
11715 mp->refcount++;
11716 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11719 if (min_mp != NULL)
11720 mp->min_address += fix->fix_size;
11721 else
11723 /* Note the insertion point if necessary. */
11724 if (mp->min_address < min_address)
11726 	      /* For now, we do not allow the insertion of nodes requiring
11727 		 8-byte alignment anywhere but at the start of the pool.  */
11728 if (ARM_DOUBLEWORD_ALIGN
11729 && fix->fix_size >= 8 && mp->fix_size < 8)
11730 return NULL;
11731 else
11732 min_mp = mp;
11734 else if (mp->max_address
11735 < minipool_barrier->address + mp->offset + fix->fix_size)
11737 /* Inserting before this entry would push the fix beyond
11738 its maximum address (which can happen if we have
11739 re-located a forwards fix); force the new fix to come
11740 after it. */
11741 if (ARM_DOUBLEWORD_ALIGN
11742 && fix->fix_size >= 8 && mp->fix_size < 8)
11743 return NULL;
11744 else
11746 min_mp = mp;
11747 min_address = mp->min_address + fix->fix_size;
11750 /* Do not insert a non-8-byte aligned quantity before 8-byte
11751 aligned quantities. */
11752 else if (ARM_DOUBLEWORD_ALIGN
11753 && fix->fix_size < 8
11754 && mp->fix_size >= 8)
11756 min_mp = mp;
11757 min_address = mp->min_address + fix->fix_size;
11762 /* We need to create a new entry. */
11763 mp = XNEW (Mnode);
11764 mp->fix_size = fix->fix_size;
11765 mp->mode = fix->mode;
11766 mp->value = fix->value;
11767 mp->refcount = 1;
11768 mp->max_address = minipool_barrier->address + 65536;
11770 mp->min_address = min_address;
11772 if (min_mp == NULL)
11774 mp->prev = NULL;
11775 mp->next = minipool_vector_head;
11777 if (mp->next == NULL)
11779 minipool_vector_tail = mp;
11780 minipool_vector_label = gen_label_rtx ();
11782 else
11783 mp->next->prev = mp;
11785 minipool_vector_head = mp;
11787 else
11789 mp->next = min_mp->next;
11790 mp->prev = min_mp;
11791 min_mp->next = mp;
11793 if (mp->next != NULL)
11794 mp->next->prev = mp;
11795 else
11796 minipool_vector_tail = mp;
11799 /* Save the new entry. */
11800 min_mp = mp;
11802 if (mp->prev)
11803 mp = mp->prev;
11804 else
11805 mp->offset = 0;
11807 /* Scan over the following entries and adjust their offsets. */
11808 while (mp->next != NULL)
11810 if (mp->next->min_address < mp->min_address + mp->fix_size)
11811 mp->next->min_address = mp->min_address + mp->fix_size;
11813 if (mp->refcount)
11814 mp->next->offset = mp->offset + mp->fix_size;
11815 else
11816 mp->next->offset = mp->offset;
11818 mp = mp->next;
11821 return min_mp;
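/* Record BARRIER as the barrier that terminates the current minipool and
   assign an offset within the pool to every entry; entries whose refcount
   has dropped to zero occupy no space.  */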
11824 static void
11825 assign_minipool_offsets (Mfix *barrier)
11827 HOST_WIDE_INT offset = 0;
11828 Mnode *mp;
11830 minipool_barrier = barrier;
11832 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11834 mp->offset = offset;
11836 if (mp->refcount > 0)
11837 offset += mp->fix_size;
11841 /* Output the literal table.  */
11842 static void
11843 dump_minipool (rtx scan)
11845 Mnode * mp;
11846 Mnode * nmp;
11847 int align64 = 0;
11849 if (ARM_DOUBLEWORD_ALIGN)
11850 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11851 if (mp->refcount > 0 && mp->fix_size >= 8)
11853 align64 = 1;
11854 break;
11857 if (dump_file)
11858 fprintf (dump_file,
11859 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11860 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11862 scan = emit_label_after (gen_label_rtx (), scan);
11863 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11864 scan = emit_label_after (minipool_vector_label, scan);
11866 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11868 if (mp->refcount > 0)
11870 if (dump_file)
11872 fprintf (dump_file,
11873 ";; Offset %u, min %ld, max %ld ",
11874 (unsigned) mp->offset, (unsigned long) mp->min_address,
11875 (unsigned long) mp->max_address);
11876 arm_print_value (dump_file, mp->value);
11877 fputc ('\n', dump_file);
11880 switch (mp->fix_size)
11882 #ifdef HAVE_consttable_1
11883 case 1:
11884 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11885 break;
11887 #endif
11888 #ifdef HAVE_consttable_2
11889 case 2:
11890 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11891 break;
11893 #endif
11894 #ifdef HAVE_consttable_4
11895 case 4:
11896 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11897 break;
11899 #endif
11900 #ifdef HAVE_consttable_8
11901 case 8:
11902 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11903 break;
11905 #endif
11906 #ifdef HAVE_consttable_16
11907 case 16:
11908 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11909 break;
11911 #endif
11912 default:
11913 gcc_unreachable ();
11917 nmp = mp->next;
11918 free (mp);
11921 minipool_vector_head = minipool_vector_tail = NULL;
11922 scan = emit_insn_after (gen_consttable_end (), scan);
11923 scan = emit_barrier_after (scan);
11926 /* Return the cost of forcibly inserting a barrier after INSN. */
11927 static int
11928 arm_barrier_cost (rtx insn)
11930 /* Basing the location of the pool on the loop depth is preferable,
11931 but at the moment, the basic block information seems to be
11932      corrupted by this stage of the compilation.  */
11933 int base_cost = 50;
11934 rtx next = next_nonnote_insn (insn);
11936 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11937 base_cost -= 20;
11939 switch (GET_CODE (insn))
11941 case CODE_LABEL:
11942 /* It will always be better to place the table before the label, rather
11943 than after it. */
11944 return 50;
11946 case INSN:
11947 case CALL_INSN:
11948 return base_cost;
11950 case JUMP_INSN:
11951 return base_cost - 10;
11953 default:
11954 return base_cost + 10;
11958 /* Find the best place in the insn stream in the range
11959 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11960    Create the barrier by inserting a jump and adding a new fix entry for
11961 it. */
11962 static Mfix *
11963 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11965 HOST_WIDE_INT count = 0;
11966 rtx barrier;
11967 rtx from = fix->insn;
11968 /* The instruction after which we will insert the jump. */
11969 rtx selected = NULL;
11970 int selected_cost;
11971 /* The address at which the jump instruction will be placed. */
11972 HOST_WIDE_INT selected_address;
11973 Mfix * new_fix;
11974 HOST_WIDE_INT max_count = max_address - fix->address;
11975 rtx label = gen_label_rtx ();
11977 selected_cost = arm_barrier_cost (from);
11978 selected_address = fix->address;
11980 while (from && count < max_count)
11982 rtx tmp;
11983 int new_cost;
11985 /* This code shouldn't have been called if there was a natural barrier
11986 within range. */
11987 gcc_assert (GET_CODE (from) != BARRIER);
11989 /* Count the length of this insn. */
11990 count += get_attr_length (from);
11992 /* If there is a jump table, add its length. */
11993 tmp = is_jump_table (from);
11994 if (tmp != NULL)
11996 count += get_jump_table_size (tmp);
11998 /* Jump tables aren't in a basic block, so base the cost on
11999 the dispatch insn. If we select this location, we will
12000 still put the pool after the table. */
12001 new_cost = arm_barrier_cost (from);
12003 if (count < max_count
12004 && (!selected || new_cost <= selected_cost))
12006 selected = tmp;
12007 selected_cost = new_cost;
12008 selected_address = fix->address + count;
12011 /* Continue after the dispatch table. */
12012 from = NEXT_INSN (tmp);
12013 continue;
12016 new_cost = arm_barrier_cost (from);
12018 if (count < max_count
12019 && (!selected || new_cost <= selected_cost))
12021 selected = from;
12022 selected_cost = new_cost;
12023 selected_address = fix->address + count;
12026 from = NEXT_INSN (from);
12029 /* Make sure that we found a place to insert the jump. */
12030 gcc_assert (selected);
12032 /* Make sure we do not split a call and its corresponding
12033 CALL_ARG_LOCATION note. */
12034 if (CALL_P (selected))
12036 rtx next = NEXT_INSN (selected);
12037 if (next && NOTE_P (next)
12038 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
12039 selected = next;
12042 /* Create a new JUMP_INSN that branches around a barrier. */
12043 from = emit_jump_insn_after (gen_jump (label), selected);
12044 JUMP_LABEL (from) = label;
12045 barrier = emit_barrier_after (from);
12046 emit_label_after (label, barrier);
12048 /* Create a minipool barrier entry for the new barrier. */
12049 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12050 new_fix->insn = barrier;
12051 new_fix->address = selected_address;
12052 new_fix->next = fix->next;
12053 fix->next = new_fix;
12055 return new_fix;
12058 /* Record that there is a natural barrier in the insn stream at
12059 ADDRESS. */
12060 static void
12061 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
12063 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12065 fix->insn = insn;
12066 fix->address = address;
12068 fix->next = NULL;
12069 if (minipool_fix_head != NULL)
12070 minipool_fix_tail->next = fix;
12071 else
12072 minipool_fix_head = fix;
12074 minipool_fix_tail = fix;
12077 /* Record INSN, which will need fixing up to load a value from the
12078    minipool.  ADDRESS is the offset of the insn from the start of the
12079 function; LOC is a pointer to the part of the insn which requires
12080 fixing; VALUE is the constant that must be loaded, which is of type
12081 MODE. */
12082 static void
12083 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
12084 enum machine_mode mode, rtx value)
12086 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12088 fix->insn = insn;
12089 fix->address = address;
12090 fix->loc = loc;
12091 fix->mode = mode;
12092 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12093 fix->value = value;
12094 fix->forwards = get_attr_pool_range (insn);
12095 fix->backwards = get_attr_neg_pool_range (insn);
12096 fix->minipool = NULL;
12098 /* If an insn doesn't have a range defined for it, then it isn't
12099 expecting to be reworked by this code. Better to stop now than
12100 to generate duff assembly code. */
12101 gcc_assert (fix->forwards || fix->backwards);
12103 /* If an entry requires 8-byte alignment then assume all constant pools
12104 require 4 bytes of padding. Trying to do this later on a per-pool
12105 basis is awkward because existing pool entries have to be modified. */
12106 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12107 minipool_pad = 4;
12109 if (dump_file)
12111 fprintf (dump_file,
12112 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12113 GET_MODE_NAME (mode),
12114 INSN_UID (insn), (unsigned long) address,
12115 -1 * (long)fix->backwards, (long)fix->forwards);
12116 arm_print_value (dump_file, fix->value);
12117 fprintf (dump_file, "\n");
12120 /* Add it to the chain of fixes. */
12121 fix->next = NULL;
12123 if (minipool_fix_head != NULL)
12124 minipool_fix_tail->next = fix;
12125 else
12126 minipool_fix_head = fix;
12128 minipool_fix_tail = fix;
12131 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12132 Returns the number of insns needed, or 99 if we don't know how to
12133 do it. */
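/* For example (hypothetical value): the 64-bit constant 0x00000001ffffffff
   costs 2, since each half can be synthesized with a single instruction
   (mov #1 for the high word, mvn #0 for the low word).  */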
12134 int
12135 arm_const_double_inline_cost (rtx val)
12137 rtx lowpart, highpart;
12138 enum machine_mode mode;
12140 mode = GET_MODE (val);
12142 if (mode == VOIDmode)
12143 mode = DImode;
12145 gcc_assert (GET_MODE_SIZE (mode) == 8);
12147 lowpart = gen_lowpart (SImode, val);
12148 highpart = gen_highpart_mode (SImode, mode, val);
12150 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12151 gcc_assert (GET_CODE (highpart) == CONST_INT);
12153 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12154 NULL_RTX, NULL_RTX, 0, 0)
12155 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12156 NULL_RTX, NULL_RTX, 0, 0));
12159 /* Return true if it is worthwhile to split a 64-bit constant into two
12160 32-bit operations. This is the case if optimizing for size, or
12161 if we have load delay slots, or if one 32-bit part can be done with
12162 a single data operation. */
12163 bool
12164 arm_const_double_by_parts (rtx val)
12166 enum machine_mode mode = GET_MODE (val);
12167 rtx part;
12169 if (optimize_size || arm_ld_sched)
12170 return true;
12172 if (mode == VOIDmode)
12173 mode = DImode;
12175 part = gen_highpart_mode (SImode, mode, val);
12177 gcc_assert (GET_CODE (part) == CONST_INT);
12179 if (const_ok_for_arm (INTVAL (part))
12180 || const_ok_for_arm (~INTVAL (part)))
12181 return true;
12183 part = gen_lowpart (SImode, val);
12185 gcc_assert (GET_CODE (part) == CONST_INT);
12187 if (const_ok_for_arm (INTVAL (part))
12188 || const_ok_for_arm (~INTVAL (part)))
12189 return true;
12191 return false;
12194 /* Return true if it is possible to inline both the high and low parts
12195 of a 64-bit constant into 32-bit data processing instructions. */
12196 bool
12197 arm_const_double_by_immediates (rtx val)
12199 enum machine_mode mode = GET_MODE (val);
12200 rtx part;
12202 if (mode == VOIDmode)
12203 mode = DImode;
12205 part = gen_highpart_mode (SImode, mode, val);
12207 gcc_assert (GET_CODE (part) == CONST_INT);
12209 if (!const_ok_for_arm (INTVAL (part)))
12210 return false;
12212 part = gen_lowpart (SImode, val);
12214 gcc_assert (GET_CODE (part) == CONST_INT);
12216 if (!const_ok_for_arm (INTVAL (part)))
12217 return false;
12219 return true;
12222 /* Scan INSN and note any of its operands that need fixing.
12223 If DO_PUSHES is false we do not actually push any of the fixups
12224 needed. The function returns TRUE if any fixups were needed/pushed.
12225 This is used by arm_memory_load_p() which needs to know about loads
12226 of constants that will be converted into minipool loads. */
12227 static bool
12228 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12230 bool result = false;
12231 int opno;
12233 extract_insn (insn);
12235 if (!constrain_operands (1))
12236 fatal_insn_not_found (insn);
12238 if (recog_data.n_alternatives == 0)
12239 return false;
12241 /* Fill in recog_op_alt with information about the constraints of
12242 this insn. */
12243 preprocess_constraints ();
12245 for (opno = 0; opno < recog_data.n_operands; opno++)
12247 /* Things we need to fix can only occur in inputs. */
12248 if (recog_data.operand_type[opno] != OP_IN)
12249 continue;
12251 /* If this alternative is a memory reference, then any mention
12252 of constants in this alternative is really to fool reload
12253 into allowing us to accept one there. We need to fix them up
12254 now so that we output the right code. */
12255 if (recog_op_alt[opno][which_alternative].memory_ok)
12257 rtx op = recog_data.operand[opno];
12259 if (CONSTANT_P (op))
12261 if (do_pushes)
12262 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12263 recog_data.operand_mode[opno], op);
12264 result = true;
12266 else if (GET_CODE (op) == MEM
12267 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12268 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12270 if (do_pushes)
12272 rtx cop = avoid_constant_pool_reference (op);
12274 /* Casting the address of something to a mode narrower
12275 than a word can cause avoid_constant_pool_reference()
12276 to return the pool reference itself. That's no good to
12277 		     us here.  Let's just hope that we can use the
12278 constant pool value directly. */
12279 if (op == cop)
12280 cop = get_pool_constant (XEXP (op, 0));
12282 push_minipool_fix (insn, address,
12283 recog_data.operand_loc[opno],
12284 recog_data.operand_mode[opno], cop);
12287 result = true;
12292 return result;
12295 /* Convert instructions to their cc-clobbering variant if possible, since
12296 that allows us to use smaller encodings. */
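/* For example, rewriting (set r0 (plus r0 r1)) as a PARALLEL containing a
   clobber of the condition register allows the 16-bit encoding
   "adds r0, r0, r1" to be used instead of the 32-bit flag-preserving
   "add.w r0, r0, r1", provided the flags are dead at that point.  */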
12298 static void
12299 thumb2_reorg (void)
12301 basic_block bb;
12302 regset_head live;
12304 INIT_REG_SET (&live);
12306 /* We are freeing block_for_insn in the toplev to keep compatibility
12307 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12308 compute_bb_for_insn ();
12309 df_analyze ();
12311 FOR_EACH_BB (bb)
12313 rtx insn;
12315 COPY_REG_SET (&live, DF_LR_OUT (bb));
12316 df_simulate_initialize_backwards (bb, &live);
12317 FOR_BB_INSNS_REVERSE (bb, insn)
12319 if (NONJUMP_INSN_P (insn)
12320 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12322 rtx pat = PATTERN (insn);
12323 if (GET_CODE (pat) == SET
12324 && low_register_operand (XEXP (pat, 0), SImode)
12325 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12326 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12327 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12329 rtx dst = XEXP (pat, 0);
12330 rtx src = XEXP (pat, 1);
12331 rtx op0 = XEXP (src, 0);
12332 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12333 ? XEXP (src, 1) : NULL);
12335 if (rtx_equal_p (dst, op0)
12336 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12338 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12339 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12340 rtvec vec = gen_rtvec (2, pat, clobber);
12342 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12343 INSN_CODE (insn) = -1;
12345 /* We can also handle a commutative operation where the
12346 second operand matches the destination. */
12347 else if (op1 && rtx_equal_p (dst, op1))
12349 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12350 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12351 rtvec vec;
12353 src = copy_rtx (src);
12354 XEXP (src, 0) = op1;
12355 XEXP (src, 1) = op0;
12356 pat = gen_rtx_SET (VOIDmode, dst, src);
12357 vec = gen_rtvec (2, pat, clobber);
12358 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12359 INSN_CODE (insn) = -1;
12364 if (NONDEBUG_INSN_P (insn))
12365 df_simulate_one_insn_backwards (bb, insn, &live);
12369 CLEAR_REG_SET (&live);
12372 /* GCC puts the pool in the wrong place for ARM, since we can only
12373 load addresses a limited distance around the pc. We do some
12374 special munging to move the constant pool values to the correct
12375 point in the code. */
12376 static void
12377 arm_reorg (void)
12379 rtx insn;
12380 HOST_WIDE_INT address = 0;
12381 Mfix * fix;
12383 if (TARGET_THUMB2)
12384 thumb2_reorg ();
12386 minipool_fix_head = minipool_fix_tail = NULL;
12388 /* The first insn must always be a note, or the code below won't
12389 scan it properly. */
12390 insn = get_insns ();
12391 gcc_assert (GET_CODE (insn) == NOTE);
12392 minipool_pad = 0;
12394 /* Scan all the insns and record the operands that will need fixing. */
12395 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12397 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12398 && (arm_cirrus_insn_p (insn)
12399 || GET_CODE (insn) == JUMP_INSN
12400 || arm_memory_load_p (insn)))
12401 cirrus_reorg (insn);
12403 if (GET_CODE (insn) == BARRIER)
12404 push_minipool_barrier (insn, address);
12405 else if (INSN_P (insn))
12407 rtx table;
12409 note_invalid_constants (insn, address, true);
12410 address += get_attr_length (insn);
12412 /* If the insn is a vector jump, add the size of the table
12413 and skip the table. */
12414 if ((table = is_jump_table (insn)) != NULL)
12416 address += get_jump_table_size (table);
12417 insn = table;
12422 fix = minipool_fix_head;
12424 /* Now scan the fixups and perform the required changes. */
12425 while (fix)
12427 Mfix * ftmp;
12428 Mfix * fdel;
12429 Mfix * last_added_fix;
12430 Mfix * last_barrier = NULL;
12431 Mfix * this_fix;
12433 /* Skip any further barriers before the next fix. */
12434 while (fix && GET_CODE (fix->insn) == BARRIER)
12435 fix = fix->next;
12437 /* No more fixes. */
12438 if (fix == NULL)
12439 break;
12441 last_added_fix = NULL;
12443 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12445 if (GET_CODE (ftmp->insn) == BARRIER)
12447 if (ftmp->address >= minipool_vector_head->max_address)
12448 break;
12450 last_barrier = ftmp;
12452 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12453 break;
12455 last_added_fix = ftmp; /* Keep track of the last fix added. */
12458 /* If we found a barrier, drop back to that; any fixes that we
12459 could have reached but come after the barrier will now go in
12460 the next mini-pool. */
12461 if (last_barrier != NULL)
12463 /* Reduce the refcount for those fixes that won't go into this
12464 pool after all. */
12465 for (fdel = last_barrier->next;
12466 fdel && fdel != ftmp;
12467 fdel = fdel->next)
12469 fdel->minipool->refcount--;
12470 fdel->minipool = NULL;
12473 ftmp = last_barrier;
12475 else
12477 	  /* ftmp is the first fix that we can't fit into this pool and
12478 	     there are no natural barriers that we could use.  Insert a
12479 new barrier in the code somewhere between the previous
12480 fix and this one, and arrange to jump around it. */
12481 HOST_WIDE_INT max_address;
12483 /* The last item on the list of fixes must be a barrier, so
12484 we can never run off the end of the list of fixes without
12485 last_barrier being set. */
12486 gcc_assert (ftmp);
12488 max_address = minipool_vector_head->max_address;
12489 /* Check that there isn't another fix that is in range that
12490 we couldn't fit into this pool because the pool was
12491 already too large: we need to put the pool before such an
12492 instruction. The pool itself may come just after the
12493 fix because create_fix_barrier also allows space for a
12494 jump instruction. */
12495 if (ftmp->address < max_address)
12496 max_address = ftmp->address + 1;
12498 last_barrier = create_fix_barrier (last_added_fix, max_address);
12501 assign_minipool_offsets (last_barrier);
12503 while (ftmp)
12505 if (GET_CODE (ftmp->insn) != BARRIER
12506 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12507 == NULL))
12508 break;
12510 ftmp = ftmp->next;
12513 /* Scan over the fixes we have identified for this pool, fixing them
12514 up and adding the constants to the pool itself. */
12515 for (this_fix = fix; this_fix && ftmp != this_fix;
12516 this_fix = this_fix->next)
12517 if (GET_CODE (this_fix->insn) != BARRIER)
12519 rtx addr
12520 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12521 minipool_vector_label),
12522 this_fix->minipool->offset);
12523 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12526 dump_minipool (last_barrier->insn);
12527 fix = ftmp;
12530 /* From now on we must synthesize any constants that we can't handle
12531 directly. This can happen if the RTL gets split during final
12532 instruction generation. */
12533 after_arm_reorg = 1;
12535 /* Free the minipool memory. */
12536 obstack_free (&minipool_obstack, minipool_startobj);
12539 /* Routines to output assembly language. */
12541 /* If the rtx X is one of the valid FPA immediate constants, return the
12542    string form of its value.  In this way we can ensure that valid double
12543    constants are generated even when cross compiling.  */
12544 const char *
12545 fp_immediate_constant (rtx x)
12547 REAL_VALUE_TYPE r;
12548 int i;
12550 if (!fp_consts_inited)
12551 init_fp_table ();
12553 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12554 for (i = 0; i < 8; i++)
12555 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12556 return strings_fp[i];
12558 gcc_unreachable ();
12561 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12562 static const char *
12563 fp_const_from_val (REAL_VALUE_TYPE *r)
12565 int i;
12567 if (!fp_consts_inited)
12568 init_fp_table ();
12570 for (i = 0; i < 8; i++)
12571 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12572 return strings_fp[i];
12574 gcc_unreachable ();
12577 /* Output the operands of a LDM/STM instruction to STREAM.
12578 MASK is the ARM register set mask of which only bits 0-15 are important.
12579    REG is the base register, either the frame pointer or the stack pointer.
12580 INSTR is the possibly suffixed load or store instruction.
12581 RFE is nonzero if the instruction should also copy spsr to cpsr. */
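/* For instance (with a hypothetical INSTR string): calling this with
   INSTR "ldmfd\t%r!, ", REG = SP_REGNUM, a MASK containing r4, r5 and pc,
   and RFE nonzero prints "ldmfd sp!, {r4, r5, pc}^".  */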
12583 static void
12584 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12585 unsigned long mask, int rfe)
12587 unsigned i;
12588 bool not_first = FALSE;
12590 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12591 fputc ('\t', stream);
12592 asm_fprintf (stream, instr, reg);
12593 fputc ('{', stream);
12595 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12596 if (mask & (1 << i))
12598 if (not_first)
12599 fprintf (stream, ", ");
12601 asm_fprintf (stream, "%r", i);
12602 not_first = TRUE;
12605 if (rfe)
12606 fprintf (stream, "}^\n");
12607 else
12608 fprintf (stream, "}\n");
12612 /* Output a FLDMD instruction to STREAM.
12613    BASE is the register containing the address.
12614 REG and COUNT specify the register range.
12615 Extra registers may be added to avoid hardware bugs.
12617 We output FLDMD even for ARMv5 VFP implementations. Although
12618 FLDMD is technically not supported until ARMv6, it is believed
12619 that all VFP implementations support its use in this context. */
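/* For instance, a call with BASE = SP_REGNUM, REG = 8 and COUNT = 3 prints
   "fldmfdd sp!, {d8, d9, d10}".  */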
12621 static void
12622 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12624 int i;
12626   /* Work around the ARM10 VFPr1 bug.  */
12627 if (count == 2 && !arm_arch6)
12629 if (reg == 15)
12630 reg--;
12631 count++;
12634 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12635 load into multiple parts if we have to handle more than 16 registers. */
12636 if (count > 16)
12638 vfp_output_fldmd (stream, base, reg, 16);
12639 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12640 return;
12643 fputc ('\t', stream);
12644 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12646 for (i = reg; i < reg + count; i++)
12648 if (i > reg)
12649 fputs (", ", stream);
12650 asm_fprintf (stream, "d%d", i);
12652 fputs ("}\n", stream);
12657 /* Output the assembly for a store multiple. */
12659 const char *
12660 vfp_output_fstmd (rtx * operands)
12662 char pattern[100];
12663 int p;
12664 int base;
12665 int i;
12667 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12668 p = strlen (pattern);
12670 gcc_assert (GET_CODE (operands[1]) == REG);
12672 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12673 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12675 p += sprintf (&pattern[p], ", d%d", base + i);
12677 strcpy (&pattern[p], "}");
12679 output_asm_insn (pattern, operands);
12680 return "";
12684 /* Emit RTL to save a block of VFP register pairs to the stack.  Returns the
12685 number of bytes pushed. */
12687 static int
12688 vfp_emit_fstmd (int base_reg, int count)
12690 rtx par;
12691 rtx dwarf;
12692 rtx tmp, reg;
12693 int i;
12695   /* Work around the ARM10 VFPr1 bug.  Data corruption can occur when exactly two
12696 register pairs are stored by a store multiple insn. We avoid this
12697 by pushing an extra pair. */
12698 if (count == 2 && !arm_arch6)
12700 if (base_reg == LAST_VFP_REGNUM - 3)
12701 base_reg -= 2;
12702 count++;
12705 /* FSTMD may not store more than 16 doubleword registers at once. Split
12706 larger stores into multiple parts (up to a maximum of two, in
12707 practice). */
12708 if (count > 16)
12710 int saved;
12711 /* NOTE: base_reg is an internal register number, so each D register
12712 counts as 2. */
12713 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12714 saved += vfp_emit_fstmd (base_reg, 16);
12715 return saved;
12718 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12719 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12721 reg = gen_rtx_REG (DFmode, base_reg);
12722 base_reg += 2;
12724 XVECEXP (par, 0, 0)
12725 = gen_rtx_SET (VOIDmode,
12726 gen_frame_mem
12727 (BLKmode,
12728 gen_rtx_PRE_MODIFY (Pmode,
12729 stack_pointer_rtx,
12730 plus_constant
12731 (stack_pointer_rtx,
12732 - (count * 8)))
12734 gen_rtx_UNSPEC (BLKmode,
12735 gen_rtvec (1, reg),
12736 UNSPEC_PUSH_MULT));
12738 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12739 plus_constant (stack_pointer_rtx, -(count * 8)));
12740 RTX_FRAME_RELATED_P (tmp) = 1;
12741 XVECEXP (dwarf, 0, 0) = tmp;
12743 tmp = gen_rtx_SET (VOIDmode,
12744 gen_frame_mem (DFmode, stack_pointer_rtx),
12745 reg);
12746 RTX_FRAME_RELATED_P (tmp) = 1;
12747 XVECEXP (dwarf, 0, 1) = tmp;
12749 for (i = 1; i < count; i++)
12751 reg = gen_rtx_REG (DFmode, base_reg);
12752 base_reg += 2;
12753 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12755 tmp = gen_rtx_SET (VOIDmode,
12756 gen_frame_mem (DFmode,
12757 plus_constant (stack_pointer_rtx,
12758 i * 8)),
12759 reg);
12760 RTX_FRAME_RELATED_P (tmp) = 1;
12761 XVECEXP (dwarf, 0, i + 1) = tmp;
12764 par = emit_insn (par);
12765 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12766 RTX_FRAME_RELATED_P (par) = 1;
12768 return count * 8;
12771 /* Emit a call instruction with pattern PAT. ADDR is the address of
12772 the call target. */
12774 void
12775 arm_emit_call_insn (rtx pat, rtx addr)
12777 rtx insn;
12779 insn = emit_call_insn (pat);
12781 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12782 If the call might use such an entry, add a use of the PIC register
12783 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12784 if (TARGET_VXWORKS_RTP
12785 && flag_pic
12786 && GET_CODE (addr) == SYMBOL_REF
12787 && (SYMBOL_REF_DECL (addr)
12788 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12789 : !SYMBOL_REF_LOCAL_P (addr)))
12791 require_pic_register ();
12792 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12796 /* Output a 'call' insn. */
12797 const char *
12798 output_call (rtx *operands)
12800 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12802 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12803 if (REGNO (operands[0]) == LR_REGNUM)
12805 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12806 output_asm_insn ("mov%?\t%0, %|lr", operands);
12809 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12811 if (TARGET_INTERWORK || arm_arch4t)
12812 output_asm_insn ("bx%?\t%0", operands);
12813 else
12814 output_asm_insn ("mov%?\t%|pc, %0", operands);
12816 return "";
12819 /* Output a 'call' insn that is a reference in memory.  This is
12820    disabled for ARMv5, where we prefer to use blx instead, because otherwise
12821    there's a significant performance overhead.  */
12822 const char *
12823 output_call_mem (rtx *operands)
12825 gcc_assert (!arm_arch5);
12826 if (TARGET_INTERWORK)
12828 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12829 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12830 output_asm_insn ("bx%?\t%|ip", operands);
12832 else if (regno_use_in (LR_REGNUM, operands[0]))
12834 /* LR is used in the memory address. We load the address in the
12835 first instruction. It's safe to use IP as the target of the
12836 load since the call will kill it anyway. */
12837 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12838 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12839 if (arm_arch4t)
12840 output_asm_insn ("bx%?\t%|ip", operands);
12841 else
12842 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12844 else
12846 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12847 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12850 return "";
12854 /* Output a move from ARM registers to an FPA register.
12855    OPERANDS[0] is an FPA register.
12856    OPERANDS[1] is the first register of an ARM register pair.  */
12857 const char *
12858 output_mov_long_double_fpa_from_arm (rtx *operands)
12860 int arm_reg0 = REGNO (operands[1]);
12861 rtx ops[3];
12863 gcc_assert (arm_reg0 != IP_REGNUM);
12865 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12866 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12867 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12869 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12870 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12872 return "";
12875 /* Output a move from an FPA register to ARM registers.
12876    OPERANDS[0] is the first register of an ARM register pair.
12877    OPERANDS[1] is an FPA register.  */
12878 const char *
12879 output_mov_long_double_arm_from_fpa (rtx *operands)
12881 int arm_reg0 = REGNO (operands[0]);
12882 rtx ops[3];
12884 gcc_assert (arm_reg0 != IP_REGNUM);
12886 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12887 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12888 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12890 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12891 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12892 return "";
12895 /* Output a move of a long double from ARM registers to ARM registers.
12896    OPERANDS[0] is the destination.
12897    OPERANDS[1] is the source.  */
12898 const char *
12899 output_mov_long_double_arm_from_arm (rtx *operands)
12901 /* We have to be careful here because the two might overlap. */
12902 int dest_start = REGNO (operands[0]);
12903 int src_start = REGNO (operands[1]);
12904 rtx ops[2];
12905 int i;
12907 if (dest_start < src_start)
12909 for (i = 0; i < 3; i++)
12911 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12912 ops[1] = gen_rtx_REG (SImode, src_start + i);
12913 output_asm_insn ("mov%?\t%0, %1", ops);
12916 else
12918 for (i = 2; i >= 0; i--)
12920 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12921 ops[1] = gen_rtx_REG (SImode, src_start + i);
12922 output_asm_insn ("mov%?\t%0, %1", ops);
12926 return "";
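/* Emit the pair of SImode sets needed to move SRC into DEST.  A constant
   is loaded 16 bits at a time: the low half is set first and the high half,
   if nonzero, is then inserted into bits 16-31.  Anything else is split
   into HIGH and LO_SUM parts.  For example, moving the constant 0x12345678
   first sets DEST to 0x5678 and then writes 0x1234 into the top half.  */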
12929 void
12930 arm_emit_movpair (rtx dest, rtx src)
12932 /* If the src is an immediate, simplify it. */
12933 if (CONST_INT_P (src))
12935 HOST_WIDE_INT val = INTVAL (src);
12936 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12937 if ((val >> 16) & 0x0000ffff)
12938 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12939 GEN_INT (16)),
12940 GEN_INT ((val >> 16) & 0x0000ffff));
12941 return;
12943 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12944 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
12947 /* Output a move from ARM registers to an FPA register.
12948    OPERANDS[0] is an FPA register.
12949    OPERANDS[1] is the first register of an ARM register pair.  */
12950 const char *
12951 output_mov_double_fpa_from_arm (rtx *operands)
12953 int arm_reg0 = REGNO (operands[1]);
12954 rtx ops[2];
12956 gcc_assert (arm_reg0 != IP_REGNUM);
12958 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12959 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12960 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12961 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12962 return "";
12965 /* Output a move from an FPA register to ARM registers.
12966    OPERANDS[0] is the first register of an ARM register pair.
12967    OPERANDS[1] is an FPA register.  */
12968 const char *
12969 output_mov_double_arm_from_fpa (rtx *operands)
12971 int arm_reg0 = REGNO (operands[0]);
12972 rtx ops[2];
12974 gcc_assert (arm_reg0 != IP_REGNUM);
12976 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12977 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12978 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12979 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12980 return "";
12983 /* Output a move between double words. It must be REG<-MEM
12984 or MEM<-REG. */
12985 const char *
12986 output_move_double (rtx *operands)
12988 enum rtx_code code0 = GET_CODE (operands[0]);
12989 enum rtx_code code1 = GET_CODE (operands[1]);
12990 rtx otherops[3];
12992 if (code0 == REG)
12994 unsigned int reg0 = REGNO (operands[0]);
12996 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12998 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13000 switch (GET_CODE (XEXP (operands[1], 0)))
13002 case REG:
13003 if (TARGET_LDRD
13004 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
13005 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13006 else
13007 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13008 break;
13010 case PRE_INC:
13011 gcc_assert (TARGET_LDRD);
13012 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13013 break;
13015 case PRE_DEC:
13016 if (TARGET_LDRD)
13017 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13018 else
13019 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13020 break;
13022 case POST_INC:
13023 if (TARGET_LDRD)
13024 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
13025 else
13026 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
13027 break;
13029 case POST_DEC:
13030 gcc_assert (TARGET_LDRD);
13031 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
13032 break;
13034 case PRE_MODIFY:
13035 case POST_MODIFY:
13036 	  /* Autoincrement addressing modes should never have overlapping
13037 base and destination registers, and overlapping index registers
13038 are already prohibited, so this doesn't need to worry about
13039 fix_cm3_ldrd. */
13040 otherops[0] = operands[0];
13041 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
13042 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
13044 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
13046 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
13048 /* Registers overlap so split out the increment. */
13049 output_asm_insn ("add%?\t%1, %1, %2", otherops);
13050 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
13052 else
13054 /* Use a single insn if we can.
13055 FIXME: IWMMXT allows offsets larger than ldrd can
13056 handle, fix these up with a pair of ldr. */
13057 if (TARGET_THUMB2
13058 || GET_CODE (otherops[2]) != CONST_INT
13059 || (INTVAL (otherops[2]) > -256
13060 && INTVAL (otherops[2]) < 256))
13061 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
13062 else
13064 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
13065 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13069 else
13071 /* Use a single insn if we can.
13072 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13073 fix these up with a pair of ldr. */
13074 if (TARGET_THUMB2
13075 || GET_CODE (otherops[2]) != CONST_INT
13076 || (INTVAL (otherops[2]) > -256
13077 && INTVAL (otherops[2]) < 256))
13078 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
13079 else
13081 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13082 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
13085 break;
13087 case LABEL_REF:
13088 case CONST:
13089 /* We might be able to use ldrd %0, %1 here. However the range is
13090 different to ldr/adr, and it is broken on some ARMv7-M
13091 implementations. */
13092 /* Use the second register of the pair to avoid problematic
13093 overlap. */
13094 otherops[1] = operands[1];
13095 output_asm_insn ("adr%?\t%0, %1", otherops);
13096 operands[1] = otherops[0];
13097 if (TARGET_LDRD)
13098 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13099 else
13100 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
13101 break;
13103 /* ??? This needs checking for thumb2. */
13104 default:
13105 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
13106 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
13108 otherops[0] = operands[0];
13109 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
13110 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
13112 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
13114 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13116 switch ((int) INTVAL (otherops[2]))
13118 case -8:
13119 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13120 return "";
13121 case -4:
13122 if (TARGET_THUMB2)
13123 break;
13124 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13125 return "";
13126 case 4:
13127 if (TARGET_THUMB2)
13128 break;
13129 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13130 return "";
13133 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
13134 operands[1] = otherops[0];
13135 if (TARGET_LDRD
13136 && (GET_CODE (otherops[2]) == REG
13137 || TARGET_THUMB2
13138 || (GET_CODE (otherops[2]) == CONST_INT
13139 && INTVAL (otherops[2]) > -256
13140 && INTVAL (otherops[2]) < 256)))
13142 if (reg_overlap_mentioned_p (operands[0],
13143 otherops[2]))
13145 rtx tmp;
13146 /* Swap base and index registers over to
13147 avoid a conflict. */
13148 tmp = otherops[1];
13149 otherops[1] = otherops[2];
13150 otherops[2] = tmp;
13152 /* If both registers conflict, it will usually
13153 have been fixed by a splitter. */
13154 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13155 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13157 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13158 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13160 else
13162 otherops[0] = operands[0];
13163 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13165 return "";
13168 if (GET_CODE (otherops[2]) == CONST_INT)
13170 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13171 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13172 else
13173 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13175 else
13176 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13178 else
13179 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13181 if (TARGET_LDRD)
13182 return "ldr%(d%)\t%0, [%1]";
13184 return "ldm%(ia%)\t%1, %M0";
13186 else
13188 otherops[1] = adjust_address (operands[1], SImode, 4);
13189 /* Take care of overlapping base/data reg. */
13190 if (reg_mentioned_p (operands[0], operands[1]))
13192 output_asm_insn ("ldr%?\t%0, %1", otherops);
13193 output_asm_insn ("ldr%?\t%0, %1", operands);
13195 else
13197 output_asm_insn ("ldr%?\t%0, %1", operands);
13198 output_asm_insn ("ldr%?\t%0, %1", otherops);
13203 else
13205 /* Constraints should ensure this. */
13206 gcc_assert (code0 == MEM && code1 == REG);
13207 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13209 switch (GET_CODE (XEXP (operands[0], 0)))
13211 case REG:
13212 if (TARGET_LDRD)
13213 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13214 else
13215 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13216 break;
13218 case PRE_INC:
13219 gcc_assert (TARGET_LDRD);
13220 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13221 break;
13223 case PRE_DEC:
13224 if (TARGET_LDRD)
13225 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13226 else
13227 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13228 break;
13230 case POST_INC:
13231 if (TARGET_LDRD)
13232 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13233 else
13234 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13235 break;
13237 case POST_DEC:
13238 gcc_assert (TARGET_LDRD);
13239 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13240 break;
13242 case PRE_MODIFY:
13243 case POST_MODIFY:
13244 otherops[0] = operands[1];
13245 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13246 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13248       /* IWMMXT allows offsets larger than strd can handle,
13249 	 fix these up with a pair of str.  */
13250 if (!TARGET_THUMB2
13251 && GET_CODE (otherops[2]) == CONST_INT
13252 && (INTVAL(otherops[2]) <= -256
13253 || INTVAL(otherops[2]) >= 256))
13255 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13257 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13258 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13260 else
13262 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13263 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13266 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13267 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13268 else
13269 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13270 break;
13272 case PLUS:
13273 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13274 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13276 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13278 case -8:
13279 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13280 return "";
13282 case -4:
13283 if (TARGET_THUMB2)
13284 break;
13285 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13286 return "";
13288 case 4:
13289 if (TARGET_THUMB2)
13290 break;
13291 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13292 return "";
13295 if (TARGET_LDRD
13296 && (GET_CODE (otherops[2]) == REG
13297 || TARGET_THUMB2
13298 || (GET_CODE (otherops[2]) == CONST_INT
13299 && INTVAL (otherops[2]) > -256
13300 && INTVAL (otherops[2]) < 256)))
13302 otherops[0] = operands[1];
13303 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13304 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13305 return "";
13307 /* Fall through */
13309 default:
13310 otherops[0] = adjust_address (operands[0], SImode, 4);
13311 otherops[1] = operands[1];
13312 output_asm_insn ("str%?\t%1, %0", operands);
13313 output_asm_insn ("str%?\t%H1, %0", otherops);
13317 return "";
13320 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13321 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13323 const char *
13324 output_move_quad (rtx *operands)
13326 if (REG_P (operands[0]))
13328 /* Load, or reg->reg move. */
13330 if (MEM_P (operands[1]))
13332 switch (GET_CODE (XEXP (operands[1], 0)))
13334 case REG:
13335 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13336 break;
13338 case LABEL_REF:
13339 case CONST:
13340 output_asm_insn ("adr%?\t%0, %1", operands);
13341 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13342 break;
13344 default:
13345 gcc_unreachable ();
13348 else
13350 rtx ops[2];
13351 int dest, src, i;
13353 gcc_assert (REG_P (operands[1]));
13355 dest = REGNO (operands[0]);
13356 src = REGNO (operands[1]);
13358 /* This seems pretty dumb, but hopefully GCC won't try to do it
13359 very often. */
13360 if (dest < src)
13361 for (i = 0; i < 4; i++)
13363 ops[0] = gen_rtx_REG (SImode, dest + i);
13364 ops[1] = gen_rtx_REG (SImode, src + i);
13365 output_asm_insn ("mov%?\t%0, %1", ops);
13367 else
13368 for (i = 3; i >= 0; i--)
13370 ops[0] = gen_rtx_REG (SImode, dest + i);
13371 ops[1] = gen_rtx_REG (SImode, src + i);
13372 output_asm_insn ("mov%?\t%0, %1", ops);
13376 else
13378 gcc_assert (MEM_P (operands[0]));
13379 gcc_assert (REG_P (operands[1]));
13380 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13382 switch (GET_CODE (XEXP (operands[0], 0)))
13384 case REG:
13385 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13386 break;
13388 default:
13389 gcc_unreachable ();
13393 return "";
13396 /* Output a VFP load or store instruction. */
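/* The mnemonic is assembled from the template selected below; for example,
   a DFmode load from a plain register address becomes "fldd%?\t%P0, %1",
   while an SImode store with a post-increment address becomes
   "fstmias%?\t%0!, {%1}\t%@ int".  */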
13398 const char *
13399 output_move_vfp (rtx *operands)
13401 rtx reg, mem, addr, ops[2];
13402 int load = REG_P (operands[0]);
13403 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13404 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13405 const char *templ;
13406 char buff[50];
13407 enum machine_mode mode;
13409 reg = operands[!load];
13410 mem = operands[load];
13412 mode = GET_MODE (reg);
13414 gcc_assert (REG_P (reg));
13415 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13416 gcc_assert (mode == SFmode
13417 || mode == DFmode
13418 || mode == SImode
13419 || mode == DImode
13420 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13421 gcc_assert (MEM_P (mem));
13423 addr = XEXP (mem, 0);
13425 switch (GET_CODE (addr))
13427 case PRE_DEC:
13428 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13429 ops[0] = XEXP (addr, 0);
13430 ops[1] = reg;
13431 break;
13433 case POST_INC:
13434 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13435 ops[0] = XEXP (addr, 0);
13436 ops[1] = reg;
13437 break;
13439 default:
13440 templ = "f%s%c%%?\t%%%s0, %%1%s";
13441 ops[0] = reg;
13442 ops[1] = mem;
13443 break;
13446 sprintf (buff, templ,
13447 load ? "ld" : "st",
13448 dp ? 'd' : 's',
13449 dp ? "P" : "",
13450 integer_p ? "\t%@ int" : "");
13451 output_asm_insn (buff, ops);
13453 return "";
13456 /* Output a Neon quad-word load or store, or a load or store for
13457 larger structure modes.
13459 WARNING: The ordering of elements is weird in big-endian mode,
13460 because we use VSTM, as required by the EABI. GCC RTL defines
13461    element ordering based on in-memory order.  This can differ
13462 from the architectural ordering of elements within a NEON register.
13463 The intrinsics defined in arm_neon.h use the NEON register element
13464 ordering, not the GCC RTL element ordering.
13466    For example, the in-memory ordering of a big-endian quadword
13467 vector with 16-bit elements when stored from register pair {d0,d1}
13468 will be (lowest address first, d0[N] is NEON register element N):
13470 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13472 When necessary, quadword registers (dN, dN+1) are moved to ARM
13473 registers from rN in the order:
13475 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13477 So that STM/LDM can be used on vectors in ARM registers, and the
13478 same memory layout will result as if VSTM/VLDM were used. */
13480 const char *
13481 output_move_neon (rtx *operands)
13483 rtx reg, mem, addr, ops[2];
13484 int regno, load = REG_P (operands[0]);
13485 const char *templ;
13486 char buff[50];
13487 enum machine_mode mode;
13489 reg = operands[!load];
13490 mem = operands[load];
13492 mode = GET_MODE (reg);
13494 gcc_assert (REG_P (reg));
13495 regno = REGNO (reg);
13496 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13497 || NEON_REGNO_OK_FOR_QUAD (regno));
13498 gcc_assert (VALID_NEON_DREG_MODE (mode)
13499 || VALID_NEON_QREG_MODE (mode)
13500 || VALID_NEON_STRUCT_MODE (mode));
13501 gcc_assert (MEM_P (mem));
13503 addr = XEXP (mem, 0);
13505 /* Strip off const from addresses like (const (plus (...))). */
13506 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13507 addr = XEXP (addr, 0);
13509 switch (GET_CODE (addr))
13511 case POST_INC:
13512 templ = "v%smia%%?\t%%0!, %%h1";
13513 ops[0] = XEXP (addr, 0);
13514 ops[1] = reg;
13515 break;
13517 case PRE_DEC:
13518 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13519 templ = "v%smdb%%?\t%%0!, %%h1";
13520 ops[0] = XEXP (addr, 0);
13521 ops[1] = reg;
13522 break;
13524 case POST_MODIFY:
13525 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13526 gcc_unreachable ();
13528 case LABEL_REF:
13529 case PLUS:
13531 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13532 int i;
13533 int overlap = -1;
13534 for (i = 0; i < nregs; i++)
13536 /* We're only using DImode here because it's a convenient size. */
13537 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13538 ops[1] = adjust_address (mem, DImode, 8 * i);
13539 if (reg_overlap_mentioned_p (ops[0], mem))
13541 gcc_assert (overlap == -1);
13542 overlap = i;
13544 else
13546 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13547 output_asm_insn (buff, ops);
13550 if (overlap != -1)
13552 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13553 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13554 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13555 output_asm_insn (buff, ops);
13558 return "";
13561 default:
13562 templ = "v%smia%%?\t%%m0, %%h1";
13563 ops[0] = mem;
13564 ops[1] = reg;
13567 sprintf (buff, templ, load ? "ld" : "st");
13568 output_asm_insn (buff, ops);
13570 return "";
13573 /* Compute and return the length of neon_mov<mode>, where <mode> is
13574 one of VSTRUCT modes: EI, OI, CI or XI. */
13575 int
13576 arm_attr_length_move_neon (rtx insn)
13578 rtx reg, mem, addr;
13579 int load;
13580 enum machine_mode mode;
13582 extract_insn_cached (insn);
13584 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13586 mode = GET_MODE (recog_data.operand[0]);
13587 switch (mode)
13589 case EImode:
13590 case OImode:
13591 return 8;
13592 case CImode:
13593 return 12;
13594 case XImode:
13595 return 16;
13596 default:
13597 gcc_unreachable ();
13601 load = REG_P (recog_data.operand[0]);
13602 reg = recog_data.operand[!load];
13603 mem = recog_data.operand[load];
13605 gcc_assert (MEM_P (mem));
13607 mode = GET_MODE (reg);
13608 addr = XEXP (mem, 0);
13610 /* Strip off const from addresses like (const (plus (...))). */
13611 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13612 addr = XEXP (addr, 0);
13614 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13616 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13617 return insns * 4;
13619 else
13620 return 4;
13623 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13624 return zero. */
13626 int
13627 arm_address_offset_is_imm (rtx insn)
13629 rtx mem, addr;
13631 extract_insn_cached (insn);
13633 if (REG_P (recog_data.operand[0]))
13634 return 0;
13636 mem = recog_data.operand[0];
13638 gcc_assert (MEM_P (mem));
13640 addr = XEXP (mem, 0);
13642 if (GET_CODE (addr) == REG
13643 || (GET_CODE (addr) == PLUS
13644 && GET_CODE (XEXP (addr, 0)) == REG
13645 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13646 return 1;
13647 else
13648 return 0;
13651 /* Output an ADD r, s, #n where n may be too big for one instruction.
13652    If n is zero and the source and destination registers are the same,
13653    output nothing.  */
13653 const char *
13654 output_add_immediate (rtx *operands)
13656 HOST_WIDE_INT n = INTVAL (operands[2]);
13658 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13660 if (n < 0)
13661 output_multi_immediate (operands,
13662 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13663 -n);
13664 else
13665 output_multi_immediate (operands,
13666 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13670 return "";
13673 /* Output a multiple immediate operation.
13674 OPERANDS is the vector of operands referred to in the output patterns.
13675 INSTR1 is the output pattern to use for the first constant.
13676 INSTR2 is the output pattern to use for subsequent constants.
13677 IMMED_OP is the index of the constant slot in OPERANDS.
13678 N is the constant value. */
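/* Worked example (hypothetical input): with INSTR1 "add%?\t%0, %1, %2",
   INSTR2 "add%?\t%0, %0, %2" and N = 0x12003, the loop below splits N into
   the chunks 0x3 (bits 0-7) and 0x12000 (bits 12-19) and emits one "add"
   per chunk; each chunk is an 8-bit value at an even bit position and is
   therefore a valid ARM rotated immediate.  */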
13679 static const char *
13680 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13681 int immed_op, HOST_WIDE_INT n)
13683 #if HOST_BITS_PER_WIDE_INT > 32
13684 n &= 0xffffffff;
13685 #endif
13687 if (n == 0)
13689 /* Quick and easy output. */
13690 operands[immed_op] = const0_rtx;
13691 output_asm_insn (instr1, operands);
13693 else
13695 int i;
13696 const char * instr = instr1;
13698 /* Note that n is never zero here (which would give no output). */
13699 for (i = 0; i < 32; i += 2)
13701 if (n & (3 << i))
13703 operands[immed_op] = GEN_INT (n & (255 << i));
13704 output_asm_insn (instr, operands);
13705 instr = instr2;
13706 i += 6;
13711 return "";
13714 /* Return the name of a shifter operation. */
13715 static const char *
13716 arm_shift_nmem(enum rtx_code code)
13718 switch (code)
13720 case ASHIFT:
13721 return ARM_LSL_NAME;
13723 case ASHIFTRT:
13724 return "asr";
13726 case LSHIFTRT:
13727 return "lsr";
13729 case ROTATERT:
13730 return "ror";
13732 default:
13733 abort();
13737 /* Return the appropriate ARM instruction for the operation code.
13738 The returned result should not be overwritten. OP is the rtx of the
13739 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13740 was shifted. */
13741 const char *
13742 arithmetic_instr (rtx op, int shift_first_arg)
13744 switch (GET_CODE (op))
13746 case PLUS:
13747 return "add";
13749 case MINUS:
13750 return shift_first_arg ? "rsb" : "sub";
13752 case IOR:
13753 return "orr";
13755 case XOR:
13756 return "eor";
13758 case AND:
13759 return "and";
13761 case ASHIFT:
13762 case ASHIFTRT:
13763 case LSHIFTRT:
13764 case ROTATERT:
13765 return arm_shift_nmem(GET_CODE(op));
13767 default:
13768 gcc_unreachable ();
13772 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13773 for the operation code. The returned result should not be overwritten.
13774 OP is the rtx code of the shift.
13775 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
13776 shift. */
13777 static const char *
13778 shift_op (rtx op, HOST_WIDE_INT *amountp)
13780 const char * mnem;
13781 enum rtx_code code = GET_CODE (op);
13783 switch (GET_CODE (XEXP (op, 1)))
13785 case REG:
13786 case SUBREG:
13787 *amountp = -1;
13788 break;
13790 case CONST_INT:
13791 *amountp = INTVAL (XEXP (op, 1));
13792 break;
13794 default:
13795 gcc_unreachable ();
13798 switch (code)
13800 case ROTATE:
13801 gcc_assert (*amountp != -1);
13802 *amountp = 32 - *amountp;
13803 code = ROTATERT;
13805 /* Fall through. */
13807 case ASHIFT:
13808 case ASHIFTRT:
13809 case LSHIFTRT:
13810 case ROTATERT:
13811 mnem = arm_shift_nmem(code);
13812 break;
13814 case MULT:
13815 /* We never have to worry about the amount being other than a
13816 power of 2, since this case can never be reloaded from a reg. */
13817 gcc_assert (*amountp != -1);
13818 *amountp = int_log2 (*amountp);
13819 return ARM_LSL_NAME;
13821 default:
13822 gcc_unreachable ();
13825 if (*amountp != -1)
13827 /* This is not 100% correct, but follows from the desire to merge
13828 multiplication by a power of 2 with the recognizer for a
13829 shift. >=32 is not a valid shift for "lsl", so we must try and
13830 output a shift that produces the correct arithmetical result.
13831 Using lsr #32 is identical except for the fact that the carry bit
13832 is not set correctly if we set the flags; but we never use the
13833 carry bit from such an operation, so we can ignore that. */
13834 if (code == ROTATERT)
13835 /* Rotate is just modulo 32. */
13836 *amountp &= 31;
13837 else if (*amountp != (*amountp & 31))
13839 if (code == ASHIFT)
13840 mnem = "lsr";
13841 *amountp = 32;
13844 /* Shifts of 0 are no-ops. */
13845 if (*amountp == 0)
13846 return NULL;
13849 return mnem;
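/* Illustrative cases (not from the original comments): (mult x 8) is
   emitted as lsl #3 via int_log2; an out-of-range amount such as
   (ashift x 32) is emitted as lsr #32, which produces the same (zero)
   arithmetic result; (rotatert x 36) is reduced modulo 32 to ror #4;
   and a shift by zero makes this function return NULL so no
   instruction is printed at all.  */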
13852 /* Obtain the shift from the POWER of two. */
13854 static HOST_WIDE_INT
13855 int_log2 (HOST_WIDE_INT power)
13857 HOST_WIDE_INT shift = 0;
13859 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13861 gcc_assert (shift <= 31);
13862 shift++;
13865 return shift;
13868 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13869 because /bin/as is horribly restrictive. The judgement about
13870 whether or not each character is 'printable' (and can be output as
13871 is) or not (and must be printed with an octal escape) must be made
13872 with reference to the *host* character set -- the situation is
13873 similar to that discussed in the comments above pp_c_char in
13874 c-pretty-print.c. */
13876 #define MAX_ASCII_LEN 51
13878 void
13879 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13881 int i;
13882 int len_so_far = 0;
13884 fputs ("\t.ascii\t\"", stream);
13886 for (i = 0; i < len; i++)
13888 int c = p[i];
13890 if (len_so_far >= MAX_ASCII_LEN)
13892 fputs ("\"\n\t.ascii\t\"", stream);
13893 len_so_far = 0;
13896 if (ISPRINT (c))
13898 if (c == '\\' || c == '\"')
13900 putc ('\\', stream);
13901 len_so_far++;
13903 putc (c, stream);
13904 len_so_far++;
13906 else
13908 fprintf (stream, "\\%03o", c);
13909 len_so_far += 4;
13913 fputs ("\"\n", stream);
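/* Example of the output, for illustration: given the bytes
   { 'a', '"', '\n', 0 } this prints

	.ascii	"a\"\012\000"

   Printable characters are copied (escaping backslash and double quote),
   anything else becomes a three-digit octal escape, and strings longer
   than MAX_ASCII_LEN are split across several .ascii directives.  */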
13916 /* Compute the register save mask for registers 0 through 12
13917 inclusive. This code is used by arm_compute_save_reg_mask. */
13919 static unsigned long
13920 arm_compute_save_reg0_reg12_mask (void)
13922 unsigned long func_type = arm_current_func_type ();
13923 unsigned long save_reg_mask = 0;
13924 unsigned int reg;
13926 if (IS_INTERRUPT (func_type))
13928 unsigned int max_reg;
13929 /* Interrupt functions must not corrupt any registers,
13930 even call clobbered ones. If this is a leaf function
13931 we can just examine the registers used by the RTL, but
13932 otherwise we have to assume that whatever function is
13933 called might clobber anything, and so we have to save
13934 all the call-clobbered registers as well. */
13935 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13936 /* FIQ handlers have registers r8 - r12 banked, so
13937 we only need to check r0 - r7.  Normal ISRs only
13938 bank r13 and r14, so we must check up to r12.
13939 r13 is the stack pointer which is always preserved,
13940 so we do not need to consider it here. */
13941 max_reg = 7;
13942 else
13943 max_reg = 12;
13945 for (reg = 0; reg <= max_reg; reg++)
13946 if (df_regs_ever_live_p (reg)
13947 || (! current_function_is_leaf && call_used_regs[reg]))
13948 save_reg_mask |= (1 << reg);
13950 /* Also save the pic base register if necessary. */
13951 if (flag_pic
13952 && !TARGET_SINGLE_PIC_BASE
13953 && arm_pic_register != INVALID_REGNUM
13954 && crtl->uses_pic_offset_table)
13955 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13957 else if (IS_VOLATILE(func_type))
13959 /* For noreturn functions we historically omitted register saves
13960 altogether. However this really messes up debugging. As a
13961 compromise save just the frame pointers. Combined with the link
13962 register saved elsewhere this should be sufficient to get
13963 a backtrace. */
13964 if (frame_pointer_needed)
13965 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13966 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13967 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13968 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13969 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13971 else
13973 /* In the normal case we only need to save those registers
13974 which are call saved and which are used by this function. */
13975 for (reg = 0; reg <= 11; reg++)
13976 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13977 save_reg_mask |= (1 << reg);
13979 /* Handle the frame pointer as a special case. */
13980 if (frame_pointer_needed)
13981 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13983 /* If we aren't loading the PIC register,
13984 don't stack it even though it may be live. */
13985 if (flag_pic
13986 && !TARGET_SINGLE_PIC_BASE
13987 && arm_pic_register != INVALID_REGNUM
13988 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13989 || crtl->uses_pic_offset_table))
13990 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13992 /* The prologue will copy SP into R0, so save it. */
13993 if (IS_STACKALIGN (func_type))
13994 save_reg_mask |= 1;
13997 /* Save registers so the exception handler can modify them. */
13998 if (crtl->calls_eh_return)
14000 unsigned int i;
14002 for (i = 0; ; i++)
14004 reg = EH_RETURN_DATA_REGNO (i);
14005 if (reg == INVALID_REGNUM)
14006 break;
14007 save_reg_mask |= 1 << reg;
14011 return save_reg_mask;
14015 /* Compute the number of bytes used to store the static chain register on the
14016 stack, above the stack frame. We need to know this accurately to get the
14017 alignment of the rest of the stack frame correct. */
14019 static int arm_compute_static_chain_stack_bytes (void)
14021 unsigned long func_type = arm_current_func_type ();
14022 int static_chain_stack_bytes = 0;
14024 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
14025 IS_NESTED (func_type) &&
14026 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
14027 static_chain_stack_bytes = 4;
14029 return static_chain_stack_bytes;
14033 /* Compute a bit mask of which registers need to be
14034 saved on the stack for the current function.
14035 This is used by arm_get_frame_offsets, which may add extra registers. */
14037 static unsigned long
14038 arm_compute_save_reg_mask (void)
14040 unsigned int save_reg_mask = 0;
14041 unsigned long func_type = arm_current_func_type ();
14042 unsigned int reg;
14044 if (IS_NAKED (func_type))
14045 /* This should never really happen. */
14046 return 0;
14048 /* If we are creating a stack frame, then we must save the frame pointer,
14049 IP (which will hold the old stack pointer), LR and the PC. */
14050 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14051 save_reg_mask |=
14052 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
14053 | (1 << IP_REGNUM)
14054 | (1 << LR_REGNUM)
14055 | (1 << PC_REGNUM);
14057 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
14059 /* Decide if we need to save the link register.
14060 Interrupt routines have their own banked link register,
14061 so they never need to save it.
14062 Otherwise if we do not use the link register we do not need to save
14063 it. If we are pushing other registers onto the stack however, we
14064 can save an instruction in the epilogue by pushing the link register
14065 now and then popping it back into the PC. This incurs extra memory
14066 accesses though, so we only do it when optimizing for size, and only
14067 if we know that we will not need a fancy return sequence. */
14068 if (df_regs_ever_live_p (LR_REGNUM)
14069 || (save_reg_mask
14070 && optimize_size
14071 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14072 && !crtl->calls_eh_return))
14073 save_reg_mask |= 1 << LR_REGNUM;
14075 if (cfun->machine->lr_save_eliminated)
14076 save_reg_mask &= ~ (1 << LR_REGNUM);
14078 if (TARGET_REALLY_IWMMXT
14079 && ((bit_count (save_reg_mask)
14080 + ARM_NUM_INTS (crtl->args.pretend_args_size +
14081 arm_compute_static_chain_stack_bytes())
14082 ) % 2) != 0)
14084 /* The total number of registers that are going to be pushed
14085 onto the stack is odd. We need to ensure that the stack
14086 is 64-bit aligned before we start to save iWMMXt registers,
14087 and also before we start to create locals. (A local variable
14088 might be a double or long long which we will load/store using
14089 an iWMMXt instruction). Therefore we need to push another
14090 ARM register, so that the stack will be 64-bit aligned. We
14091 try to avoid using the arg registers (r0 -r3) as they might be
14092 used to pass values in a tail call. */
14093 for (reg = 4; reg <= 12; reg++)
14094 if ((save_reg_mask & (1 << reg)) == 0)
14095 break;
14097 if (reg <= 12)
14098 save_reg_mask |= (1 << reg);
14099 else
14101 cfun->machine->sibcall_blocked = 1;
14102 save_reg_mask |= (1 << 3);
14106 /* We may need to push an additional register for use initializing the
14107 PIC base register. */
14108 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
14109 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
14111 reg = thumb_find_work_register (1 << 4);
14112 if (!call_used_regs[reg])
14113 save_reg_mask |= (1 << reg);
14116 return save_reg_mask;
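/* For illustration, on an iWMMXt target: if the mask so far holds r4-r7
   and lr (five registers) and there are no pretend args, the count is
   odd, so the loop above adds the first free register at or above r4
   (r8 here), giving six registers / 24 bytes and keeping the stack
   64-bit aligned for the iWMMXt saves that follow.  */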
14120 /* Compute a bit mask of which registers need to be
14121 saved on the stack for the current function. */
14122 static unsigned long
14123 thumb1_compute_save_reg_mask (void)
14125 unsigned long mask;
14126 unsigned reg;
14128 mask = 0;
14129 for (reg = 0; reg < 12; reg ++)
14130 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14131 mask |= 1 << reg;
14133 if (flag_pic
14134 && !TARGET_SINGLE_PIC_BASE
14135 && arm_pic_register != INVALID_REGNUM
14136 && crtl->uses_pic_offset_table)
14137 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14139 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14140 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14141 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14143 /* LR will also be pushed if any lo regs are pushed. */
14144 if (mask & 0xff || thumb_force_lr_save ())
14145 mask |= (1 << LR_REGNUM);
14147 /* Make sure we have a low work register if we need one.
14148 We will need one if we are going to push a high register,
14149 but we are not currently intending to push a low register. */
14150 if ((mask & 0xff) == 0
14151 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14153 /* Use thumb_find_work_register to choose which register
14154 we will use. If the register is live then we will
14155 have to push it. Use LAST_LO_REGNUM as our fallback
14156 choice for the register to select. */
14157 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14158 /* Make sure the register returned by thumb_find_work_register is
14159 not part of the return value. */
14160 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14161 reg = LAST_LO_REGNUM;
14163 if (! call_used_regs[reg])
14164 mask |= 1 << reg;
14167 /* The 504 below is 8 bytes less than 512 because there are two possible
14168 alignment words. We can't tell here if they will be present or not so we
14169 have to play it safe and assume that they are. */
14170 if ((CALLER_INTERWORKING_SLOT_SIZE +
14171 ROUND_UP_WORD (get_frame_size ()) +
14172 crtl->outgoing_args_size) >= 504)
14174 /* This is the same as the code in thumb1_expand_prologue() which
14175 determines which register to use for stack decrement. */
14176 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14177 if (mask & (1 << reg))
14178 break;
14180 if (reg > LAST_LO_REGNUM)
14182 /* Make sure we have a register available for stack decrement. */
14183 mask |= 1 << LAST_LO_REGNUM;
14187 return mask;
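/* For illustration: a Thumb-1 function with a 600-byte frame, no
   outgoing arguments and no interworking slot exceeds the 504-byte
   threshold above, so if none of r4-r7 is already being saved the mask
   additionally gets LAST_LO_REGNUM (r7) as a scratch register for the
   stack decrement.  */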
14191 /* Return the number of bytes required to save VFP registers. */
14192 static int
14193 arm_get_vfp_saved_size (void)
14195 unsigned int regno;
14196 int count;
14197 int saved;
14199 saved = 0;
14200 /* Space for saved VFP registers. */
14201 if (TARGET_HARD_FLOAT && TARGET_VFP)
14203 count = 0;
14204 for (regno = FIRST_VFP_REGNUM;
14205 regno < LAST_VFP_REGNUM;
14206 regno += 2)
14208 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14209 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14211 if (count > 0)
14213 /* Workaround ARM10 VFPr1 bug. */
14214 if (count == 2 && !arm_arch6)
14215 count++;
14216 saved += count * 8;
14218 count = 0;
14220 else
14221 count++;
14223 if (count > 0)
14225 if (count == 2 && !arm_arch6)
14226 count++;
14227 saved += count * 8;
14230 return saved;
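/* For illustration: if exactly two call-saved D registers (say d8 and
   d9) are live, this returns 16 bytes on arm_arch6 and later cores, but
   24 bytes on earlier ones, because the ARM10 VFPr1 workaround above
   pads any run of exactly two registers to three.  */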
14234 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14235 everything bar the final return instruction. */
14236 const char *
14237 output_return_instruction (rtx operand, int really_return, int reverse)
14239 char conditional[10];
14240 char instr[100];
14241 unsigned reg;
14242 unsigned long live_regs_mask;
14243 unsigned long func_type;
14244 arm_stack_offsets *offsets;
14246 func_type = arm_current_func_type ();
14248 if (IS_NAKED (func_type))
14249 return "";
14251 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14253 /* If this function was declared non-returning, and we have
14254 found a tail call, then we have to trust that the called
14255 function won't return. */
14256 if (really_return)
14258 rtx ops[2];
14260 /* Otherwise, trap an attempted return by aborting. */
14261 ops[0] = operand;
14262 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14263 : "abort");
14264 assemble_external_libcall (ops[1]);
14265 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14268 return "";
14271 gcc_assert (!cfun->calls_alloca || really_return);
14273 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14275 cfun->machine->return_used_this_function = 1;
14277 offsets = arm_get_frame_offsets ();
14278 live_regs_mask = offsets->saved_regs_mask;
14280 if (live_regs_mask)
14282 const char * return_reg;
14284 /* If we do not have any special requirements for function exit
14285 (e.g. interworking) then we can load the return address
14286 directly into the PC. Otherwise we must load it into LR. */
14287 if (really_return
14288 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14289 return_reg = reg_names[PC_REGNUM];
14290 else
14291 return_reg = reg_names[LR_REGNUM];
14293 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14295 /* There are three possible reasons for the IP register
14296 being saved. 1) a stack frame was created, in which case
14297 IP contains the old stack pointer, or 2) an ISR routine
14298 corrupted it, or 3) it was saved to align the stack on
14299 iWMMXt. In case 1, restore IP into SP, otherwise just
14300 restore IP. */
14301 if (frame_pointer_needed)
14303 live_regs_mask &= ~ (1 << IP_REGNUM);
14304 live_regs_mask |= (1 << SP_REGNUM);
14306 else
14307 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14310 /* On some ARM architectures it is faster to use LDR rather than
14311 LDM to load a single register. On other architectures, the
14312 cost is the same. In 26 bit mode, or for exception handlers,
14313 we have to use LDM to load the PC so that the CPSR is also
14314 restored. */
14315 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14316 if (live_regs_mask == (1U << reg))
14317 break;
14319 if (reg <= LAST_ARM_REGNUM
14320 && (reg != LR_REGNUM
14321 || ! really_return
14322 || ! IS_INTERRUPT (func_type)))
14324 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14325 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14327 else
14329 char *p;
14330 int first = 1;
14332 /* Generate the load multiple instruction to restore the
14333 registers. Note we can get here, even if
14334 frame_pointer_needed is true, but only if sp already
14335 points to the base of the saved core registers. */
14336 if (live_regs_mask & (1 << SP_REGNUM))
14338 unsigned HOST_WIDE_INT stack_adjust;
14340 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14341 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14343 if (stack_adjust && arm_arch5 && TARGET_ARM)
14344 if (TARGET_UNIFIED_ASM)
14345 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14346 else
14347 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14348 else
14350 /* If we can't use ldmib (SA110 bug),
14351 then try to pop r3 instead. */
14352 if (stack_adjust)
14353 live_regs_mask |= 1 << 3;
14355 if (TARGET_UNIFIED_ASM)
14356 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14357 else
14358 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14361 else
14362 if (TARGET_UNIFIED_ASM)
14363 sprintf (instr, "pop%s\t{", conditional);
14364 else
14365 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14367 p = instr + strlen (instr);
14369 for (reg = 0; reg <= SP_REGNUM; reg++)
14370 if (live_regs_mask & (1 << reg))
14372 int l = strlen (reg_names[reg]);
14374 if (first)
14375 first = 0;
14376 else
14378 memcpy (p, ", ", 2);
14379 p += 2;
14382 memcpy (p, "%|", 2);
14383 memcpy (p + 2, reg_names[reg], l);
14384 p += l + 2;
14387 if (live_regs_mask & (1 << LR_REGNUM))
14389 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14390 /* If returning from an interrupt, restore the CPSR. */
14391 if (IS_INTERRUPT (func_type))
14392 strcat (p, "^");
14394 else
14395 strcpy (p, "}");
14398 output_asm_insn (instr, & operand);
14400 /* See if we need to generate an extra instruction to
14401 perform the actual function return. */
14402 if (really_return
14403 && func_type != ARM_FT_INTERWORKED
14404 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14406 /* The return has already been handled
14407 by loading the LR into the PC. */
14408 really_return = 0;
14412 if (really_return)
14414 switch ((int) ARM_FUNC_TYPE (func_type))
14416 case ARM_FT_ISR:
14417 case ARM_FT_FIQ:
14418 /* ??? This is wrong for unified assembly syntax. */
14419 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14420 break;
14422 case ARM_FT_INTERWORKED:
14423 sprintf (instr, "bx%s\t%%|lr", conditional);
14424 break;
14426 case ARM_FT_EXCEPTION:
14427 /* ??? This is wrong for unified assembly syntax. */
14428 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14429 break;
14431 default:
14432 /* Use bx if it's available. */
14433 if (arm_arch5 || arm_arch4t)
14434 sprintf (instr, "bx%s\t%%|lr", conditional);
14435 else
14436 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14437 break;
14440 output_asm_insn (instr, & operand);
14443 return "";
14446 /* Write the function name into the code section, directly preceding
14447 the function prologue.
14449 Code will be output similar to this:
14451 .ascii "arm_poke_function_name", 0
14452 .align
14454 .word 0xff000000 + (t1 - t0)
14455 arm_poke_function_name
14456 mov ip, sp
14457 stmfd sp!, {fp, ip, lr, pc}
14458 sub fp, ip, #4
14460 When performing a stack backtrace, code can inspect the value
14461 of 'pc' stored at 'fp' + 0. If the trace function then looks
14462 at location pc - 12 and the top 8 bits are set, then we know
14463 that there is a function name embedded immediately preceding this
14464 location, and that its length is ((pc[-3]) & ~0xff000000).
14466 We assume that pc is declared as a pointer to an unsigned long.
14468 It is of no benefit to output the function name if we are assembling
14469 a leaf function. These function types will not contain a stack
14470 backtrace structure, therefore it is not possible to determine the
14471 function name. */
14472 void
14473 arm_poke_function_name (FILE *stream, const char *name)
14475 unsigned long alignlength;
14476 unsigned long length;
14477 rtx x;
14479 length = strlen (name) + 1;
14480 alignlength = ROUND_UP_WORD (length);
14482 ASM_OUTPUT_ASCII (stream, name, length);
14483 ASM_OUTPUT_ALIGN (stream, 2);
14484 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14485 assemble_aligned_integer (UNITS_PER_WORD, x);
14488 /* Place some comments into the assembler stream
14489 describing the current function. */
14490 static void
14491 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14493 unsigned long func_type;
14495 if (TARGET_THUMB1)
14497 thumb1_output_function_prologue (f, frame_size);
14498 return;
14501 /* Sanity check. */
14502 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14504 func_type = arm_current_func_type ();
14506 switch ((int) ARM_FUNC_TYPE (func_type))
14508 default:
14509 case ARM_FT_NORMAL:
14510 break;
14511 case ARM_FT_INTERWORKED:
14512 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14513 break;
14514 case ARM_FT_ISR:
14515 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14516 break;
14517 case ARM_FT_FIQ:
14518 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14519 break;
14520 case ARM_FT_EXCEPTION:
14521 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14522 break;
14525 if (IS_NAKED (func_type))
14526 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14528 if (IS_VOLATILE (func_type))
14529 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14531 if (IS_NESTED (func_type))
14532 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14533 if (IS_STACKALIGN (func_type))
14534 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14536 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14537 crtl->args.size,
14538 crtl->args.pretend_args_size, frame_size);
14540 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14541 frame_pointer_needed,
14542 cfun->machine->uses_anonymous_args);
14544 if (cfun->machine->lr_save_eliminated)
14545 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14547 if (crtl->calls_eh_return)
14548 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14552 const char *
14553 arm_output_epilogue (rtx sibling)
14555 int reg;
14556 unsigned long saved_regs_mask;
14557 unsigned long func_type;
14558 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14559 frame that is $fp + 4 for a non-variadic function. */
14560 int floats_offset = 0;
14561 rtx operands[3];
14562 FILE * f = asm_out_file;
14563 unsigned int lrm_count = 0;
14564 int really_return = (sibling == NULL);
14565 int start_reg;
14566 arm_stack_offsets *offsets;
14568 /* If we have already generated the return instruction
14569 then it is futile to generate anything else. */
14570 if (use_return_insn (FALSE, sibling) &&
14571 (cfun->machine->return_used_this_function != 0))
14572 return "";
14574 func_type = arm_current_func_type ();
14576 if (IS_NAKED (func_type))
14577 /* Naked functions don't have epilogues. */
14578 return "";
14580 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14582 rtx op;
14584 /* A volatile function should never return. Call abort. */
14585 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14586 assemble_external_libcall (op);
14587 output_asm_insn ("bl\t%a0", &op);
14589 return "";
14592 /* If we are throwing an exception, then we really must be doing a
14593 return, so we can't tail-call. */
14594 gcc_assert (!crtl->calls_eh_return || really_return);
14596 offsets = arm_get_frame_offsets ();
14597 saved_regs_mask = offsets->saved_regs_mask;
14599 if (TARGET_IWMMXT)
14600 lrm_count = bit_count (saved_regs_mask);
14602 floats_offset = offsets->saved_args;
14603 /* Compute how far away the floats will be. */
14604 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14605 if (saved_regs_mask & (1 << reg))
14606 floats_offset += 4;
14608 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14610 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14611 int vfp_offset = offsets->frame;
14613 if (TARGET_FPA_EMU2)
14615 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14616 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14618 floats_offset += 12;
14619 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14620 reg, FP_REGNUM, floats_offset - vfp_offset);
14623 else
14625 start_reg = LAST_FPA_REGNUM;
14627 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14629 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14631 floats_offset += 12;
14633 /* We can't unstack more than four registers at once. */
14634 if (start_reg - reg == 3)
14636 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14637 reg, FP_REGNUM, floats_offset - vfp_offset);
14638 start_reg = reg - 1;
14641 else
14643 if (reg != start_reg)
14644 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14645 reg + 1, start_reg - reg,
14646 FP_REGNUM, floats_offset - vfp_offset);
14647 start_reg = reg - 1;
14651 /* Just in case the last register checked also needs unstacking. */
14652 if (reg != start_reg)
14653 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14654 reg + 1, start_reg - reg,
14655 FP_REGNUM, floats_offset - vfp_offset);
14658 if (TARGET_HARD_FLOAT && TARGET_VFP)
14660 int saved_size;
14662 /* The fldmd insns do not have base+offset addressing
14663 modes, so we use IP to hold the address. */
14664 saved_size = arm_get_vfp_saved_size ();
14666 if (saved_size > 0)
14668 floats_offset += saved_size;
14669 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14670 FP_REGNUM, floats_offset - vfp_offset);
14672 start_reg = FIRST_VFP_REGNUM;
14673 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14675 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14676 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14678 if (start_reg != reg)
14679 vfp_output_fldmd (f, IP_REGNUM,
14680 (start_reg - FIRST_VFP_REGNUM) / 2,
14681 (reg - start_reg) / 2);
14682 start_reg = reg + 2;
14685 if (start_reg != reg)
14686 vfp_output_fldmd (f, IP_REGNUM,
14687 (start_reg - FIRST_VFP_REGNUM) / 2,
14688 (reg - start_reg) / 2);
14691 if (TARGET_IWMMXT)
14693 /* The frame pointer is guaranteed to be non-double-word aligned.
14694 This is because it is set to (old_stack_pointer - 4) and the
14695 old_stack_pointer was double word aligned. Thus the offset to
14696 the iWMMXt registers to be loaded must also be non-double-word
14697 sized, so that the resultant address *is* double-word aligned.
14698 We can ignore floats_offset since that was already included in
14699 the live_regs_mask. */
14700 lrm_count += (lrm_count % 2 ? 2 : 1);
14702 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14703 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14705 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14706 reg, FP_REGNUM, lrm_count * 4);
14707 lrm_count += 2;
14711 /* saved_regs_mask should contain the IP, which at the time of stack
14712 frame generation actually contains the old stack pointer. So a
14713 quick way to unwind the stack is just pop the IP register directly
14714 into the stack pointer. */
14715 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14716 saved_regs_mask &= ~ (1 << IP_REGNUM);
14717 saved_regs_mask |= (1 << SP_REGNUM);
14719 /* There are two registers left in saved_regs_mask - LR and PC. We
14720 only need to restore the LR register (the return address), but to
14721 save time we can load it directly into the PC, unless we need a
14722 special function exit sequence, or we are not really returning. */
14723 if (really_return
14724 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14725 && !crtl->calls_eh_return)
14726 /* Delete the LR from the register mask, so that the LR on
14727 the stack is loaded into the PC in the register mask. */
14728 saved_regs_mask &= ~ (1 << LR_REGNUM);
14729 else
14730 saved_regs_mask &= ~ (1 << PC_REGNUM);
14732 /* We must use SP as the base register, because SP is one of the
14733 registers being restored. If an interrupt or page fault
14734 happens in the ldm instruction, the SP might or might not
14735 have been restored. That would be bad, as then SP will no
14736 longer indicate the safe area of stack, and we can get stack
14737 corruption. Using SP as the base register means that it will
14738 be reset correctly to the original value, should an interrupt
14739 occur. If the stack pointer already points at the right
14740 place, then omit the subtraction. */
14741 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14742 || cfun->calls_alloca)
14743 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14744 4 * bit_count (saved_regs_mask));
14745 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14747 if (IS_INTERRUPT (func_type))
14748 /* Interrupt handlers will have pushed the
14749 IP onto the stack, so restore it now. */
14750 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14752 else
14754 /* This branch is executed for ARM mode (non-apcs frames) and
14755 Thumb-2 mode. Frame layout is essentially the same for those
14756 cases, except that in ARM mode frame pointer points to the
14757 first saved register, while in Thumb-2 mode the frame pointer points
14758 to the last saved register.
14760 It is possible to make frame pointer point to last saved
14761 register in both cases, and remove some conditionals below.
14762 That means that fp setup in prologue would be just "mov fp, sp"
14763 and sp restore in epilogue would be just "mov sp, fp", whereas
14764 now we have to use add/sub in those cases. However, the value
14765 of that would be marginal, as both mov and add/sub are 32-bit
14766 in ARM mode, and it would require extra conditionals
14767 in arm_expand_prologue to distinguish the ARM-apcs-frame case
14768 (where the frame pointer is required to point at the first register)
14769 from the ARM-non-apcs-frame case.  Therefore, such a change is postponed
14770 until a real need arises.  */
14771 unsigned HOST_WIDE_INT amount;
14772 int rfe;
14773 /* Restore stack pointer if necessary. */
14774 if (TARGET_ARM && frame_pointer_needed)
14776 operands[0] = stack_pointer_rtx;
14777 operands[1] = hard_frame_pointer_rtx;
14779 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14780 output_add_immediate (operands);
14782 else
14784 if (frame_pointer_needed)
14786 /* For Thumb-2 restore sp from the frame pointer.
14787 Operand restrictions mean we have to increment FP, then copy
14788 to SP. */
14789 amount = offsets->locals_base - offsets->saved_regs;
14790 operands[0] = hard_frame_pointer_rtx;
14792 else
14794 unsigned long count;
14795 operands[0] = stack_pointer_rtx;
14796 amount = offsets->outgoing_args - offsets->saved_regs;
14797 /* pop call clobbered registers if it avoids a
14798 separate stack adjustment. */
14799 count = offsets->saved_regs - offsets->saved_args;
14800 if (optimize_size
14801 && count != 0
14802 && !crtl->calls_eh_return
14803 && bit_count(saved_regs_mask) * 4 == count
14804 && !IS_INTERRUPT (func_type)
14805 && !crtl->tail_call_emit)
14807 unsigned long mask;
14808 /* Preserve return values, of any size. */
14809 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
14810 mask ^= 0xf;
14811 mask &= ~saved_regs_mask;
14812 reg = 0;
14813 while (bit_count (mask) * 4 > amount)
14815 while ((mask & (1 << reg)) == 0)
14816 reg++;
14817 mask &= ~(1 << reg);
14819 if (bit_count (mask) * 4 == amount) {
14820 amount = 0;
14821 saved_regs_mask |= mask;
14826 if (amount)
14828 operands[1] = operands[0];
14829 operands[2] = GEN_INT (amount);
14830 output_add_immediate (operands);
14832 if (frame_pointer_needed)
14833 asm_fprintf (f, "\tmov\t%r, %r\n",
14834 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14837 if (TARGET_FPA_EMU2)
14839 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14840 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14841 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14842 reg, SP_REGNUM);
14844 else
14846 start_reg = FIRST_FPA_REGNUM;
14848 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14850 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14852 if (reg - start_reg == 3)
14854 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14855 start_reg, SP_REGNUM);
14856 start_reg = reg + 1;
14859 else
14861 if (reg != start_reg)
14862 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14863 start_reg, reg - start_reg,
14864 SP_REGNUM);
14866 start_reg = reg + 1;
14870 /* Just in case the last register checked also needs unstacking. */
14871 if (reg != start_reg)
14872 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14873 start_reg, reg - start_reg, SP_REGNUM);
14876 if (TARGET_HARD_FLOAT && TARGET_VFP)
14878 int end_reg = LAST_VFP_REGNUM + 1;
14880 /* Scan the registers in reverse order. We need to match
14881 any groupings made in the prologue and generate matching
14882 pop operations. */
14883 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14885 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14886 && (!df_regs_ever_live_p (reg + 1)
14887 || call_used_regs[reg + 1]))
14889 if (end_reg > reg + 2)
14890 vfp_output_fldmd (f, SP_REGNUM,
14891 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14892 (end_reg - (reg + 2)) / 2);
14893 end_reg = reg;
14896 if (end_reg > reg + 2)
14897 vfp_output_fldmd (f, SP_REGNUM, 0,
14898 (end_reg - (reg + 2)) / 2);
14901 if (TARGET_IWMMXT)
14902 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14903 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14904 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14906 /* If we can, restore the LR into the PC. */
14907 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14908 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14909 && !IS_STACKALIGN (func_type)
14910 && really_return
14911 && crtl->args.pretend_args_size == 0
14912 && saved_regs_mask & (1 << LR_REGNUM)
14913 && !crtl->calls_eh_return)
14915 saved_regs_mask &= ~ (1 << LR_REGNUM);
14916 saved_regs_mask |= (1 << PC_REGNUM);
14917 rfe = IS_INTERRUPT (func_type);
14919 else
14920 rfe = 0;
14922 /* Load the registers off the stack. If we only have one register
14923 to load use the LDR instruction - it is faster. For Thumb-2
14924 always use pop and the assembler will pick the best instruction.  */
14925 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14926 && !IS_INTERRUPT(func_type))
14928 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14930 else if (saved_regs_mask)
14932 if (saved_regs_mask & (1 << SP_REGNUM))
14933 /* Note - write back to the stack register is not enabled
14934 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14935 in the list of registers and if we add writeback the
14936 instruction becomes UNPREDICTABLE. */
14937 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14938 rfe);
14939 else if (TARGET_ARM)
14940 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14941 rfe);
14942 else
14943 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14946 if (crtl->args.pretend_args_size)
14948 /* Unwind the pre-pushed regs. */
14949 operands[0] = operands[1] = stack_pointer_rtx;
14950 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14951 output_add_immediate (operands);
14955 /* We may have already restored PC directly from the stack. */
14956 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14957 return "";
14959 /* Stack adjustment for exception handler. */
14960 if (crtl->calls_eh_return)
14961 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14962 ARM_EH_STACKADJ_REGNUM);
14964 /* Generate the return instruction. */
14965 switch ((int) ARM_FUNC_TYPE (func_type))
14967 case ARM_FT_ISR:
14968 case ARM_FT_FIQ:
14969 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14970 break;
14972 case ARM_FT_EXCEPTION:
14973 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14974 break;
14976 case ARM_FT_INTERWORKED:
14977 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14978 break;
14980 default:
14981 if (IS_STACKALIGN (func_type))
14983 /* See comment in arm_expand_prologue. */
14984 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14986 if (arm_arch5 || arm_arch4t)
14987 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14988 else
14989 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14990 break;
14993 return "";
14996 static void
14997 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14998 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15000 arm_stack_offsets *offsets;
15002 if (TARGET_THUMB1)
15004 int regno;
15006 /* Emit any call-via-reg trampolines that are needed for v4t support
15007 of call_reg and call_value_reg type insns. */
15008 for (regno = 0; regno < LR_REGNUM; regno++)
15010 rtx label = cfun->machine->call_via[regno];
15012 if (label != NULL)
15014 switch_to_section (function_section (current_function_decl));
15015 targetm.asm_out.internal_label (asm_out_file, "L",
15016 CODE_LABEL_NUMBER (label));
15017 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15021 /* ??? Probably not safe to set this here, since it assumes that a
15022 function will be emitted as assembly immediately after we generate
15023 RTL for it. This does not happen for inline functions. */
15024 cfun->machine->return_used_this_function = 0;
15026 else /* TARGET_32BIT */
15028 /* We need to take into account any stack-frame rounding. */
15029 offsets = arm_get_frame_offsets ();
15031 gcc_assert (!use_return_insn (FALSE, NULL)
15032 || (cfun->machine->return_used_this_function != 0)
15033 || offsets->saved_regs == offsets->outgoing_args
15034 || frame_pointer_needed);
15036 /* Reset the ARM-specific per-function variables. */
15037 after_arm_reorg = 0;
15041 /* Generate and emit an insn that we will recognize as a push_multi.
15042 Unfortunately, since this insn does not reflect very well the actual
15043 semantics of the operation, we need to annotate the insn for the benefit
15044 of DWARF2 frame unwind information. */
15045 static rtx
15046 emit_multi_reg_push (unsigned long mask)
15048 int num_regs = 0;
15049 int num_dwarf_regs;
15050 int i, j;
15051 rtx par;
15052 rtx dwarf;
15053 int dwarf_par_index;
15054 rtx tmp, reg;
15056 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15057 if (mask & (1 << i))
15058 num_regs++;
15060 gcc_assert (num_regs && num_regs <= 16);
15062 /* We don't record the PC in the dwarf frame information. */
15063 num_dwarf_regs = num_regs;
15064 if (mask & (1 << PC_REGNUM))
15065 num_dwarf_regs--;
15067 /* For the body of the insn we are going to generate an UNSPEC in
15068 parallel with several USEs. This allows the insn to be recognized
15069 by the push_multi pattern in the arm.md file.
15071 The body of the insn looks something like this:
15073 (parallel [
15074 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15075 (const_int:SI <num>)))
15076 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15077 (use (reg:SI XX))
15078 (use (reg:SI YY))
15082 For the frame note however, we try to be more explicit and actually
15083 show each register being stored into the stack frame, plus a (single)
15084 decrement of the stack pointer. We do it this way in order to be
15085 friendly to the stack unwinding code, which only wants to see a single
15086 stack decrement per instruction. The RTL we generate for the note looks
15087 something like this:
15089 (sequence [
15090 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15091 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15092 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15093 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15097 FIXME:: In an ideal world the PRE_MODIFY would not exist and
15098 instead we'd have a parallel expression detailing all
15099 the stores to the various memory addresses so that debug
15100 information is more up-to-date. Remember however while writing
15101 this to take care of the constraints with the push instruction.
15103 Note also that this has to be taken care of for the VFP registers.
15105 For more see PR43399. */
15107 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15108 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15109 dwarf_par_index = 1;
15111 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15113 if (mask & (1 << i))
15115 reg = gen_rtx_REG (SImode, i);
15117 XVECEXP (par, 0, 0)
15118 = gen_rtx_SET (VOIDmode,
15119 gen_frame_mem
15120 (BLKmode,
15121 gen_rtx_PRE_MODIFY (Pmode,
15122 stack_pointer_rtx,
15123 plus_constant
15124 (stack_pointer_rtx,
15125 -4 * num_regs))
15127 gen_rtx_UNSPEC (BLKmode,
15128 gen_rtvec (1, reg),
15129 UNSPEC_PUSH_MULT));
15131 if (i != PC_REGNUM)
15133 tmp = gen_rtx_SET (VOIDmode,
15134 gen_frame_mem (SImode, stack_pointer_rtx),
15135 reg);
15136 RTX_FRAME_RELATED_P (tmp) = 1;
15137 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15138 dwarf_par_index++;
15141 break;
15145 for (j = 1, i++; j < num_regs; i++)
15147 if (mask & (1 << i))
15149 reg = gen_rtx_REG (SImode, i);
15151 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15153 if (i != PC_REGNUM)
15156 = gen_rtx_SET (VOIDmode,
15157 gen_frame_mem
15158 (SImode,
15159 plus_constant (stack_pointer_rtx,
15160 4 * j)),
15161 reg);
15162 RTX_FRAME_RELATED_P (tmp) = 1;
15163 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15166 j++;
15170 par = emit_insn (par);
15172 tmp = gen_rtx_SET (VOIDmode,
15173 stack_pointer_rtx,
15174 plus_constant (stack_pointer_rtx, -4 * num_regs));
15175 RTX_FRAME_RELATED_P (tmp) = 1;
15176 XVECEXP (dwarf, 0, 0) = tmp;
15178 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15180 return par;
15183 /* Calculate the size of the return value that is passed in registers. */
15184 static unsigned
15185 arm_size_return_regs (void)
15187 enum machine_mode mode;
15189 if (crtl->return_rtx != 0)
15190 mode = GET_MODE (crtl->return_rtx);
15191 else
15192 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15194 return GET_MODE_SIZE (mode);
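/* For illustration: a function returning a 64-bit long long (DImode)
   yields 8 here, which tells callers such as
   thumb1_compute_save_reg_mask that the low registers holding the
   result (r0 and r1) should not be chosen as work registers.  */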
15197 static rtx
15198 emit_sfm (int base_reg, int count)
15200 rtx par;
15201 rtx dwarf;
15202 rtx tmp, reg;
15203 int i;
15205 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15206 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15208 reg = gen_rtx_REG (XFmode, base_reg++);
15210 XVECEXP (par, 0, 0)
15211 = gen_rtx_SET (VOIDmode,
15212 gen_frame_mem
15213 (BLKmode,
15214 gen_rtx_PRE_MODIFY (Pmode,
15215 stack_pointer_rtx,
15216 plus_constant
15217 (stack_pointer_rtx,
15218 -12 * count))
15220 gen_rtx_UNSPEC (BLKmode,
15221 gen_rtvec (1, reg),
15222 UNSPEC_PUSH_MULT));
15223 tmp = gen_rtx_SET (VOIDmode,
15224 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15225 RTX_FRAME_RELATED_P (tmp) = 1;
15226 XVECEXP (dwarf, 0, 1) = tmp;
15228 for (i = 1; i < count; i++)
15230 reg = gen_rtx_REG (XFmode, base_reg++);
15231 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15233 tmp = gen_rtx_SET (VOIDmode,
15234 gen_frame_mem (XFmode,
15235 plus_constant (stack_pointer_rtx,
15236 i * 12)),
15237 reg);
15238 RTX_FRAME_RELATED_P (tmp) = 1;
15239 XVECEXP (dwarf, 0, i + 1) = tmp;
15242 tmp = gen_rtx_SET (VOIDmode,
15243 stack_pointer_rtx,
15244 plus_constant (stack_pointer_rtx, -12 * count));
15246 RTX_FRAME_RELATED_P (tmp) = 1;
15247 XVECEXP (dwarf, 0, 0) = tmp;
15249 par = emit_insn (par);
15250 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15252 return par;
15256 /* Return true if the current function needs to save/restore LR. */
15258 static bool
15259 thumb_force_lr_save (void)
15261 return !cfun->machine->lr_save_eliminated
15262 && (!leaf_function_p ()
15263 || thumb_far_jump_used_p ()
15264 || df_regs_ever_live_p (LR_REGNUM));
15268 /* Return true if r3 is used by any of the tail call insns in the
15269 current function. */
15271 static bool
15272 any_sibcall_uses_r3 (void)
15274 edge_iterator ei;
15275 edge e;
15277 if (!crtl->tail_call_emit)
15278 return false;
15279 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15280 if (e->flags & EDGE_SIBCALL)
15282 rtx call = BB_END (e->src);
15283 if (!CALL_P (call))
15284 call = prev_nonnote_nondebug_insn (call);
15285 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15286 if (find_regno_fusage (call, USE, 3))
15287 return true;
15289 return false;
15293 /* Compute the distance from register FROM to register TO.
15294 These can be the arg pointer (26), the soft frame pointer (25),
15295 the stack pointer (13) or the hard frame pointer (11).
15296 In thumb mode r7 is used as the soft frame pointer, if needed.
15297 Typical stack layout looks like this:
15299 old stack pointer -> | |
15300 ----
15301 | | \
15302 | | saved arguments for
15303 | | vararg functions
15304 | | /
15306 hard FP & arg pointer -> | | \
15307 | | stack
15308 | | frame
15309 | | /
15311 | | \
15312 | | call saved
15313 | | registers
15314 soft frame pointer -> | | /
15316 | | \
15317 | | local
15318 | | variables
15319 locals base pointer -> | | /
15321 | | \
15322 | | outgoing
15323 | | arguments
15324 current stack pointer -> | | /
15327 For a given function some or all of these stack components
15328 may not be needed, giving rise to the possibility of
15329 eliminating some of the registers.
15331 The values returned by this function must reflect the behavior
15332 of arm_expand_prologue() and arm_compute_save_reg_mask().
15334 The sign of the number returned reflects the direction of stack
15335 growth, so the values are positive for all eliminations except
15336 from the soft frame pointer to the hard frame pointer.
15338 SFP may point just inside the local variables block to ensure correct
15339 alignment. */
15342 /* Calculate stack offsets. These are used to calculate register elimination
15343 offsets and in prologue/epilogue code. Also calculates which registers
15344 should be saved. */
15346 static arm_stack_offsets *
15347 arm_get_frame_offsets (void)
15349 struct arm_stack_offsets *offsets;
15350 unsigned long func_type;
15351 int leaf;
15352 int saved;
15353 int core_saved;
15354 HOST_WIDE_INT frame_size;
15355 int i;
15357 offsets = &cfun->machine->stack_offsets;
15359 /* We need to know if we are a leaf function. Unfortunately, it
15360 is possible to be called after start_sequence has been called,
15361 which causes get_insns to return the insns for the sequence,
15362 not the function, which will cause leaf_function_p to return
15363 the incorrect result.
15365 We only need to know about leaf functions once reload has completed, and the
15366 frame size cannot be changed after that time, so we can safely
15367 use the cached value. */
15369 if (reload_completed)
15370 return offsets;
15372 /* Initially this is the size of the local variables. It will translated
15373 into an offset once we have determined the size of preceding data. */
15374 frame_size = ROUND_UP_WORD (get_frame_size ());
15376 leaf = leaf_function_p ();
15378 /* Space for variadic functions. */
15379 offsets->saved_args = crtl->args.pretend_args_size;
15381 /* In Thumb mode this is incorrect, but never used. */
15382 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15383 arm_compute_static_chain_stack_bytes();
15385 if (TARGET_32BIT)
15387 unsigned int regno;
15389 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15390 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15391 saved = core_saved;
15393 /* We know that SP will be doubleword aligned on entry, and we must
15394 preserve that condition at any subroutine call. We also require the
15395 soft frame pointer to be doubleword aligned. */
15397 if (TARGET_REALLY_IWMMXT)
15399 /* Check for the call-saved iWMMXt registers. */
15400 for (regno = FIRST_IWMMXT_REGNUM;
15401 regno <= LAST_IWMMXT_REGNUM;
15402 regno++)
15403 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15404 saved += 8;
15407 func_type = arm_current_func_type ();
15408 if (! IS_VOLATILE (func_type))
15410 /* Space for saved FPA registers. */
15411 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15412 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15413 saved += 12;
15415 /* Space for saved VFP registers. */
15416 if (TARGET_HARD_FLOAT && TARGET_VFP)
15417 saved += arm_get_vfp_saved_size ();
15420 else /* TARGET_THUMB1 */
15422 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15423 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15424 saved = core_saved;
15425 if (TARGET_BACKTRACE)
15426 saved += 16;
15429 /* Saved registers include the stack frame. */
15430 offsets->saved_regs = offsets->saved_args + saved +
15431 arm_compute_static_chain_stack_bytes();
15432 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15433 /* A leaf function does not need any stack alignment if it has nothing
15434 on the stack. */
15435 if (leaf && frame_size == 0
15436 /* However if it calls alloca(), we have a dynamically allocated
15437 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
15438 && ! cfun->calls_alloca)
15440 offsets->outgoing_args = offsets->soft_frame;
15441 offsets->locals_base = offsets->soft_frame;
15442 return offsets;
15445 /* Ensure SFP has the correct alignment. */
15446 if (ARM_DOUBLEWORD_ALIGN
15447 && (offsets->soft_frame & 7))
15449 offsets->soft_frame += 4;
15450 /* Try to align stack by pushing an extra reg. Don't bother doing this
15451 when there is a stack frame as the alignment will be rolled into
15452 the normal stack adjustment. */
15453 if (frame_size + crtl->outgoing_args_size == 0)
15455 int reg = -1;
15457 /* If it is safe to use r3, then do so. This sometimes
15458 generates better code on Thumb-2 by avoiding the need to
15459 use 32-bit push/pop instructions. */
15460 if (! any_sibcall_uses_r3 ()
15461 && arm_size_return_regs () <= 12
15462 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15464 reg = 3;
15466 else
15467 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15469 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15471 reg = i;
15472 break;
15476 if (reg != -1)
15478 offsets->saved_regs += 4;
15479 offsets->saved_regs_mask |= (1 << reg);
15484 offsets->locals_base = offsets->soft_frame + frame_size;
15485 offsets->outgoing_args = (offsets->locals_base
15486 + crtl->outgoing_args_size);
15488 if (ARM_DOUBLEWORD_ALIGN)
15490 /* Ensure SP remains doubleword aligned. */
15491 if (offsets->outgoing_args & 7)
15492 offsets->outgoing_args += 4;
15493 gcc_assert (!(offsets->outgoing_args & 7));
15496 return offsets;
15500 /* Calculate the relative offsets for the different stack pointers. Positive
15501 offsets are in the direction of stack growth. */
15503 HOST_WIDE_INT
15504 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15506 arm_stack_offsets *offsets;
15508 offsets = arm_get_frame_offsets ();
15510 /* OK, now we have enough information to compute the distances.
15511 There must be an entry in these switch tables for each pair
15512 of registers in ELIMINABLE_REGS, even if some of the entries
15513 seem to be redundant or useless. */
15514 switch (from)
15516 case ARG_POINTER_REGNUM:
15517 switch (to)
15519 case THUMB_HARD_FRAME_POINTER_REGNUM:
15520 return 0;
15522 case FRAME_POINTER_REGNUM:
15523 /* This is the reverse of the soft frame pointer
15524 to hard frame pointer elimination below. */
15525 return offsets->soft_frame - offsets->saved_args;
15527 case ARM_HARD_FRAME_POINTER_REGNUM:
15528 /* This is only non-zero in the case where the static chain register
15529 is stored above the frame. */
15530 return offsets->frame - offsets->saved_args - 4;
15532 case STACK_POINTER_REGNUM:
15533 /* If nothing has been pushed on the stack at all
15534 then this will return -4. This *is* correct! */
15535 return offsets->outgoing_args - (offsets->saved_args + 4);
15537 default:
15538 gcc_unreachable ();
15540 gcc_unreachable ();
15542 case FRAME_POINTER_REGNUM:
15543 switch (to)
15545 case THUMB_HARD_FRAME_POINTER_REGNUM:
15546 return 0;
15548 case ARM_HARD_FRAME_POINTER_REGNUM:
15549 /* The hard frame pointer points to the top entry in the
15550 stack frame. The soft frame pointer to the bottom entry
15551 in the stack frame. If there is no stack frame at all,
15552 then they are identical. */
15554 return offsets->frame - offsets->soft_frame;
15556 case STACK_POINTER_REGNUM:
15557 return offsets->outgoing_args - offsets->soft_frame;
15559 default:
15560 gcc_unreachable ();
15562 gcc_unreachable ();
15564 default:
15565 /* You cannot eliminate from the stack pointer.
15566 In theory you could eliminate from the hard frame
15567 pointer to the stack pointer, but this will never
15568 happen, since if a stack frame is not needed the
15569 hard frame pointer will never be used. */
15570 gcc_unreachable ();
15574 /* Given FROM and TO register numbers, say whether this elimination is
15575 allowed. Frame pointer elimination is automatically handled.
15577 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15578 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15579 pointer, we must eliminate FRAME_POINTER_REGNUM into
15580 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15581 ARG_POINTER_REGNUM. */
15583 bool
15584 arm_can_eliminate (const int from, const int to)
15586 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15587 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15588 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15589 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15590 true);
15593 /* Emit RTL to save coprocessor registers on function entry. Returns the
15594 number of bytes pushed. */
15596 static int
15597 arm_save_coproc_regs(void)
15599 int saved_size = 0;
15600 unsigned reg;
15601 unsigned start_reg;
15602 rtx insn;
15604 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15605 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15607 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15608 insn = gen_rtx_MEM (V2SImode, insn);
15609 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15610 RTX_FRAME_RELATED_P (insn) = 1;
15611 saved_size += 8;
15614 /* Save any floating point call-saved registers used by this
15615 function. */
15616 if (TARGET_FPA_EMU2)
15618 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15619 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15621 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15622 insn = gen_rtx_MEM (XFmode, insn);
15623 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15624 RTX_FRAME_RELATED_P (insn) = 1;
15625 saved_size += 12;
15628 else
15630 start_reg = LAST_FPA_REGNUM;
15632 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15634 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15636 if (start_reg - reg == 3)
15638 insn = emit_sfm (reg, 4);
15639 RTX_FRAME_RELATED_P (insn) = 1;
15640 saved_size += 48;
15641 start_reg = reg - 1;
15644 else
15646 if (start_reg != reg)
15648 insn = emit_sfm (reg + 1, start_reg - reg);
15649 RTX_FRAME_RELATED_P (insn) = 1;
15650 saved_size += (start_reg - reg) * 12;
15652 start_reg = reg - 1;
15656 if (start_reg != reg)
15658 insn = emit_sfm (reg + 1, start_reg - reg);
15659 saved_size += (start_reg - reg) * 12;
15660 RTX_FRAME_RELATED_P (insn) = 1;
15663 if (TARGET_HARD_FLOAT && TARGET_VFP)
15665 start_reg = FIRST_VFP_REGNUM;
15667 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15669 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15670 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15672 if (start_reg != reg)
15673 saved_size += vfp_emit_fstmd (start_reg,
15674 (reg - start_reg) / 2);
15675 start_reg = reg + 2;
15678 if (start_reg != reg)
15679 saved_size += vfp_emit_fstmd (start_reg,
15680 (reg - start_reg) / 2);
15682 return saved_size;
15686 /* Set the Thumb frame pointer from the stack pointer. */
15688 static void
15689 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15691 HOST_WIDE_INT amount;
15692 rtx insn, dwarf;
15694 amount = offsets->outgoing_args - offsets->locals_base;
15695 if (amount < 1024)
15696 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15697 stack_pointer_rtx, GEN_INT (amount)));
15698 else
15700 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15701 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15702 expects the first two operands to be the same. */
15703 if (TARGET_THUMB2)
15705 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15706 stack_pointer_rtx,
15707 hard_frame_pointer_rtx));
15709 else
15711 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15712 hard_frame_pointer_rtx,
15713 stack_pointer_rtx));
15715 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15716 plus_constant (stack_pointer_rtx, amount));
15717 RTX_FRAME_RELATED_P (dwarf) = 1;
15718 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15721 RTX_FRAME_RELATED_P (insn) = 1;
15724 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15725 function. */
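/* As an illustrative sketch only (register choice, immediates and the
   exact sequence depend on the target options and the frame layout),
   the code emitted here for a simple APCS-frame ARM function might
   look like:

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #16		@ local frame

   while a non-APCS frame is normally just a push of the live
   registers followed by a stack adjustment.  */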
15726 void
15727 arm_expand_prologue (void)
15729 rtx amount;
15730 rtx insn;
15731 rtx ip_rtx;
15732 unsigned long live_regs_mask;
15733 unsigned long func_type;
15734 int fp_offset = 0;
15735 int saved_pretend_args = 0;
15736 int saved_regs = 0;
15737 unsigned HOST_WIDE_INT args_to_push;
15738 arm_stack_offsets *offsets;
15740 func_type = arm_current_func_type ();
15742 /* Naked functions don't have prologues. */
15743 if (IS_NAKED (func_type))
15744 return;
15746 /* Make a copy of c_f_p_a_s (crtl->args.pretend_args_size) as we may need to modify it locally. */
15747 args_to_push = crtl->args.pretend_args_size;
15749 /* Compute which registers we will have to save onto the stack. */
15750 offsets = arm_get_frame_offsets ();
15751 live_regs_mask = offsets->saved_regs_mask;
15753 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15755 if (IS_STACKALIGN (func_type))
15757 rtx dwarf;
15758 rtx r0;
15759 rtx r1;
15760 /* Handle a word-aligned stack pointer. We generate the following:
15762 mov r0, sp
15763 bic r1, r0, #7
15764 mov sp, r1
15765 <save and restore r0 in normal prologue/epilogue>
15766 mov sp, r0
15767 bx lr
15769 The unwinder doesn't need to know about the stack realignment.
15770 Just tell it we saved SP in r0. */
15771 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15773 r0 = gen_rtx_REG (SImode, 0);
15774 r1 = gen_rtx_REG (SImode, 1);
15775 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15776 compiler won't choke. */
15777 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15778 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15779 insn = gen_movsi (r0, stack_pointer_rtx);
15780 RTX_FRAME_RELATED_P (insn) = 1;
15781 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15782 emit_insn (insn);
15783 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15784 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15787 /* For APCS frames, if the IP register is clobbered
15788 when creating the frame, save that register in a special
15789 way. */
15790 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15792 if (IS_INTERRUPT (func_type))
15794 /* Interrupt functions must not corrupt any registers.
15795 Creating a frame pointer however, corrupts the IP
15796 register, so we must push it first. */
15797 insn = emit_multi_reg_push (1 << IP_REGNUM);
15799 /* Do not set RTX_FRAME_RELATED_P on this insn.
15800 The dwarf stack unwinding code only wants to see one
15801 stack decrement per function, and this is not it. If
15802 this instruction is labeled as being part of the frame
15803 creation sequence then dwarf2out_frame_debug_expr will
15804 die when it encounters the assignment of IP to FP
15805 later on, since the use of SP here establishes SP as
15806 the CFA register and not IP.
15808 Anyway this instruction is not really part of the stack
15809 frame creation although it is part of the prologue. */
15811 else if (IS_NESTED (func_type))
15813 /* The static chain register is the same as the IP register
15814 used as a scratch register during stack frame creation.
15815 To get around this we need to find somewhere to store IP
15816 whilst the frame is being created. We try the following
15817 places in order:
15819 1. The last argument register.
15820 2. A slot on the stack above the frame. (This only
15821 works if the function is not a varargs function).
15822 3. Register r3, after pushing the argument registers
15823 onto the stack.
15825 Note - we only need to tell the dwarf2 backend about the SP
15826 adjustment in the second variant; the static chain register
15827 doesn't need to be unwound, as it doesn't contain a value
15828 inherited from the caller. */
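/* For example, in the second case below the IP save is emitted as a
   single pre-decrement store, roughly "str ip, [sp, #-4]!", and only
   the 4-byte SP adjustment is reported to the dwarf2 backend (a
   sketch, not a guaranteed assembly form).  */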
15830 if (df_regs_ever_live_p (3) == false)
15831 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15832 else if (args_to_push == 0)
15834 rtx dwarf;
15836 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15837 saved_regs += 4;
15839 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15840 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15841 fp_offset = 4;
15843 /* Just tell the dwarf backend that we adjusted SP. */
15844 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15845 plus_constant (stack_pointer_rtx,
15846 -fp_offset));
15847 RTX_FRAME_RELATED_P (insn) = 1;
15848 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15850 else
15852 /* Store the args on the stack. */
15853 if (cfun->machine->uses_anonymous_args)
15854 insn = emit_multi_reg_push
15855 ((0xf0 >> (args_to_push / 4)) & 0xf);
15856 else
15857 insn = emit_insn
15858 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15859 GEN_INT (- args_to_push)));
15861 RTX_FRAME_RELATED_P (insn) = 1;
15863 saved_pretend_args = 1;
15864 fp_offset = args_to_push;
15865 args_to_push = 0;
15867 /* Now reuse r3 to preserve IP. */
15868 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15872 insn = emit_set_insn (ip_rtx,
15873 plus_constant (stack_pointer_rtx, fp_offset));
15874 RTX_FRAME_RELATED_P (insn) = 1;
15877 if (args_to_push)
15879 /* Push the argument registers, or reserve space for them. */
15880 if (cfun->machine->uses_anonymous_args)
15881 insn = emit_multi_reg_push
15882 ((0xf0 >> (args_to_push / 4)) & 0xf);
15883 else
15884 insn = emit_insn
15885 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15886 GEN_INT (- args_to_push)));
15887 RTX_FRAME_RELATED_P (insn) = 1;
15890 /* If this is an interrupt service routine, and the link register
15891 is going to be pushed, and we're not generating the extra
15892 push of IP (needed when a frame is required and the frame layout is APCS),
15893 then subtracting four from LR now will mean that the function return
15894 can be done with a single instruction. */
15895 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15896 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15897 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15898 && TARGET_ARM)
15900 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15902 emit_set_insn (lr, plus_constant (lr, -4));
15905 if (live_regs_mask)
15907 saved_regs += bit_count (live_regs_mask) * 4;
15908 if (optimize_size && !frame_pointer_needed
15909 && saved_regs == offsets->saved_regs - offsets->saved_args)
15911 /* If no coprocessor registers are being pushed and we don't have
15912 to worry about a frame pointer then push extra registers to
15913 create the stack frame. This is done in a way that does not
15914 alter the frame layout, so it is independent of the epilogue. */
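/* Worked example (illustrative values): if the frame needs 8 more
   bytes and the lowest register already being pushed is r4, then n
   becomes 2 and r0-r1 are added to live_regs_mask, so the single
   push both saves the registers and allocates the frame, replacing
   a separate "sub sp, sp, #8".  */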
15915 int n;
15916 int frame;
15917 n = 0;
15918 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15919 n++;
15920 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15921 if (frame && n * 4 >= frame)
15923 n = frame / 4;
15924 live_regs_mask |= (1 << n) - 1;
15925 saved_regs += frame;
15928 insn = emit_multi_reg_push (live_regs_mask);
15929 RTX_FRAME_RELATED_P (insn) = 1;
15932 if (! IS_VOLATILE (func_type))
15933 saved_regs += arm_save_coproc_regs ();
15935 if (frame_pointer_needed && TARGET_ARM)
15937 /* Create the new frame pointer. */
15938 if (TARGET_APCS_FRAME)
15940 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15941 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15942 RTX_FRAME_RELATED_P (insn) = 1;
15944 if (IS_NESTED (func_type))
15946 /* Recover the static chain register. */
15947 if (!df_regs_ever_live_p (3)
15948 || saved_pretend_args)
15949 insn = gen_rtx_REG (SImode, 3);
15950 else /* if (crtl->args.pretend_args_size == 0) */
15952 insn = plus_constant (hard_frame_pointer_rtx, 4);
15953 insn = gen_frame_mem (SImode, insn);
15955 emit_set_insn (ip_rtx, insn);
15956 /* Add a USE to stop propagate_one_insn() from barfing. */
15957 emit_insn (gen_prologue_use (ip_rtx));
15960 else
15962 insn = GEN_INT (saved_regs - 4);
15963 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15964 stack_pointer_rtx, insn));
15965 RTX_FRAME_RELATED_P (insn) = 1;
15969 if (flag_stack_usage)
15970 current_function_static_stack_size
15971 = offsets->outgoing_args - offsets->saved_args;
15973 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15975 /* This add can produce multiple insns for a large constant, so we
15976 need to get tricky. */
15977 rtx last = get_last_insn ();
15979 amount = GEN_INT (offsets->saved_args + saved_regs
15980 - offsets->outgoing_args);
15982 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15983 amount));
15986 last = last ? NEXT_INSN (last) : get_insns ();
15987 RTX_FRAME_RELATED_P (last) = 1;
15989 while (last != insn);
15991 /* If the frame pointer is needed, emit a special barrier that
15992 will prevent the scheduler from moving stores to the frame
15993 before the stack adjustment. */
15994 if (frame_pointer_needed)
15995 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15996 hard_frame_pointer_rtx));
16000 if (frame_pointer_needed && TARGET_THUMB2)
16001 thumb_set_frame_pointer (offsets);
16003 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16005 unsigned long mask;
16007 mask = live_regs_mask;
16008 mask &= THUMB2_WORK_REGS;
16009 if (!IS_NESTED (func_type))
16010 mask |= (1 << IP_REGNUM);
16011 arm_load_pic_register (mask);
16014 /* If we are profiling, make sure no instructions are scheduled before
16015 the call to mcount. Similarly if the user has requested no
16016 scheduling in the prolog. Similarly if we want non-call exceptions
16017 using the EABI unwinder, to prevent faulting instructions from being
16018 swapped with a stack adjustment. */
16019 if (crtl->profile || !TARGET_SCHED_PROLOG
16020 || (arm_except_unwind_info (&global_options) == UI_TARGET
16021 && cfun->can_throw_non_call_exceptions))
16022 emit_insn (gen_blockage ());
16024 /* If the link register is being kept alive, with the return address in it,
16025 then make sure that it does not get reused by the ce2 pass. */
16026 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16027 cfun->machine->lr_save_eliminated = 1;
16030 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16031 static void
16032 arm_print_condition (FILE *stream)
16034 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16036 /* Branch conversion is not implemented for Thumb-2. */
16037 if (TARGET_THUMB)
16039 output_operand_lossage ("predicated Thumb instruction");
16040 return;
16042 if (current_insn_predicate != NULL)
16044 output_operand_lossage
16045 ("predicated instruction in conditional sequence");
16046 return;
16049 fputs (arm_condition_codes[arm_current_cc], stream);
16051 else if (current_insn_predicate)
16053 enum arm_cond_code code;
16055 if (TARGET_THUMB1)
16057 output_operand_lossage ("predicated Thumb instruction");
16058 return;
16061 code = get_arm_condition_code (current_insn_predicate);
16062 fputs (arm_condition_codes[code], stream);
16067 /* If CODE is 'd', then X is a condition operand and the instruction
16068 should only be executed if the condition is true.
16069 If CODE is 'D', then X is a condition operand and the instruction
16070 should only be executed if the condition is false: however, if the mode
16071 of the comparison is CCFPEmode, then always execute the instruction -- we
16072 do this because in these circumstances !GE does not necessarily imply LT;
16073 in these cases the instruction pattern will take care to make sure that
16074 an instruction containing %d will follow, thereby undoing the effects of
16075 doing this instruction unconditionally.
16076 If CODE is 'N' then X is a floating point operand that must be negated
16077 before output.
16078 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16079 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
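/* A few illustrative (non-exhaustive) examples of these codes:
   with X the constant 5, '%B' prints -6 (the bitwise inverse);
   with X a DImode value in r0, '%M' prints "{r0-r1}"; and '%N'
   prints the negated form of a floating point constant.  */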
16080 static void
16081 arm_print_operand (FILE *stream, rtx x, int code)
16083 switch (code)
16085 case '@':
16086 fputs (ASM_COMMENT_START, stream);
16087 return;
16089 case '_':
16090 fputs (user_label_prefix, stream);
16091 return;
16093 case '|':
16094 fputs (REGISTER_PREFIX, stream);
16095 return;
16097 case '?':
16098 arm_print_condition (stream);
16099 return;
16101 case '(':
16102 /* Nothing in unified syntax, otherwise the current condition code. */
16103 if (!TARGET_UNIFIED_ASM)
16104 arm_print_condition (stream);
16105 break;
16107 case ')':
16108 /* The current condition code in unified syntax, otherwise nothing. */
16109 if (TARGET_UNIFIED_ASM)
16110 arm_print_condition (stream);
16111 break;
16113 case '.':
16114 /* The current condition code for a condition code setting instruction.
16115 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16116 if (TARGET_UNIFIED_ASM)
16118 fputc('s', stream);
16119 arm_print_condition (stream);
16121 else
16123 arm_print_condition (stream);
16124 fputc('s', stream);
16126 return;
16128 case '!':
16129 /* If the instruction is conditionally executed then print
16130 the current condition code, otherwise print 's'. */
16131 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16132 if (current_insn_predicate)
16133 arm_print_condition (stream);
16134 else
16135 fputc('s', stream);
16136 break;
16138 /* %# is a "break" sequence. It doesn't output anything, but is used to
16139 separate e.g. operand numbers from following text, if that text consists
16140 of further digits which we don't want to be part of the operand
16141 number. */
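/* For instance, in a hypothetical output template "...%1%#2..." the
   '%#' keeps the literal '2' from being parsed as part of operand 1's
   number.  */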
16142 case '#':
16143 return;
16145 case 'N':
16147 REAL_VALUE_TYPE r;
16148 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16149 r = real_value_negate (&r);
16150 fprintf (stream, "%s", fp_const_from_val (&r));
16152 return;
16154 /* An integer or symbol address without a preceding # sign. */
16155 case 'c':
16156 switch (GET_CODE (x))
16158 case CONST_INT:
16159 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16160 break;
16162 case SYMBOL_REF:
16163 output_addr_const (stream, x);
16164 break;
16166 default:
16167 gcc_unreachable ();
16169 return;
16171 case 'B':
16172 if (GET_CODE (x) == CONST_INT)
16174 HOST_WIDE_INT val;
16175 val = ARM_SIGN_EXTEND (~INTVAL (x));
16176 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16178 else
16180 putc ('~', stream);
16181 output_addr_const (stream, x);
16183 return;
16185 case 'L':
16186 /* The low 16 bits of an immediate constant. */
16187 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16188 return;
16190 case 'i':
16191 fprintf (stream, "%s", arithmetic_instr (x, 1));
16192 return;
16194 /* Truncate Cirrus shift counts. */
16195 case 's':
16196 if (GET_CODE (x) == CONST_INT)
16198 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16199 return;
16201 arm_print_operand (stream, x, 0);
16202 return;
16204 case 'I':
16205 fprintf (stream, "%s", arithmetic_instr (x, 0));
16206 return;
16208 case 'S':
16210 HOST_WIDE_INT val;
16211 const char *shift;
16213 if (!shift_operator (x, SImode))
16215 output_operand_lossage ("invalid shift operand");
16216 break;
16219 shift = shift_op (x, &val);
16221 if (shift)
16223 fprintf (stream, ", %s ", shift);
16224 if (val == -1)
16225 arm_print_operand (stream, XEXP (x, 1), 0);
16226 else
16227 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16230 return;
16232 /* An explanation of the 'Q', 'R' and 'H' register operands:
16234 In a pair of registers containing a DI or DF value the 'Q'
16235 operand returns the register number of the register containing
16236 the least significant part of the value. The 'R' operand returns
16237 the register number of the register containing the most
16238 significant part of the value.
16240 The 'H' operand returns the higher of the two register numbers.
16241 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
16242 same as the 'Q' operand, since the most significant part of the
16243 value is held in the lower-numbered register. The reverse is true
16244 on systems where WORDS_BIG_ENDIAN is false.
16246 The purpose of these operands is to distinguish between cases
16247 where the endian-ness of the values is important (for example
16248 when they are added together), and cases where the endian-ness
16249 is irrelevant, but the order of register operations is important.
16250 For example when loading a value from memory into a register
16251 pair, the endian-ness does not matter. Provided that the value
16252 from the lower memory address is put into the lower numbered
16253 register, and the value from the higher address is put into the
16254 higher numbered register, the load will work regardless of whether
16255 the value being loaded is big-wordian or little-wordian. The
16256 order of the two register loads can matter however, if the address
16257 of the memory location is actually held in one of the registers
16258 being overwritten by the load.
16260 The 'Q' and 'R' constraints are also available for 64-bit
16261 constants. */
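/* Worked example (little-endian, WORDS_BIG_ENDIAN false): for a
   DImode value held in r0/r1, 'Q' prints r0 (the least significant
   word), 'R' prints r1 (the most significant word) and 'H' prints r1
   (the higher-numbered register).  On a word-big-endian target 'R'
   would print r0 while 'H' still prints r1.  */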
16262 case 'Q':
16263 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16265 rtx part = gen_lowpart (SImode, x);
16266 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16267 return;
16270 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16272 output_operand_lossage ("invalid operand for code '%c'", code);
16273 return;
16276 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16277 return;
16279 case 'R':
16280 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16282 enum machine_mode mode = GET_MODE (x);
16283 rtx part;
16285 if (mode == VOIDmode)
16286 mode = DImode;
16287 part = gen_highpart_mode (SImode, mode, x);
16288 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16289 return;
16292 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16294 output_operand_lossage ("invalid operand for code '%c'", code);
16295 return;
16298 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16299 return;
16301 case 'H':
16302 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16304 output_operand_lossage ("invalid operand for code '%c'", code);
16305 return;
16308 asm_fprintf (stream, "%r", REGNO (x) + 1);
16309 return;
16311 case 'J':
16312 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16314 output_operand_lossage ("invalid operand for code '%c'", code);
16315 return;
16318 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16319 return;
16321 case 'K':
16322 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16324 output_operand_lossage ("invalid operand for code '%c'", code);
16325 return;
16328 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16329 return;
16331 case 'm':
16332 asm_fprintf (stream, "%r",
16333 GET_CODE (XEXP (x, 0)) == REG
16334 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16335 return;
16337 case 'M':
16338 asm_fprintf (stream, "{%r-%r}",
16339 REGNO (x),
16340 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16341 return;
16343 /* Like 'M', but writing doubleword vector registers, for use by Neon
16344 insns. */
16345 case 'h':
16347 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16348 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16349 if (numregs == 1)
16350 asm_fprintf (stream, "{d%d}", regno);
16351 else
16352 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16354 return;
16356 case 'd':
16357 /* CONST_TRUE_RTX means always -- that's the default. */
16358 if (x == const_true_rtx)
16359 return;
16361 if (!COMPARISON_P (x))
16363 output_operand_lossage ("invalid operand for code '%c'", code);
16364 return;
16367 fputs (arm_condition_codes[get_arm_condition_code (x)],
16368 stream);
16369 return;
16371 case 'D':
16372 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16373 want to do that. */
16374 if (x == const_true_rtx)
16376 output_operand_lossage ("instruction never executed");
16377 return;
16379 if (!COMPARISON_P (x))
16381 output_operand_lossage ("invalid operand for code '%c'", code);
16382 return;
16385 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16386 (get_arm_condition_code (x))],
16387 stream);
16388 return;
16390 /* Cirrus registers can be accessed in a variety of ways:
16391 single floating point (f)
16392 double floating point (d)
16393 32bit integer (fx)
16394 64bit integer (dx). */
16395 case 'W': /* Cirrus register in F mode. */
16396 case 'X': /* Cirrus register in D mode. */
16397 case 'Y': /* Cirrus register in FX mode. */
16398 case 'Z': /* Cirrus register in DX mode. */
16399 gcc_assert (GET_CODE (x) == REG
16400 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16402 fprintf (stream, "mv%s%s",
16403 code == 'W' ? "f"
16404 : code == 'X' ? "d"
16405 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16407 return;
16409 /* Print cirrus register in the mode specified by the register's mode. */
16410 case 'V':
16412 int mode = GET_MODE (x);
16414 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16416 output_operand_lossage ("invalid operand for code '%c'", code);
16417 return;
16420 fprintf (stream, "mv%s%s",
16421 mode == DFmode ? "d"
16422 : mode == SImode ? "fx"
16423 : mode == DImode ? "dx"
16424 : "f", reg_names[REGNO (x)] + 2);
16426 return;
16429 case 'U':
16430 if (GET_CODE (x) != REG
16431 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16432 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16433 /* Bad value for wCG register number. */
16435 output_operand_lossage ("invalid operand for code '%c'", code);
16436 return;
16439 else
16440 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16441 return;
16443 /* Print an iWMMXt control register name. */
16444 case 'w':
16445 if (GET_CODE (x) != CONST_INT
16446 || INTVAL (x) < 0
16447 || INTVAL (x) >= 16)
16448 /* Bad value for wC register number. */
16450 output_operand_lossage ("invalid operand for code '%c'", code);
16451 return;
16454 else
16456 static const char * wc_reg_names [16] =
16458 "wCID", "wCon", "wCSSF", "wCASF",
16459 "wC4", "wC5", "wC6", "wC7",
16460 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16461 "wC12", "wC13", "wC14", "wC15"
16464 fprintf (stream, wc_reg_names [INTVAL (x)]);
16466 return;
16468 /* Print the high single-precision register of a VFP double-precision
16469 register. */
16470 case 'p':
16472 int mode = GET_MODE (x);
16473 int regno;
16475 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16477 output_operand_lossage ("invalid operand for code '%c'", code);
16478 return;
16481 regno = REGNO (x);
16482 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16484 output_operand_lossage ("invalid operand for code '%c'", code);
16485 return;
16488 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16490 return;
16492 /* Print a VFP/Neon double precision or quad precision register name. */
16493 case 'P':
16494 case 'q':
16496 int mode = GET_MODE (x);
16497 int is_quad = (code == 'q');
16498 int regno;
16500 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16502 output_operand_lossage ("invalid operand for code '%c'", code);
16503 return;
16506 if (GET_CODE (x) != REG
16507 || !IS_VFP_REGNUM (REGNO (x)))
16509 output_operand_lossage ("invalid operand for code '%c'", code);
16510 return;
16513 regno = REGNO (x);
16514 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16515 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16517 output_operand_lossage ("invalid operand for code '%c'", code);
16518 return;
16521 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16522 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16524 return;
16526 /* These two codes print the low/high doubleword register of a Neon quad
16527 register, respectively. For pair-structure types, they can also print
16528 low/high quadword registers. */
16529 case 'e':
16530 case 'f':
16532 int mode = GET_MODE (x);
16533 int regno;
16535 if ((GET_MODE_SIZE (mode) != 16
16536 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16538 output_operand_lossage ("invalid operand for code '%c'", code);
16539 return;
16542 regno = REGNO (x);
16543 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16545 output_operand_lossage ("invalid operand for code '%c'", code);
16546 return;
16549 if (GET_MODE_SIZE (mode) == 16)
16550 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16551 + (code == 'f' ? 1 : 0));
16552 else
16553 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16554 + (code == 'f' ? 1 : 0));
16556 return;
16558 /* Print a VFPv3 floating-point constant, represented as an integer
16559 index. */
16560 case 'G':
16562 int index = vfp3_const_double_index (x);
16563 gcc_assert (index != -1);
16564 fprintf (stream, "%d", index);
16566 return;
16568 /* Print bits representing opcode features for Neon.
16570 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16571 and polynomials as unsigned.
16573 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16575 Bit 2 is 1 for rounding functions, 0 otherwise. */
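/* Worked example: a bits value of 3 (signed + float/poly) selects
   'f' for both 'T' and 'F', while a value of 2 (unsigned + float/poly)
   selects 'p' for 'T' but 'u' for 't'; setting bit 2 makes 'O' print
   "r" for a rounding variant.  */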
16577 /* Identify the type as 's', 'u', 'p' or 'f'. */
16578 case 'T':
16580 HOST_WIDE_INT bits = INTVAL (x);
16581 fputc ("uspf"[bits & 3], stream);
16583 return;
16585 /* Likewise, but signed and unsigned integers are both 'i'. */
16586 case 'F':
16588 HOST_WIDE_INT bits = INTVAL (x);
16589 fputc ("iipf"[bits & 3], stream);
16591 return;
16593 /* As for 'T', but emit 'u' instead of 'p'. */
16594 case 't':
16596 HOST_WIDE_INT bits = INTVAL (x);
16597 fputc ("usuf"[bits & 3], stream);
16599 return;
16601 /* Bit 2: rounding (vs none). */
16602 case 'O':
16604 HOST_WIDE_INT bits = INTVAL (x);
16605 fputs ((bits & 4) != 0 ? "r" : "", stream);
16607 return;
16609 /* Memory operand for vld1/vst1 instruction. */
16610 case 'A':
16612 rtx addr;
16613 bool postinc = FALSE;
16614 unsigned align, modesize, align_bits;
16616 gcc_assert (GET_CODE (x) == MEM);
16617 addr = XEXP (x, 0);
16618 if (GET_CODE (addr) == POST_INC)
16620 postinc = 1;
16621 addr = XEXP (addr, 0);
16623 asm_fprintf (stream, "[%r", REGNO (addr));
16625 /* We know the alignment of this access, so we can emit a hint in the
16626 instruction (for some alignments) as an aid to the memory subsystem
16627 of the target. */
16628 align = MEM_ALIGN (x) >> 3;
16629 modesize = GET_MODE_SIZE (GET_MODE (x));
16631 /* Only certain alignment specifiers are supported by the hardware. */
16632 if (modesize == 16 && (align % 32) == 0)
16633 align_bits = 256;
16634 else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
16635 align_bits = 128;
16636 else if ((align % 8) == 0)
16637 align_bits = 64;
16638 else
16639 align_bits = 0;
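/* For instance (sketch): a 16-byte access known to be 32-byte aligned
   is printed as "[r0:256]", an 8- or 16-byte access with 16-byte
   alignment as "[r0:128]", and an access that is merely word-aligned
   gets no alignment hint at all.  */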
16641 if (align_bits != 0)
16642 asm_fprintf (stream, ":%d", align_bits);
16644 asm_fprintf (stream, "]");
16646 if (postinc)
16647 fputs("!", stream);
16649 return;
16651 case 'C':
16653 rtx addr;
16655 gcc_assert (GET_CODE (x) == MEM);
16656 addr = XEXP (x, 0);
16657 gcc_assert (GET_CODE (addr) == REG);
16658 asm_fprintf (stream, "[%r]", REGNO (addr));
16660 return;
16662 /* Translate an S register number into a D register number and element index. */
16663 case 'y':
16665 int mode = GET_MODE (x);
16666 int regno;
16668 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16670 output_operand_lossage ("invalid operand for code '%c'", code);
16671 return;
16674 regno = REGNO (x);
16675 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16677 output_operand_lossage ("invalid operand for code '%c'", code);
16678 return;
16681 regno = regno - FIRST_VFP_REGNUM;
16682 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16684 return;
16686 /* Register specifier for vld1.16/vst1.16. Translate the S register
16687 number into a D register number and element index. */
16688 case 'z':
16690 int mode = GET_MODE (x);
16691 int regno;
16693 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16695 output_operand_lossage ("invalid operand for code '%c'", code);
16696 return;
16699 regno = REGNO (x);
16700 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16702 output_operand_lossage ("invalid operand for code '%c'", code);
16703 return;
16706 regno = regno - FIRST_VFP_REGNUM;
16707 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16709 return;
16711 default:
16712 if (x == 0)
16714 output_operand_lossage ("missing operand");
16715 return;
16718 switch (GET_CODE (x))
16720 case REG:
16721 asm_fprintf (stream, "%r", REGNO (x));
16722 break;
16724 case MEM:
16725 output_memory_reference_mode = GET_MODE (x);
16726 output_address (XEXP (x, 0));
16727 break;
16729 case CONST_DOUBLE:
16730 if (TARGET_NEON)
16732 char fpstr[20];
16733 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16734 sizeof (fpstr), 0, 1);
16735 fprintf (stream, "#%s", fpstr);
16737 else
16738 fprintf (stream, "#%s", fp_immediate_constant (x));
16739 break;
16741 default:
16742 gcc_assert (GET_CODE (x) != NEG);
16743 fputc ('#', stream);
16744 if (GET_CODE (x) == HIGH)
16746 fputs (":lower16:", stream);
16747 x = XEXP (x, 0);
16750 output_addr_const (stream, x);
16751 break;
16756 /* Target hook for printing a memory address. */
16757 static void
16758 arm_print_operand_address (FILE *stream, rtx x)
16760 if (TARGET_32BIT)
16762 int is_minus = GET_CODE (x) == MINUS;
16764 if (GET_CODE (x) == REG)
16765 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16766 else if (GET_CODE (x) == PLUS || is_minus)
16768 rtx base = XEXP (x, 0);
16769 rtx index = XEXP (x, 1);
16770 HOST_WIDE_INT offset = 0;
16771 if (GET_CODE (base) != REG
16772 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16774 /* Ensure that BASE is a register. */
16775 /* (one of them must be). */
16776 /* Also ensure the SP is not used as an index register. */
16777 rtx temp = base;
16778 base = index;
16779 index = temp;
16781 switch (GET_CODE (index))
16783 case CONST_INT:
16784 offset = INTVAL (index);
16785 if (is_minus)
16786 offset = -offset;
16787 asm_fprintf (stream, "[%r, #%wd]",
16788 REGNO (base), offset);
16789 break;
16791 case REG:
16792 asm_fprintf (stream, "[%r, %s%r]",
16793 REGNO (base), is_minus ? "-" : "",
16794 REGNO (index));
16795 break;
16797 case MULT:
16798 case ASHIFTRT:
16799 case LSHIFTRT:
16800 case ASHIFT:
16801 case ROTATERT:
16803 asm_fprintf (stream, "[%r, %s%r",
16804 REGNO (base), is_minus ? "-" : "",
16805 REGNO (XEXP (index, 0)));
16806 arm_print_operand (stream, index, 'S');
16807 fputs ("]", stream);
16808 break;
16811 default:
16812 gcc_unreachable ();
16815 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16816 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16818 extern enum machine_mode output_memory_reference_mode;
16820 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16822 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16823 asm_fprintf (stream, "[%r, #%s%d]!",
16824 REGNO (XEXP (x, 0)),
16825 GET_CODE (x) == PRE_DEC ? "-" : "",
16826 GET_MODE_SIZE (output_memory_reference_mode));
16827 else
16828 asm_fprintf (stream, "[%r], #%s%d",
16829 REGNO (XEXP (x, 0)),
16830 GET_CODE (x) == POST_DEC ? "-" : "",
16831 GET_MODE_SIZE (output_memory_reference_mode));
16833 else if (GET_CODE (x) == PRE_MODIFY)
16835 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16836 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16837 asm_fprintf (stream, "#%wd]!",
16838 INTVAL (XEXP (XEXP (x, 1), 1)));
16839 else
16840 asm_fprintf (stream, "%r]!",
16841 REGNO (XEXP (XEXP (x, 1), 1)));
16843 else if (GET_CODE (x) == POST_MODIFY)
16845 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16846 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16847 asm_fprintf (stream, "#%wd",
16848 INTVAL (XEXP (XEXP (x, 1), 1)));
16849 else
16850 asm_fprintf (stream, "%r",
16851 REGNO (XEXP (XEXP (x, 1), 1)));
16853 else output_addr_const (stream, x);
16855 else
16857 if (GET_CODE (x) == REG)
16858 asm_fprintf (stream, "[%r]", REGNO (x));
16859 else if (GET_CODE (x) == POST_INC)
16860 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16861 else if (GET_CODE (x) == PLUS)
16863 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16864 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16865 asm_fprintf (stream, "[%r, #%wd]",
16866 REGNO (XEXP (x, 0)),
16867 INTVAL (XEXP (x, 1)));
16868 else
16869 asm_fprintf (stream, "[%r, %r]",
16870 REGNO (XEXP (x, 0)),
16871 REGNO (XEXP (x, 1)));
16873 else
16874 output_addr_const (stream, x);
16878 /* Target hook for indicating whether a punctuation character for
16879 TARGET_PRINT_OPERAND is valid. */
16880 static bool
16881 arm_print_operand_punct_valid_p (unsigned char code)
16883 return (code == '@' || code == '|' || code == '.'
16884 || code == '(' || code == ')' || code == '#'
16885 || (TARGET_32BIT && (code == '?'))
16886 || (TARGET_THUMB2 && (code == '!'))
16887 || (TARGET_THUMB && (code == '_')));
16890 /* Target hook for assembling integer objects. The ARM version needs to
16891 handle word-sized values specially. */
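/* For example (illustrative), a word-sized reference to a non-local
   symbol in a PIC constant table is emitted roughly as
   "\t.word\tsym(GOT)", while a local symbol gets "(GOTOFF)"
   instead.  */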
16892 static bool
16893 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16895 enum machine_mode mode;
16897 if (size == UNITS_PER_WORD && aligned_p)
16899 fputs ("\t.word\t", asm_out_file);
16900 output_addr_const (asm_out_file, x);
16902 /* Mark symbols as position independent. We only do this in the
16903 .text segment, not in the .data segment. */
16904 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16905 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16907 /* See legitimize_pic_address for an explanation of the
16908 TARGET_VXWORKS_RTP check. */
16909 if (TARGET_VXWORKS_RTP
16910 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16911 fputs ("(GOT)", asm_out_file);
16912 else
16913 fputs ("(GOTOFF)", asm_out_file);
16915 fputc ('\n', asm_out_file);
16916 return true;
16919 mode = GET_MODE (x);
16921 if (arm_vector_mode_supported_p (mode))
16923 int i, units;
16925 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16927 units = CONST_VECTOR_NUNITS (x);
16928 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16930 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16931 for (i = 0; i < units; i++)
16933 rtx elt = CONST_VECTOR_ELT (x, i);
16934 assemble_integer
16935 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16937 else
16938 for (i = 0; i < units; i++)
16940 rtx elt = CONST_VECTOR_ELT (x, i);
16941 REAL_VALUE_TYPE rval;
16943 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16945 assemble_real
16946 (rval, GET_MODE_INNER (mode),
16947 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16950 return true;
16953 return default_assemble_integer (x, size, aligned_p);
16956 static void
16957 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16959 section *s;
16961 if (!TARGET_AAPCS_BASED)
16963 (is_ctor ?
16964 default_named_section_asm_out_constructor
16965 : default_named_section_asm_out_destructor) (symbol, priority);
16966 return;
16969 /* Put these in the .init_array section, using a special relocation. */
16970 if (priority != DEFAULT_INIT_PRIORITY)
16972 char buf[18];
16973 sprintf (buf, "%s.%.5u",
16974 is_ctor ? ".init_array" : ".fini_array",
16975 priority);
16976 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16978 else if (is_ctor)
16979 s = ctors_section;
16980 else
16981 s = dtors_section;
16983 switch_to_section (s);
16984 assemble_align (POINTER_SIZE);
16985 fputs ("\t.word\t", asm_out_file);
16986 output_addr_const (asm_out_file, symbol);
16987 fputs ("(target1)\n", asm_out_file);
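/* Illustrative output: a constructor registered with priority 65 on
   an AAPCS target ends up as a "\t.word\tfn(target1)" entry in a
   section named ".init_array.00065" (a sketch; the exact name follows
   the "%s.%.5u" format above).  */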
16990 /* Add a function to the list of static constructors. */
16992 static void
16993 arm_elf_asm_constructor (rtx symbol, int priority)
16995 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16998 /* Add a function to the list of static destructors. */
17000 static void
17001 arm_elf_asm_destructor (rtx symbol, int priority)
17003 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17006 /* A finite state machine takes care of noticing whether or not instructions
17007 can be conditionally executed, thus decreasing execution time and code
17008 size by deleting branch instructions. The fsm is controlled by
17009 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17011 /* The states of the fsm controlling condition codes are:
17012 0: normal, do nothing special
17013 1: make ASM_OUTPUT_OPCODE not output this instruction
17014 2: make ASM_OUTPUT_OPCODE not output this instruction
17015 3: make instructions conditional
17016 4: make instructions conditional
17018 State transitions (state->state by whom under condition):
17019 0 -> 1 final_prescan_insn if the `target' is a label
17020 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17021 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17022 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17023 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17024 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17025 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17026 (the target insn is arm_target_insn).
17028 If the jump clobbers the conditions then we use states 2 and 4.
17030 A similar thing can be done with conditional return insns.
17032 XXX In case the `target' is an unconditional branch, this conditionalising
17033 of the instructions always reduces code size, but not always execution
17034 time. But then, I want to reduce the code size to somewhere near what
17035 /bin/cc produces. */
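/* As a rough illustration of the transformation this fsm enables,
   a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be emitted instead as

	cmp	r0, #0
	addne	r1, r1, #1

   (register names and the skipped instruction are purely
   illustrative).  */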
17037 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17038 instructions. When a COND_EXEC instruction is seen the subsequent
17039 instructions are scanned so that multiple conditional instructions can be
17040 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17041 specify the length and true/false mask for the IT block. These will be
17042 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
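/* For Thumb-2 this means, for example, that a two-instruction
   conditional sequence can be emitted under a single IT block,
   roughly:

	ite	eq
	moveq	r0, #1
	movne	r0, #0

   (an illustrative sketch; arm_condexec_mask/masklen describe the
   't'/'e' pattern actually printed by arm_asm_output_opcode).  */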
17044 /* Returns the index of the ARM condition code string in
17045 `arm_condition_codes'. COMPARISON should be an rtx like
17046 `(eq (...) (...))'. */
17047 static enum arm_cond_code
17048 get_arm_condition_code (rtx comparison)
17050 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17051 enum arm_cond_code code;
17052 enum rtx_code comp_code = GET_CODE (comparison);
17054 if (GET_MODE_CLASS (mode) != MODE_CC)
17055 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17056 XEXP (comparison, 1));
17058 switch (mode)
17060 case CC_DNEmode: code = ARM_NE; goto dominance;
17061 case CC_DEQmode: code = ARM_EQ; goto dominance;
17062 case CC_DGEmode: code = ARM_GE; goto dominance;
17063 case CC_DGTmode: code = ARM_GT; goto dominance;
17064 case CC_DLEmode: code = ARM_LE; goto dominance;
17065 case CC_DLTmode: code = ARM_LT; goto dominance;
17066 case CC_DGEUmode: code = ARM_CS; goto dominance;
17067 case CC_DGTUmode: code = ARM_HI; goto dominance;
17068 case CC_DLEUmode: code = ARM_LS; goto dominance;
17069 case CC_DLTUmode: code = ARM_CC;
17071 dominance:
17072 gcc_assert (comp_code == EQ || comp_code == NE);
17074 if (comp_code == EQ)
17075 return ARM_INVERSE_CONDITION_CODE (code);
17076 return code;
17078 case CC_NOOVmode:
17079 switch (comp_code)
17081 case NE: return ARM_NE;
17082 case EQ: return ARM_EQ;
17083 case GE: return ARM_PL;
17084 case LT: return ARM_MI;
17085 default: gcc_unreachable ();
17088 case CC_Zmode:
17089 switch (comp_code)
17091 case NE: return ARM_NE;
17092 case EQ: return ARM_EQ;
17093 default: gcc_unreachable ();
17096 case CC_Nmode:
17097 switch (comp_code)
17099 case NE: return ARM_MI;
17100 case EQ: return ARM_PL;
17101 default: gcc_unreachable ();
17104 case CCFPEmode:
17105 case CCFPmode:
17106 /* These encodings assume that AC=1 in the FPA system control
17107 byte. This allows us to handle all cases except UNEQ and
17108 LTGT. */
17109 switch (comp_code)
17111 case GE: return ARM_GE;
17112 case GT: return ARM_GT;
17113 case LE: return ARM_LS;
17114 case LT: return ARM_MI;
17115 case NE: return ARM_NE;
17116 case EQ: return ARM_EQ;
17117 case ORDERED: return ARM_VC;
17118 case UNORDERED: return ARM_VS;
17119 case UNLT: return ARM_LT;
17120 case UNLE: return ARM_LE;
17121 case UNGT: return ARM_HI;
17122 case UNGE: return ARM_PL;
17123 /* UNEQ and LTGT do not have a representation. */
17124 case UNEQ: /* Fall through. */
17125 case LTGT: /* Fall through. */
17126 default: gcc_unreachable ();
17129 case CC_SWPmode:
17130 switch (comp_code)
17132 case NE: return ARM_NE;
17133 case EQ: return ARM_EQ;
17134 case GE: return ARM_LE;
17135 case GT: return ARM_LT;
17136 case LE: return ARM_GE;
17137 case LT: return ARM_GT;
17138 case GEU: return ARM_LS;
17139 case GTU: return ARM_CC;
17140 case LEU: return ARM_CS;
17141 case LTU: return ARM_HI;
17142 default: gcc_unreachable ();
17145 case CC_Cmode:
17146 switch (comp_code)
17148 case LTU: return ARM_CS;
17149 case GEU: return ARM_CC;
17150 default: gcc_unreachable ();
17153 case CC_CZmode:
17154 switch (comp_code)
17156 case NE: return ARM_NE;
17157 case EQ: return ARM_EQ;
17158 case GEU: return ARM_CS;
17159 case GTU: return ARM_HI;
17160 case LEU: return ARM_LS;
17161 case LTU: return ARM_CC;
17162 default: gcc_unreachable ();
17165 case CC_NCVmode:
17166 switch (comp_code)
17168 case GE: return ARM_GE;
17169 case LT: return ARM_LT;
17170 case GEU: return ARM_CS;
17171 case LTU: return ARM_CC;
17172 default: gcc_unreachable ();
17175 case CCmode:
17176 switch (comp_code)
17178 case NE: return ARM_NE;
17179 case EQ: return ARM_EQ;
17180 case GE: return ARM_GE;
17181 case GT: return ARM_GT;
17182 case LE: return ARM_LE;
17183 case LT: return ARM_LT;
17184 case GEU: return ARM_CS;
17185 case GTU: return ARM_HI;
17186 case LEU: return ARM_LS;
17187 case LTU: return ARM_CC;
17188 default: gcc_unreachable ();
17191 default: gcc_unreachable ();
17195 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17196 instructions. */
17197 void
17198 thumb2_final_prescan_insn (rtx insn)
17200 rtx first_insn = insn;
17201 rtx body = PATTERN (insn);
17202 rtx predicate;
17203 enum arm_cond_code code;
17204 int n;
17205 int mask;
17207 /* Remove the previous insn from the count of insns to be output. */
17208 if (arm_condexec_count)
17209 arm_condexec_count--;
17211 /* Nothing to do if we are already inside a conditional block. */
17212 if (arm_condexec_count)
17213 return;
17215 if (GET_CODE (body) != COND_EXEC)
17216 return;
17218 /* Conditional jumps are implemented directly. */
17219 if (GET_CODE (insn) == JUMP_INSN)
17220 return;
17222 predicate = COND_EXEC_TEST (body);
17223 arm_current_cc = get_arm_condition_code (predicate);
17225 n = get_attr_ce_count (insn);
17226 arm_condexec_count = 1;
17227 arm_condexec_mask = (1 << n) - 1;
17228 arm_condexec_masklen = n;
17229 /* See if subsequent instructions can be combined into the same block. */
17230 for (;;)
17232 insn = next_nonnote_insn (insn);
17234 /* Jumping into the middle of an IT block is illegal, so a label or
17235 barrier terminates the block. */
17236 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17237 break;
17239 body = PATTERN (insn);
17240 /* USE and CLOBBER aren't really insns, so just skip them. */
17241 if (GET_CODE (body) == USE
17242 || GET_CODE (body) == CLOBBER)
17243 continue;
17245 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17246 if (GET_CODE (body) != COND_EXEC)
17247 break;
17248 /* Allow up to 4 conditionally executed instructions in a block. */
17249 n = get_attr_ce_count (insn);
17250 if (arm_condexec_masklen + n > 4)
17251 break;
17253 predicate = COND_EXEC_TEST (body);
17254 code = get_arm_condition_code (predicate);
17255 mask = (1 << n) - 1;
17256 if (arm_current_cc == code)
17257 arm_condexec_mask |= (mask << arm_condexec_masklen);
17258 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17259 break;
17261 arm_condexec_count++;
17262 arm_condexec_masklen += n;
17264 /* A jump must be the last instruction in a conditional block. */
17265 if (GET_CODE(insn) == JUMP_INSN)
17266 break;
17268 /* Restore recog_data (getting the attributes of other insns can
17269 destroy this array, but final.c assumes that it remains intact
17270 across this call). */
17271 extract_constrain_insn_cached (first_insn);
17274 void
17275 arm_final_prescan_insn (rtx insn)
17277 /* BODY will hold the body of INSN. */
17278 rtx body = PATTERN (insn);
17280 /* This will be 1 if trying to repeat the trick, and things need to be
17281 reversed if it appears to fail. */
17282 int reverse = 0;
17284 /* If we start with a return insn, we only succeed if we find another one. */
17285 int seeking_return = 0;
17287 /* START_INSN will hold the insn from where we start looking. This is the
17288 first insn after the following code_label if REVERSE is true. */
17289 rtx start_insn = insn;
17291 /* If in state 4, check if the target branch is reached, in order to
17292 change back to state 0. */
17293 if (arm_ccfsm_state == 4)
17295 if (insn == arm_target_insn)
17297 arm_target_insn = NULL;
17298 arm_ccfsm_state = 0;
17300 return;
17303 /* If in state 3, it is possible to repeat the trick, if this insn is an
17304 unconditional branch to a label, and immediately following this branch
17305 is the previous target label which is only used once, and the label this
17306 branch jumps to is not too far off. */
17307 if (arm_ccfsm_state == 3)
17309 if (simplejump_p (insn))
17311 start_insn = next_nonnote_insn (start_insn);
17312 if (GET_CODE (start_insn) == BARRIER)
17314 /* XXX Isn't this always a barrier? */
17315 start_insn = next_nonnote_insn (start_insn);
17317 if (GET_CODE (start_insn) == CODE_LABEL
17318 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17319 && LABEL_NUSES (start_insn) == 1)
17320 reverse = TRUE;
17321 else
17322 return;
17324 else if (GET_CODE (body) == RETURN)
17326 start_insn = next_nonnote_insn (start_insn);
17327 if (GET_CODE (start_insn) == BARRIER)
17328 start_insn = next_nonnote_insn (start_insn);
17329 if (GET_CODE (start_insn) == CODE_LABEL
17330 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17331 && LABEL_NUSES (start_insn) == 1)
17333 reverse = TRUE;
17334 seeking_return = 1;
17336 else
17337 return;
17339 else
17340 return;
17343 gcc_assert (!arm_ccfsm_state || reverse);
17344 if (GET_CODE (insn) != JUMP_INSN)
17345 return;
17347 /* This jump might be paralleled with a clobber of the condition codes;
17348 the jump should always come first. */
17349 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17350 body = XVECEXP (body, 0, 0);
17352 if (reverse
17353 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17354 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17356 int insns_skipped;
17357 int fail = FALSE, succeed = FALSE;
17358 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17359 int then_not_else = TRUE;
17360 rtx this_insn = start_insn, label = 0;
17362 /* Register the insn jumped to. */
17363 if (reverse)
17365 if (!seeking_return)
17366 label = XEXP (SET_SRC (body), 0);
17368 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17369 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17370 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17372 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17373 then_not_else = FALSE;
17375 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17376 seeking_return = 1;
17377 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17379 seeking_return = 1;
17380 then_not_else = FALSE;
17382 else
17383 gcc_unreachable ();
17385 /* See how many insns this branch skips, and what kind of insns. If all
17386 insns are okay, and the label or unconditional branch to the same
17387 label is not too far away, succeed. */
17388 for (insns_skipped = 0;
17389 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17391 rtx scanbody;
17393 this_insn = next_nonnote_insn (this_insn);
17394 if (!this_insn)
17395 break;
17397 switch (GET_CODE (this_insn))
17399 case CODE_LABEL:
17400 /* Succeed if it is the target label, otherwise fail since
17401 control falls in from somewhere else. */
17402 if (this_insn == label)
17404 arm_ccfsm_state = 1;
17405 succeed = TRUE;
17407 else
17408 fail = TRUE;
17409 break;
17411 case BARRIER:
17412 /* Succeed if the following insn is the target label.
17413 Otherwise fail.
17414 If return insns are used then the last insn in a function
17415 will be a barrier. */
17416 this_insn = next_nonnote_insn (this_insn);
17417 if (this_insn && this_insn == label)
17419 arm_ccfsm_state = 1;
17420 succeed = TRUE;
17422 else
17423 fail = TRUE;
17424 break;
17426 case CALL_INSN:
17427 /* The AAPCS says that conditional calls should not be
17428 used since they make interworking inefficient (the
17429 linker can't transform BL<cond> into BLX). That's
17430 only a problem if the machine has BLX. */
17431 if (arm_arch5)
17433 fail = TRUE;
17434 break;
17437 /* Succeed if the following insn is the target label, or
17438 if the following two insns are a barrier and the
17439 target label. */
17440 this_insn = next_nonnote_insn (this_insn);
17441 if (this_insn && GET_CODE (this_insn) == BARRIER)
17442 this_insn = next_nonnote_insn (this_insn);
17444 if (this_insn && this_insn == label
17445 && insns_skipped < max_insns_skipped)
17447 arm_ccfsm_state = 1;
17448 succeed = TRUE;
17450 else
17451 fail = TRUE;
17452 break;
17454 case JUMP_INSN:
17455 /* If this is an unconditional branch to the same label, succeed.
17456 If it is to another label, do nothing. If it is conditional,
17457 fail. */
17458 /* XXX Probably, the tests for SET and the PC are
17459 unnecessary. */
17461 scanbody = PATTERN (this_insn);
17462 if (GET_CODE (scanbody) == SET
17463 && GET_CODE (SET_DEST (scanbody)) == PC)
17465 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17466 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17468 arm_ccfsm_state = 2;
17469 succeed = TRUE;
17471 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17472 fail = TRUE;
17474 /* Fail if a conditional return is undesirable (e.g. on a
17475 StrongARM), but still allow this if optimizing for size. */
17476 else if (GET_CODE (scanbody) == RETURN
17477 && !use_return_insn (TRUE, NULL)
17478 && !optimize_size)
17479 fail = TRUE;
17480 else if (GET_CODE (scanbody) == RETURN
17481 && seeking_return)
17483 arm_ccfsm_state = 2;
17484 succeed = TRUE;
17486 else if (GET_CODE (scanbody) == PARALLEL)
17488 switch (get_attr_conds (this_insn))
17490 case CONDS_NOCOND:
17491 break;
17492 default:
17493 fail = TRUE;
17494 break;
17497 else
17498 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17500 break;
17502 case INSN:
17503 /* Instructions using or affecting the condition codes make it
17504 fail. */
17505 scanbody = PATTERN (this_insn);
17506 if (!(GET_CODE (scanbody) == SET
17507 || GET_CODE (scanbody) == PARALLEL)
17508 || get_attr_conds (this_insn) != CONDS_NOCOND)
17509 fail = TRUE;
17511 /* A conditional Cirrus instruction must be followed by
17512 a non-Cirrus instruction. However, since we
17513 conditionalize instructions in this function, and since by
17514 the time we get here we can no longer add instructions
17515 (nops) because shorten_branches() has already been
17516 called, we simply disable conditionalizing Cirrus
17517 instructions to be safe. */
17518 if (GET_CODE (scanbody) != USE
17519 && GET_CODE (scanbody) != CLOBBER
17520 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17521 fail = TRUE;
17522 break;
17524 default:
17525 break;
17528 if (succeed)
17530 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17531 arm_target_label = CODE_LABEL_NUMBER (label);
17532 else
17534 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17536 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17538 this_insn = next_nonnote_insn (this_insn);
17539 gcc_assert (!this_insn
17540 || (GET_CODE (this_insn) != BARRIER
17541 && GET_CODE (this_insn) != CODE_LABEL));
17543 if (!this_insn)
17545 /* Oh, dear! We ran off the end. Give up. */
17546 extract_constrain_insn_cached (insn);
17547 arm_ccfsm_state = 0;
17548 arm_target_insn = NULL;
17549 return;
17551 arm_target_insn = this_insn;
17554 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17555 what it was. */
17556 if (!reverse)
17557 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17559 if (reverse || then_not_else)
17560 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17563 /* Restore recog_data (getting the attributes of other insns can
17564 destroy this array, but final.c assumes that it remains intact
17565 across this call). */
17566 extract_constrain_insn_cached (insn);
17570 /* Output IT instructions. */
17571 void
17572 thumb2_asm_output_opcode (FILE * stream)
17574 char buff[5];
17575 int n;
17577 if (arm_condexec_mask)
17579 for (n = 0; n < arm_condexec_masklen; n++)
17580 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17581 buff[n] = 0;
17582 asm_fprintf(stream, "i%s\t%s\n\t", buff,
17583 arm_condition_codes[arm_current_cc]);
17584 arm_condexec_mask = 0;
17588 /* Returns true if REGNO is a valid register
17589 for holding a quantity of type MODE. */
17591 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17593 if (GET_MODE_CLASS (mode) == MODE_CC)
17594 return (regno == CC_REGNUM
17595 || (TARGET_HARD_FLOAT && TARGET_VFP
17596 && regno == VFPCC_REGNUM));
17598 if (TARGET_THUMB1)
17599 /* For the Thumb we only allow values bigger than SImode in
17600 registers 0 - 6, so that there is always a second low
17601 register available to hold the upper part of the value.
17602 We probably ought to ensure that the register is the
17603 start of an even-numbered register pair. */
17604 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17606 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17607 && IS_CIRRUS_REGNUM (regno))
17608 /* We have outlawed SI values in Cirrus registers because they
17609 reside in the lower 32 bits, but SF values reside in the
17610 upper 32 bits. This causes gcc all sorts of grief. We can't
17611 even split the registers into pairs because Cirrus SI values
17612 get sign extended to 64bits-- aldyh. */
17613 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17615 if (TARGET_HARD_FLOAT && TARGET_VFP
17616 && IS_VFP_REGNUM (regno))
17618 if (mode == SFmode || mode == SImode)
17619 return VFP_REGNO_OK_FOR_SINGLE (regno);
17621 if (mode == DFmode)
17622 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17624 /* VFP registers can hold HFmode values, but there is no point in
17625 putting them there unless we have hardware conversion insns. */
17626 if (mode == HFmode)
17627 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17629 if (TARGET_NEON)
17630 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17631 || (VALID_NEON_QREG_MODE (mode)
17632 && NEON_REGNO_OK_FOR_QUAD (regno))
17633 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17634 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17635 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17636 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17637 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17639 return FALSE;
17642 if (TARGET_REALLY_IWMMXT)
17644 if (IS_IWMMXT_GR_REGNUM (regno))
17645 return mode == SImode;
17647 if (IS_IWMMXT_REGNUM (regno))
17648 return VALID_IWMMXT_REG_MODE (mode);
17651 /* We allow almost any value to be stored in the general registers.
17652 Restrict doubleword quantities to even register pairs so that we can
17653 use ldrd. Do not allow very large Neon structure opaque modes in
17654 general registers; they would use too many. */
17655 if (regno <= LAST_ARM_REGNUM)
17656 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17657 && ARM_NUM_REGS (mode) <= 4;
17659 if (regno == FRAME_POINTER_REGNUM
17660 || regno == ARG_POINTER_REGNUM)
17661 /* We only allow integers in the fake hard registers. */
17662 return GET_MODE_CLASS (mode) == MODE_INT;
17664 /* The only registers left are the FPA registers
17665 which we only allow to hold FP values. */
17666 return (TARGET_HARD_FLOAT && TARGET_FPA
17667 && GET_MODE_CLASS (mode) == MODE_FLOAT
17668 && regno >= FIRST_FPA_REGNUM
17669 	  && regno <= LAST_FPA_REGNUM);
17670 }
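/* For reference (an assumption about arm.h, not taken from this file):
   the HARD_REGNO_MODE_OK target macro is expected to forward here,
   roughly

     #define HARD_REGNO_MODE_OK(REGNO, MODE) \
       arm_hard_regno_mode_ok ((REGNO), (MODE))

   so the register allocator consults this function for every
   (register, mode) pair it considers.  */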
17672 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17673 not used in arm mode. */
17675 enum reg_class
17676 arm_regno_class (int regno)
17677 {
17678   if (TARGET_THUMB1)
17679     {
17680       if (regno == STACK_POINTER_REGNUM)
17681 	return STACK_REG;
17682       if (regno == CC_REGNUM)
17683 	return CC_REG;
17684       if (regno < 8)
17685 	return LO_REGS;
17686       return HI_REGS;
17687     }
17689 if (TARGET_THUMB2 && regno < 8)
17690 return LO_REGS;
17692 if ( regno <= LAST_ARM_REGNUM
17693 || regno == FRAME_POINTER_REGNUM
17694 || regno == ARG_POINTER_REGNUM)
17695 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17697 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17698 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17700 if (IS_CIRRUS_REGNUM (regno))
17701 return CIRRUS_REGS;
17703 if (IS_VFP_REGNUM (regno))
17705 if (regno <= D7_VFP_REGNUM)
17706 return VFP_D0_D7_REGS;
17707 else if (regno <= LAST_LO_VFP_REGNUM)
17708 return VFP_LO_REGS;
17709 else
17710 return VFP_HI_REGS;
17713 if (IS_IWMMXT_REGNUM (regno))
17714 return IWMMXT_REGS;
17716 if (IS_IWMMXT_GR_REGNUM (regno))
17717 return IWMMXT_GR_REGS;
17719   return FPA_REGS;
17720 }
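/* For reference (an assumption about arm.h, not taken from this file):
   REGNO_REG_CLASS is expected to be defined as

     #define REGNO_REG_CLASS(REGNO)  arm_regno_class (REGNO)

   so reload and the register allocator use the per-register class
   computed above.  */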
17722 /* Handle a special case when computing the offset
17723 of an argument from the frame pointer. */
17724 int
17725 arm_debugger_arg_offset (int value, rtx addr)
17726 {
17727 rtx insn;
17729 /* We are only interested if dbxout_parms() failed to compute the offset. */
17730 if (value != 0)
17731 return 0;
17733 /* We can only cope with the case where the address is held in a register. */
17734 if (GET_CODE (addr) != REG)
17735 return 0;
17737 /* If we are using the frame pointer to point at the argument, then
17738 an offset of 0 is correct. */
17739 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17740 return 0;
17742 /* If we are using the stack pointer to point at the
17743 argument, then an offset of 0 is correct. */
17744 /* ??? Check this is consistent with thumb2 frame layout. */
17745 if ((TARGET_THUMB || !frame_pointer_needed)
17746 && REGNO (addr) == SP_REGNUM)
17747 return 0;
17749 /* Oh dear. The argument is pointed to by a register rather
17750 than being held in a register, or being stored at a known
17751 offset from the frame pointer. Since GDB only understands
17752 those two kinds of argument we must translate the address
17753 held in the register into an offset from the frame pointer.
17754 We do this by searching through the insns for the function
17755 looking to see where this register gets its value. If the
17756 register is initialized from the frame pointer plus an offset
17757 then we are in luck and we can continue, otherwise we give up.
17759 This code is exercised by producing debugging information
17760 for a function with arguments like this:
17762 double func (double a, double b, int c, double d) {return d;}
17764 Without this code the stab for parameter 'd' will be set to
17765 an offset of 0 from the frame pointer, rather than 8. */
17767 /* The if() statement says:
17769 If the insn is a normal instruction
17770 and if the insn is setting the value in a register
17771 and if the register being set is the register holding the address of the argument
17772 and if the address is computed by an addition
17773 that involves adding to a register
17774 which is the frame pointer
17775 a constant integer
17777 then... */
17779   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17780     {
17781       if (   GET_CODE (insn) == INSN
17782 	  && GET_CODE (PATTERN (insn)) == SET
17783 	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17784 	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17785 	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17786 	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17787 	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17788 	 )
17789 	{
17790 	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17792 	  break;
17793 	}
17794     }
17796   if (value == 0)
17797     {
17798       debug_rtx (addr);
17799       warning (0, "unable to compute real location of stacked parameter");
17800       value = 8; /* XXX magic hack */
17801     }
17803   return value;
17804 }
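/* For reference (an assumption about arm.h, not taken from this file):
   the debug machinery is expected to reach this function through the
   DEBUGGER_ARG_OFFSET target macro, roughly

     #define DEBUGGER_ARG_OFFSET(value, addr) \
       ((value) ? (value) : arm_debugger_arg_offset (value, addr))

   i.e. it is only consulted when dbxout could not compute an offset
   itself.  */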
17806 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17807   do \
17808     { \
17809       if ((MASK) & insn_flags) \
17810 	add_builtin_function ((NAME), (TYPE), (CODE), \
17811 			      BUILT_IN_MD, NULL, NULL_TREE); \
17812     } \
17813   while (0)
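/* Illustration (the call shown is one that actually appears later in this
   file): a use such as

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
		   ARM_BUILTIN_WZERO);

   registers the builtin with add_builtin_function only when the selected
   CPU's insn_flags include FL_IWMMXT, so iWMMXt builtins are simply not
   created on cores without that extension.  */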
17815 struct builtin_description
17816 {
17817 const unsigned int mask;
17818 const enum insn_code icode;
17819 const char * const name;
17820 const enum arm_builtins code;
17821 const enum rtx_code comparison;
17822   const unsigned int flag;
17823 };
17825 static const struct builtin_description bdesc_2arg[] =
17826 {
17827 #define IWMMXT_BUILTIN(code, string, builtin) \
17828 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17829 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
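/* Illustrative expansion (not part of the original file): with the macro
   above, the first entry below,

     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)

   expands to

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   tying the user-visible builtin name to the named insn pattern.  */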
17831 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17832 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17833 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17834 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17835 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17836 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17837 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17838 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17839 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17840 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17841 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17842 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17843 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17844 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17845 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17846 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17847 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17848 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17849 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17850 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17851 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17852 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17853 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17854 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17855 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17856 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17857 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17858 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17859 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17860 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17861 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17862 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17863 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17864 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17865 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17866 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17867 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17868 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17869 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17870 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17871 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17872 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17873 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17874 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17875 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17876 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17877 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17878 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17879 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17880 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17881 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17882 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17883 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17884 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17885 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17886 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17887 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17888 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17890 #define IWMMXT_BUILTIN2(code, builtin) \
17891 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17893 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17894 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17895 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17896 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17897 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17898 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17899 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17900 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17901 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17902 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17903 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17904 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17905 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17906 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17907 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17908 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17909 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17910 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17911 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17912 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17913 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17914 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17915 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17916 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17917 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17918 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17919 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17920 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17921 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17922 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17923 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17924   IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
17925 };
17927 static const struct builtin_description bdesc_1arg[] =
17928 {
17929 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17930 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17931 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17932 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17933 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17934 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17935 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17936 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17937 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17938 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17939 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17940 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17941 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17942 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17943 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17944 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17945 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17946   IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17947 };
17949 /* Set up all the iWMMXt builtins. This is
17950 not called if TARGET_IWMMXT is zero. */
17952 static void
17953 arm_init_iwmmxt_builtins (void)
17954 {
17955 const struct builtin_description * d;
17956 size_t i;
17957 tree endlink = void_list_node;
17959 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17960 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17961 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
17963 tree int_ftype_int
17964 = build_function_type (integer_type_node,
17965 tree_cons (NULL_TREE, integer_type_node, endlink));
17966 tree v8qi_ftype_v8qi_v8qi_int
17967 = build_function_type (V8QI_type_node,
17968 tree_cons (NULL_TREE, V8QI_type_node,
17969 tree_cons (NULL_TREE, V8QI_type_node,
17970 tree_cons (NULL_TREE,
17971 integer_type_node,
17972 endlink))));
17973 tree v4hi_ftype_v4hi_int
17974 = build_function_type (V4HI_type_node,
17975 tree_cons (NULL_TREE, V4HI_type_node,
17976 tree_cons (NULL_TREE, integer_type_node,
17977 endlink)));
17978 tree v2si_ftype_v2si_int
17979 = build_function_type (V2SI_type_node,
17980 tree_cons (NULL_TREE, V2SI_type_node,
17981 tree_cons (NULL_TREE, integer_type_node,
17982 endlink)));
17983 tree v2si_ftype_di_di
17984 = build_function_type (V2SI_type_node,
17985 tree_cons (NULL_TREE, long_long_integer_type_node,
17986 tree_cons (NULL_TREE, long_long_integer_type_node,
17987 endlink)));
17988 tree di_ftype_di_int
17989 = build_function_type (long_long_integer_type_node,
17990 tree_cons (NULL_TREE, long_long_integer_type_node,
17991 tree_cons (NULL_TREE, integer_type_node,
17992 endlink)));
17993 tree di_ftype_di_int_int
17994 = build_function_type (long_long_integer_type_node,
17995 tree_cons (NULL_TREE, long_long_integer_type_node,
17996 tree_cons (NULL_TREE, integer_type_node,
17997 tree_cons (NULL_TREE,
17998 integer_type_node,
17999 endlink))));
18000 tree int_ftype_v8qi
18001 = build_function_type (integer_type_node,
18002 tree_cons (NULL_TREE, V8QI_type_node,
18003 endlink));
18004 tree int_ftype_v4hi
18005 = build_function_type (integer_type_node,
18006 tree_cons (NULL_TREE, V4HI_type_node,
18007 endlink));
18008 tree int_ftype_v2si
18009 = build_function_type (integer_type_node,
18010 tree_cons (NULL_TREE, V2SI_type_node,
18011 endlink));
18012 tree int_ftype_v8qi_int
18013 = build_function_type (integer_type_node,
18014 tree_cons (NULL_TREE, V8QI_type_node,
18015 tree_cons (NULL_TREE, integer_type_node,
18016 endlink)));
18017 tree int_ftype_v4hi_int
18018 = build_function_type (integer_type_node,
18019 tree_cons (NULL_TREE, V4HI_type_node,
18020 tree_cons (NULL_TREE, integer_type_node,
18021 endlink)));
18022 tree int_ftype_v2si_int
18023 = build_function_type (integer_type_node,
18024 tree_cons (NULL_TREE, V2SI_type_node,
18025 tree_cons (NULL_TREE, integer_type_node,
18026 endlink)));
18027 tree v8qi_ftype_v8qi_int_int
18028 = build_function_type (V8QI_type_node,
18029 tree_cons (NULL_TREE, V8QI_type_node,
18030 tree_cons (NULL_TREE, integer_type_node,
18031 tree_cons (NULL_TREE,
18032 integer_type_node,
18033 endlink))));
18034 tree v4hi_ftype_v4hi_int_int
18035 = build_function_type (V4HI_type_node,
18036 tree_cons (NULL_TREE, V4HI_type_node,
18037 tree_cons (NULL_TREE, integer_type_node,
18038 tree_cons (NULL_TREE,
18039 integer_type_node,
18040 endlink))));
18041 tree v2si_ftype_v2si_int_int
18042 = build_function_type (V2SI_type_node,
18043 tree_cons (NULL_TREE, V2SI_type_node,
18044 tree_cons (NULL_TREE, integer_type_node,
18045 tree_cons (NULL_TREE,
18046 integer_type_node,
18047 endlink))));
18048 /* Miscellaneous. */
18049 tree v8qi_ftype_v4hi_v4hi
18050 = build_function_type (V8QI_type_node,
18051 tree_cons (NULL_TREE, V4HI_type_node,
18052 tree_cons (NULL_TREE, V4HI_type_node,
18053 endlink)));
18054 tree v4hi_ftype_v2si_v2si
18055 = build_function_type (V4HI_type_node,
18056 tree_cons (NULL_TREE, V2SI_type_node,
18057 tree_cons (NULL_TREE, V2SI_type_node,
18058 endlink)));
18059 tree v2si_ftype_v4hi_v4hi
18060 = build_function_type (V2SI_type_node,
18061 tree_cons (NULL_TREE, V4HI_type_node,
18062 tree_cons (NULL_TREE, V4HI_type_node,
18063 endlink)));
18064 tree v2si_ftype_v8qi_v8qi
18065 = build_function_type (V2SI_type_node,
18066 tree_cons (NULL_TREE, V8QI_type_node,
18067 tree_cons (NULL_TREE, V8QI_type_node,
18068 endlink)));
18069 tree v4hi_ftype_v4hi_di
18070 = build_function_type (V4HI_type_node,
18071 tree_cons (NULL_TREE, V4HI_type_node,
18072 tree_cons (NULL_TREE,
18073 long_long_integer_type_node,
18074 endlink)));
18075 tree v2si_ftype_v2si_di
18076 = build_function_type (V2SI_type_node,
18077 tree_cons (NULL_TREE, V2SI_type_node,
18078 tree_cons (NULL_TREE,
18079 long_long_integer_type_node,
18080 endlink)));
18081 tree void_ftype_int_int
18082 = build_function_type (void_type_node,
18083 tree_cons (NULL_TREE, integer_type_node,
18084 tree_cons (NULL_TREE, integer_type_node,
18085 endlink)));
18086 tree di_ftype_void
18087 = build_function_type (long_long_unsigned_type_node, endlink);
18088 tree di_ftype_v8qi
18089 = build_function_type (long_long_integer_type_node,
18090 tree_cons (NULL_TREE, V8QI_type_node,
18091 endlink));
18092 tree di_ftype_v4hi
18093 = build_function_type (long_long_integer_type_node,
18094 tree_cons (NULL_TREE, V4HI_type_node,
18095 endlink));
18096 tree di_ftype_v2si
18097 = build_function_type (long_long_integer_type_node,
18098 tree_cons (NULL_TREE, V2SI_type_node,
18099 endlink));
18100 tree v2si_ftype_v4hi
18101 = build_function_type (V2SI_type_node,
18102 tree_cons (NULL_TREE, V4HI_type_node,
18103 endlink));
18104 tree v4hi_ftype_v8qi
18105 = build_function_type (V4HI_type_node,
18106 tree_cons (NULL_TREE, V8QI_type_node,
18107 endlink));
18109 tree di_ftype_di_v4hi_v4hi
18110 = build_function_type (long_long_unsigned_type_node,
18111 tree_cons (NULL_TREE,
18112 long_long_unsigned_type_node,
18113 tree_cons (NULL_TREE, V4HI_type_node,
18114 tree_cons (NULL_TREE,
18115 V4HI_type_node,
18116 endlink))));
18118 tree di_ftype_v4hi_v4hi
18119 = build_function_type (long_long_unsigned_type_node,
18120 tree_cons (NULL_TREE, V4HI_type_node,
18121 tree_cons (NULL_TREE, V4HI_type_node,
18122 endlink)));
18124 /* Normal vector binops. */
18125 tree v8qi_ftype_v8qi_v8qi
18126 = build_function_type (V8QI_type_node,
18127 tree_cons (NULL_TREE, V8QI_type_node,
18128 tree_cons (NULL_TREE, V8QI_type_node,
18129 endlink)));
18130 tree v4hi_ftype_v4hi_v4hi
18131 = build_function_type (V4HI_type_node,
18132 tree_cons (NULL_TREE, V4HI_type_node,
18133 tree_cons (NULL_TREE, V4HI_type_node,
18134 endlink)));
18135 tree v2si_ftype_v2si_v2si
18136 = build_function_type (V2SI_type_node,
18137 tree_cons (NULL_TREE, V2SI_type_node,
18138 tree_cons (NULL_TREE, V2SI_type_node,
18139 endlink)));
18140 tree di_ftype_di_di
18141 = build_function_type (long_long_unsigned_type_node,
18142 tree_cons (NULL_TREE, long_long_unsigned_type_node,
18143 tree_cons (NULL_TREE,
18144 long_long_unsigned_type_node,
18145 endlink)));
18147 /* Add all builtins that are more or less simple operations on two
18148 operands. */
18149   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18150     {
18151 /* Use one of the operands; the target can have a different mode for
18152 mask-generating compares. */
18153 enum machine_mode mode;
18154 tree type;
18156 if (d->name == 0)
18157 continue;
18159 mode = insn_data[d->icode].operand[1].mode;
18161       switch (mode)
18162 	{
18163 case V8QImode:
18164 type = v8qi_ftype_v8qi_v8qi;
18165 break;
18166 case V4HImode:
18167 type = v4hi_ftype_v4hi_v4hi;
18168 break;
18169 case V2SImode:
18170 type = v2si_ftype_v2si_v2si;
18171 break;
18172 case DImode:
18173 type = di_ftype_di_di;
18174 break;
18176 default:
18177 	  gcc_unreachable ();
18178 	}
18180       def_mbuiltin (d->mask, d->name, type, d->code);
18181     }
18183 /* Add the remaining MMX insns with somewhat more complicated types. */
18184 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
18185 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
18186 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
18188 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
18189 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
18190 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
18191 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
18192 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
18193 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
18195 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
18196 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
18197 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
18198 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
18199 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
18200 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
18202 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
18203 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
18204 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
18205 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
18206 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
18207 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
18209 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
18210 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
18211 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
18212 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
18213 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
18214 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
18216 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
18218 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
18219 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
18220 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
18221 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
18223 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
18224 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
18225 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
18226 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
18227 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
18228 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
18229 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
18230 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
18231 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
18233 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
18234 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
18235 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
18237 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
18238 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
18239 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
18241 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
18242 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
18243 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
18244 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
18245 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
18246 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
18248 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
18249 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
18250 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
18251 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
18252 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
18253 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
18254 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
18255 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
18256 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
18257 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
18258 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
18259 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
18261 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
18262 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
18263 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
18264 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
18266 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
18267 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
18268 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
18269 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
18270 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
18271 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
18272 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
18275 static void
18276 arm_init_tls_builtins (void)
18278 tree ftype, decl;
18280 ftype = build_function_type (ptr_type_node, void_list_node);
18281 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
18282 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
18283 NULL, NULL_TREE);
18284 TREE_NOTHROW (decl) = 1;
18285   TREE_READONLY (decl) = 1;
18286 }
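/* Illustrative use (hypothetical user code, not part of this file):

     void *tp = __builtin_thread_pointer ();

   Marking the decl nothrow and readonly above lets repeated reads of the
   thread pointer be merged by the optimizers.  */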
18288 enum neon_builtin_type_bits {
18289 T_V8QI = 0x0001,
18290 T_V4HI = 0x0002,
18291 T_V2SI = 0x0004,
18292 T_V2SF = 0x0008,
18293 T_DI = 0x0010,
18294 T_V16QI = 0x0020,
18295 T_V8HI = 0x0040,
18296 T_V4SI = 0x0080,
18297 T_V4SF = 0x0100,
18298 T_V2DI = 0x0200,
18299 T_TI = 0x0400,
18300 T_EI = 0x0800,
18301   T_OI = 0x1000
18302 };
18304 #define v8qi_UP T_V8QI
18305 #define v4hi_UP T_V4HI
18306 #define v2si_UP T_V2SI
18307 #define v2sf_UP T_V2SF
18308 #define di_UP T_DI
18309 #define v16qi_UP T_V16QI
18310 #define v8hi_UP T_V8HI
18311 #define v4si_UP T_V4SI
18312 #define v4sf_UP T_V4SF
18313 #define v2di_UP T_V2DI
18314 #define ti_UP T_TI
18315 #define ei_UP T_EI
18316 #define oi_UP T_OI
18318 #define UP(X) X##_UP
18320 #define T_MAX 13
18322 typedef enum {
18323 NEON_BINOP,
18324 NEON_TERNOP,
18325 NEON_UNOP,
18326 NEON_GETLANE,
18327 NEON_SETLANE,
18328 NEON_CREATE,
18329 NEON_DUP,
18330 NEON_DUPLANE,
18331 NEON_COMBINE,
18332 NEON_SPLIT,
18333 NEON_LANEMUL,
18334 NEON_LANEMULL,
18335 NEON_LANEMULH,
18336 NEON_LANEMAC,
18337 NEON_SCALARMUL,
18338 NEON_SCALARMULL,
18339 NEON_SCALARMULH,
18340 NEON_SCALARMAC,
18341 NEON_CONVERT,
18342 NEON_FIXCONV,
18343 NEON_SELECT,
18344 NEON_RESULTPAIR,
18345 NEON_REINTERP,
18346 NEON_VTBL,
18347 NEON_VTBX,
18348 NEON_LOAD1,
18349 NEON_LOAD1LANE,
18350 NEON_STORE1,
18351 NEON_STORE1LANE,
18352 NEON_LOADSTRUCT,
18353 NEON_LOADSTRUCTLANE,
18354 NEON_STORESTRUCT,
18355 NEON_STORESTRUCTLANE,
18356 NEON_LOGICBINOP,
18357 NEON_SHIFTINSERT,
18358 NEON_SHIFTIMM,
18359 NEON_SHIFTACC
18360 } neon_itype;
18362 typedef struct {
18363 const char *name;
18364 const neon_itype itype;
18365 const int bits;
18366 const enum insn_code codes[T_MAX];
18367 const unsigned int num_vars;
18368 unsigned int base_fcode;
18369 } neon_builtin_datum;
18371 #define CF(N,X) CODE_FOR_neon_##N##X
18373 #define VAR1(T, N, A) \
18374 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
18375 #define VAR2(T, N, A, B) \
18376 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
18377 #define VAR3(T, N, A, B, C) \
18378 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
18379 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
18380 #define VAR4(T, N, A, B, C, D) \
18381 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
18382 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
18383 #define VAR5(T, N, A, B, C, D, E) \
18384 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
18385 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
18386 #define VAR6(T, N, A, B, C, D, E, F) \
18387 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
18388 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
18389 #define VAR7(T, N, A, B, C, D, E, F, G) \
18390 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
18391 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18392 CF (N, G) }, 7, 0
18393 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18394 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18395 | UP (H), \
18396 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18397 CF (N, G), CF (N, H) }, 8, 0
18398 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18399 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18400 | UP (H) | UP (I), \
18401 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18402 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
18403 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18404 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18405 | UP (H) | UP (I) | UP (J), \
18406 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18407 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
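/* Illustrative expansion (not part of the original file): an entry using
   one of the macros above, e.g.

     VAR2 (BINOP, vqdmull, v4hi, v2si)

   expands to the initializer

     "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
     { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0

   i.e. one neon_builtin_datum covering both element widths.  */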
18409 /* The mode entries in the following table correspond to the "key" type of the
18410 instruction variant, i.e. equivalent to that which would be specified after
18411 the assembler mnemonic, which usually refers to the last vector operand.
18412 (Signed/unsigned/polynomial types are not differentiated between though, and
18413 are all mapped onto the same mode for a given element size.) The modes
18414 listed per instruction should be the same as those defined for that
18415 instruction's pattern in neon.md.
18416 WARNING: Variants should be listed in the same increasing order as
18417 neon_builtin_type_bits. */
18419 static neon_builtin_datum neon_builtin_data[] =
18420 {
18421 { VAR10 (BINOP, vadd,
18422 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18423 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
18424 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
18425 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18426 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18427 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
18428 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18429 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18430 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
18431 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18432 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
18433 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
18434 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
18435 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
18436 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
18437 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
18438 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
18439 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
18440 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
18441 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
18442 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
18443 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
18444 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18445 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18446 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18447 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
18448 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
18449 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
18450 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18451 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18452 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18453 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
18454 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18455 { VAR10 (BINOP, vsub,
18456 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18457 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
18458 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
18459 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18460 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18461 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
18462 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18463 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18464 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18465 { VAR2 (BINOP, vcage, v2sf, v4sf) },
18466 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
18467 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18468 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18469 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
18470 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18471 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
18472 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18473 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18474 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
18475 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18476 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18477 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
18478 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
18479 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
18480 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
18481 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18482 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18483 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18484 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18485 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18486 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18487 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18488 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18489 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
18490 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
18491 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
18492 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18493 /* FIXME: vget_lane supports more variants than this! */
18494 { VAR10 (GETLANE, vget_lane,
18495 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18496 { VAR10 (SETLANE, vset_lane,
18497 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18498 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
18499 { VAR10 (DUP, vdup_n,
18500 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18501 { VAR10 (DUPLANE, vdup_lane,
18502 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18503 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
18504 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
18505 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
18506 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
18507 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
18508 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
18509 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
18510 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18511 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18512 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
18513 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
18514 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18515 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
18516 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
18517 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18518 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18519 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
18520 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
18521 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18522 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
18523 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
18524 { VAR10 (BINOP, vext,
18525 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18526 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18527 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
18528 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
18529 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
18530 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
18531 { VAR10 (SELECT, vbsl,
18532 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18533 { VAR1 (VTBL, vtbl1, v8qi) },
18534 { VAR1 (VTBL, vtbl2, v8qi) },
18535 { VAR1 (VTBL, vtbl3, v8qi) },
18536 { VAR1 (VTBL, vtbl4, v8qi) },
18537 { VAR1 (VTBX, vtbx1, v8qi) },
18538 { VAR1 (VTBX, vtbx2, v8qi) },
18539 { VAR1 (VTBX, vtbx3, v8qi) },
18540 { VAR1 (VTBX, vtbx4, v8qi) },
18541 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18542 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18543 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18544 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
18545 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
18546 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
18547 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
18548 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
18549 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
18550 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
18551 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
18552 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
18553 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
18554 { VAR10 (LOAD1, vld1,
18555 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18556 { VAR10 (LOAD1LANE, vld1_lane,
18557 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18558 { VAR10 (LOAD1, vld1_dup,
18559 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18560 { VAR10 (STORE1, vst1,
18561 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18562 { VAR10 (STORE1LANE, vst1_lane,
18563 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18564 { VAR9 (LOADSTRUCT,
18565 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18566 { VAR7 (LOADSTRUCTLANE, vld2_lane,
18567 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18568 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
18569 { VAR9 (STORESTRUCT, vst2,
18570 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18571 { VAR7 (STORESTRUCTLANE, vst2_lane,
18572 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18573 { VAR9 (LOADSTRUCT,
18574 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18575 { VAR7 (LOADSTRUCTLANE, vld3_lane,
18576 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18577 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
18578 { VAR9 (STORESTRUCT, vst3,
18579 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18580 { VAR7 (STORESTRUCTLANE, vst3_lane,
18581 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18582 { VAR9 (LOADSTRUCT, vld4,
18583 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18584 { VAR7 (LOADSTRUCTLANE, vld4_lane,
18585 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18586 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
18587 { VAR9 (STORESTRUCT, vst4,
18588 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18589 { VAR7 (STORESTRUCTLANE, vst4_lane,
18590 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18591 { VAR10 (LOGICBINOP, vand,
18592 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18593 { VAR10 (LOGICBINOP, vorr,
18594 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18595 { VAR10 (BINOP, veor,
18596 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18597 { VAR10 (LOGICBINOP, vbic,
18598 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18599 { VAR10 (LOGICBINOP, vorn,
18600     v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
18601 };
18603 #undef CF
18604 #undef VAR1
18605 #undef VAR2
18606 #undef VAR3
18607 #undef VAR4
18608 #undef VAR5
18609 #undef VAR6
18610 #undef VAR7
18611 #undef VAR8
18612 #undef VAR9
18613 #undef VAR10
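/* For illustration (a hypothetical user-level view, partly an assumption
   about arm_neon.h rather than something stated in this file): each table
   entry above becomes one builtin per listed mode, named by appending the
   mode to the entry name, e.g. the vadd entry yields
   __builtin_neon_vaddv8qi, __builtin_neon_vaddv4hi, and so on; these are
   the functions that the arm_neon.h intrinsics (vadd_s8, vaddq_f32, ...)
   are expected to call.  */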
18615 static void
18616 arm_init_neon_builtins (void)
18617 {
18618 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
18620 tree neon_intQI_type_node;
18621 tree neon_intHI_type_node;
18622 tree neon_polyQI_type_node;
18623 tree neon_polyHI_type_node;
18624 tree neon_intSI_type_node;
18625 tree neon_intDI_type_node;
18626 tree neon_float_type_node;
18628 tree intQI_pointer_node;
18629 tree intHI_pointer_node;
18630 tree intSI_pointer_node;
18631 tree intDI_pointer_node;
18632 tree float_pointer_node;
18634 tree const_intQI_node;
18635 tree const_intHI_node;
18636 tree const_intSI_node;
18637 tree const_intDI_node;
18638 tree const_float_node;
18640 tree const_intQI_pointer_node;
18641 tree const_intHI_pointer_node;
18642 tree const_intSI_pointer_node;
18643 tree const_intDI_pointer_node;
18644 tree const_float_pointer_node;
18646 tree V8QI_type_node;
18647 tree V4HI_type_node;
18648 tree V2SI_type_node;
18649 tree V2SF_type_node;
18650 tree V16QI_type_node;
18651 tree V8HI_type_node;
18652 tree V4SI_type_node;
18653 tree V4SF_type_node;
18654 tree V2DI_type_node;
18656 tree intUQI_type_node;
18657 tree intUHI_type_node;
18658 tree intUSI_type_node;
18659 tree intUDI_type_node;
18661 tree intEI_type_node;
18662 tree intOI_type_node;
18663 tree intCI_type_node;
18664 tree intXI_type_node;
18666 tree V8QI_pointer_node;
18667 tree V4HI_pointer_node;
18668 tree V2SI_pointer_node;
18669 tree V2SF_pointer_node;
18670 tree V16QI_pointer_node;
18671 tree V8HI_pointer_node;
18672 tree V4SI_pointer_node;
18673 tree V4SF_pointer_node;
18674 tree V2DI_pointer_node;
18676 tree void_ftype_pv8qi_v8qi_v8qi;
18677 tree void_ftype_pv4hi_v4hi_v4hi;
18678 tree void_ftype_pv2si_v2si_v2si;
18679 tree void_ftype_pv2sf_v2sf_v2sf;
18680 tree void_ftype_pdi_di_di;
18681 tree void_ftype_pv16qi_v16qi_v16qi;
18682 tree void_ftype_pv8hi_v8hi_v8hi;
18683 tree void_ftype_pv4si_v4si_v4si;
18684 tree void_ftype_pv4sf_v4sf_v4sf;
18685 tree void_ftype_pv2di_v2di_v2di;
18687 tree reinterp_ftype_dreg[5][5];
18688 tree reinterp_ftype_qreg[5][5];
18689 tree dreg_types[5], qreg_types[5];
18691 /* Create distinguished type nodes for NEON vector element types,
18692 and pointers to values of such types, so we can detect them later. */
18693 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18694 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18695 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18696 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18697 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18698 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18699 neon_float_type_node = make_node (REAL_TYPE);
18700 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18701 layout_type (neon_float_type_node);
18703 /* Define typedefs which exactly correspond to the modes we are basing vector
18704 types on. If you change these names you'll need to change
18705 the table used by arm_mangle_type too. */
18706 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18707 "__builtin_neon_qi");
18708 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18709 "__builtin_neon_hi");
18710 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18711 "__builtin_neon_si");
18712 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18713 "__builtin_neon_sf");
18714 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18715 "__builtin_neon_di");
18716 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18717 "__builtin_neon_poly8");
18718 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18719 "__builtin_neon_poly16");
18721 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18722 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18723 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18724 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18725 float_pointer_node = build_pointer_type (neon_float_type_node);
18727 /* Next create constant-qualified versions of the above types. */
18728 const_intQI_node = build_qualified_type (neon_intQI_type_node,
18729 TYPE_QUAL_CONST);
18730 const_intHI_node = build_qualified_type (neon_intHI_type_node,
18731 TYPE_QUAL_CONST);
18732 const_intSI_node = build_qualified_type (neon_intSI_type_node,
18733 TYPE_QUAL_CONST);
18734 const_intDI_node = build_qualified_type (neon_intDI_type_node,
18735 TYPE_QUAL_CONST);
18736 const_float_node = build_qualified_type (neon_float_type_node,
18737 TYPE_QUAL_CONST);
18739 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18740 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18741 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18742 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18743 const_float_pointer_node = build_pointer_type (const_float_node);
18745 /* Now create vector types based on our NEON element types. */
18746 /* 64-bit vectors. */
18747 V8QI_type_node =
18748 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18749 V4HI_type_node =
18750 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18751 V2SI_type_node =
18752 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18753 V2SF_type_node =
18754 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
18755 /* 128-bit vectors. */
18756 V16QI_type_node =
18757 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
18758 V8HI_type_node =
18759 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
18760 V4SI_type_node =
18761 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
18762 V4SF_type_node =
18763 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
18764 V2DI_type_node =
18765 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18767 /* Unsigned integer types for various mode sizes. */
18768 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18769 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18770 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18771 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18773 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18774 "__builtin_neon_uqi");
18775 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18776 "__builtin_neon_uhi");
18777 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18778 "__builtin_neon_usi");
18779 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18780 "__builtin_neon_udi");
18782 /* Opaque integer types for structures of vectors. */
18783 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18784 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18785 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18786 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18788 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18789 "__builtin_neon_ti");
18790 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18791 "__builtin_neon_ei");
18792 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18793 "__builtin_neon_oi");
18794 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18795 "__builtin_neon_ci");
18796 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18797 "__builtin_neon_xi");
18799 /* Pointers to vector types. */
18800 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18801 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18802 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18803 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18804 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18805 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18806 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18807 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18808 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18810 /* Operations which return results as pairs. */
18811 void_ftype_pv8qi_v8qi_v8qi =
18812 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18813 V8QI_type_node, NULL);
18814 void_ftype_pv4hi_v4hi_v4hi =
18815 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18816 V4HI_type_node, NULL);
18817 void_ftype_pv2si_v2si_v2si =
18818 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18819 V2SI_type_node, NULL);
18820 void_ftype_pv2sf_v2sf_v2sf =
18821 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18822 V2SF_type_node, NULL);
18823 void_ftype_pdi_di_di =
18824 build_function_type_list (void_type_node, intDI_pointer_node,
18825 neon_intDI_type_node, neon_intDI_type_node, NULL);
18826 void_ftype_pv16qi_v16qi_v16qi =
18827 build_function_type_list (void_type_node, V16QI_pointer_node,
18828 V16QI_type_node, V16QI_type_node, NULL);
18829 void_ftype_pv8hi_v8hi_v8hi =
18830 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18831 V8HI_type_node, NULL);
18832 void_ftype_pv4si_v4si_v4si =
18833 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18834 V4SI_type_node, NULL);
18835 void_ftype_pv4sf_v4sf_v4sf =
18836 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18837 V4SF_type_node, NULL);
18838 void_ftype_pv2di_v2di_v2di =
18839 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18840 V2DI_type_node, NULL);
18842 dreg_types[0] = V8QI_type_node;
18843 dreg_types[1] = V4HI_type_node;
18844 dreg_types[2] = V2SI_type_node;
18845 dreg_types[3] = V2SF_type_node;
18846 dreg_types[4] = neon_intDI_type_node;
18848 qreg_types[0] = V16QI_type_node;
18849 qreg_types[1] = V8HI_type_node;
18850 qreg_types[2] = V4SI_type_node;
18851 qreg_types[3] = V4SF_type_node;
18852 qreg_types[4] = V2DI_type_node;
18854   for (i = 0; i < 5; i++)
18855     {
18856       int j;
18857       for (j = 0; j < 5; j++)
18858 	{
18859 	  reinterp_ftype_dreg[i][j]
18860 	    = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18861 	  reinterp_ftype_qreg[i][j]
18862 	    = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
18863 	}
18864     }
18866   for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
18867     {
18868 neon_builtin_datum *d = &neon_builtin_data[i];
18869 unsigned int j, codeidx = 0;
18871 d->base_fcode = fcode;
18873       for (j = 0; j < T_MAX; j++)
18874 	{
18875 	  const char* const modenames[] = {
18876 	    "v8qi", "v4hi", "v2si", "v2sf", "di",
18877 	    "v16qi", "v8hi", "v4si", "v4sf", "v2di"
18878 	  };
18879 char namebuf[60];
18880 tree ftype = NULL;
18881 enum insn_code icode;
18882 int is_load = 0, is_store = 0;
18884 if ((d->bits & (1 << j)) == 0)
18885 continue;
18887 icode = d->codes[codeidx++];
18889 	  switch (d->itype)
18890 	    {
18891 case NEON_LOAD1:
18892 case NEON_LOAD1LANE:
18893 case NEON_LOADSTRUCT:
18894 case NEON_LOADSTRUCTLANE:
18895 is_load = 1;
18896 /* Fall through. */
18897 case NEON_STORE1:
18898 case NEON_STORE1LANE:
18899 case NEON_STORESTRUCT:
18900 case NEON_STORESTRUCTLANE:
18901 if (!is_load)
18902 is_store = 1;
18903 /* Fall through. */
18904 case NEON_UNOP:
18905 case NEON_BINOP:
18906 case NEON_LOGICBINOP:
18907 case NEON_SHIFTINSERT:
18908 case NEON_TERNOP:
18909 case NEON_GETLANE:
18910 case NEON_SETLANE:
18911 case NEON_CREATE:
18912 case NEON_DUP:
18913 case NEON_DUPLANE:
18914 case NEON_SHIFTIMM:
18915 case NEON_SHIFTACC:
18916 case NEON_COMBINE:
18917 case NEON_SPLIT:
18918 case NEON_CONVERT:
18919 case NEON_FIXCONV:
18920 case NEON_LANEMUL:
18921 case NEON_LANEMULL:
18922 case NEON_LANEMULH:
18923 case NEON_LANEMAC:
18924 case NEON_SCALARMUL:
18925 case NEON_SCALARMULL:
18926 case NEON_SCALARMULH:
18927 case NEON_SCALARMAC:
18928 case NEON_SELECT:
18929 case NEON_VTBL:
18930 case NEON_VTBX:
18932 int k;
18933 tree return_type = void_type_node, args = void_list_node;
18935 /* Build a function type directly from the insn_data for this
18936 builtin. The build_function_type() function takes care of
18937 removing duplicates for us. */
18938 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
18940 tree eltype;
18942 if (is_load && k == 1)
18944 /* Neon load patterns always have the memory operand
18945 (a SImode pointer) in the operand 1 position. We
18946 want a const pointer to the element type in that
18947 position. */
18948 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18950 switch (1 << j)
18952 case T_V8QI:
18953 case T_V16QI:
18954 eltype = const_intQI_pointer_node;
18955 break;
18957 case T_V4HI:
18958 case T_V8HI:
18959 eltype = const_intHI_pointer_node;
18960 break;
18962 case T_V2SI:
18963 case T_V4SI:
18964 eltype = const_intSI_pointer_node;
18965 break;
18967 case T_V2SF:
18968 case T_V4SF:
18969 eltype = const_float_pointer_node;
18970 break;
18972 case T_DI:
18973 case T_V2DI:
18974 eltype = const_intDI_pointer_node;
18975 break;
18977 default: gcc_unreachable ();
18980 else if (is_store && k == 0)
18982 /* Similarly, Neon store patterns use operand 0 as
18983 the memory location to store to (a SImode pointer).
18984 Use a pointer to the element type of the store in
18985 that position. */
18986 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18988 switch (1 << j)
18990 case T_V8QI:
18991 case T_V16QI:
18992 eltype = intQI_pointer_node;
18993 break;
18995 case T_V4HI:
18996 case T_V8HI:
18997 eltype = intHI_pointer_node;
18998 break;
19000 case T_V2SI:
19001 case T_V4SI:
19002 eltype = intSI_pointer_node;
19003 break;
19005 case T_V2SF:
19006 case T_V4SF:
19007 eltype = float_pointer_node;
19008 break;
19010 case T_DI:
19011 case T_V2DI:
19012 eltype = intDI_pointer_node;
19013 break;
19015 default: gcc_unreachable ();
19018 else
19020 switch (insn_data[icode].operand[k].mode)
19022 case VOIDmode: eltype = void_type_node; break;
19023 /* Scalars. */
19024 case QImode: eltype = neon_intQI_type_node; break;
19025 case HImode: eltype = neon_intHI_type_node; break;
19026 case SImode: eltype = neon_intSI_type_node; break;
19027 case SFmode: eltype = neon_float_type_node; break;
19028 case DImode: eltype = neon_intDI_type_node; break;
19029 case TImode: eltype = intTI_type_node; break;
19030 case EImode: eltype = intEI_type_node; break;
19031 case OImode: eltype = intOI_type_node; break;
19032 case CImode: eltype = intCI_type_node; break;
19033 case XImode: eltype = intXI_type_node; break;
19034 /* 64-bit vectors. */
19035 case V8QImode: eltype = V8QI_type_node; break;
19036 case V4HImode: eltype = V4HI_type_node; break;
19037 case V2SImode: eltype = V2SI_type_node; break;
19038 case V2SFmode: eltype = V2SF_type_node; break;
19039 /* 128-bit vectors. */
19040 case V16QImode: eltype = V16QI_type_node; break;
19041 case V8HImode: eltype = V8HI_type_node; break;
19042 case V4SImode: eltype = V4SI_type_node; break;
19043 case V4SFmode: eltype = V4SF_type_node; break;
19044 case V2DImode: eltype = V2DI_type_node; break;
19045 default: gcc_unreachable ();
19049 if (k == 0 && !is_store)
19050 return_type = eltype;
19051 else
19052 args = tree_cons (NULL_TREE, eltype, args);
19055 ftype = build_function_type (return_type, args);
19057 break;
19059 case NEON_RESULTPAIR:
19061 switch (insn_data[icode].operand[1].mode)
19063 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19064 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19065 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19066 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19067 case DImode: ftype = void_ftype_pdi_di_di; break;
19068 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19069 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19070 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19071 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19072 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19073 default: gcc_unreachable ();
19076 break;
19078 case NEON_REINTERP:
19080 /* We iterate over 5 doubleword types, then 5 quadword
19081 types. */
19082 int rhs = j % 5;
19083 switch (insn_data[icode].operand[0].mode)
19085 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19086 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19087 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19088 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19089 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19090 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19091 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19092 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19093 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19094 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19095 default: gcc_unreachable ();
19098 break;
19100 default:
19101 gcc_unreachable ();
19104 gcc_assert (ftype != NULL);
19106 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
19108 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
19109 NULL_TREE);
19114 static void
19115 arm_init_fp16_builtins (void)
19117 tree fp16_type = make_node (REAL_TYPE);
19118 TYPE_PRECISION (fp16_type) = 16;
19119 layout_type (fp16_type);
19120 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
19123 static void
19124 arm_init_builtins (void)
19126 arm_init_tls_builtins ();
19128 if (TARGET_REALLY_IWMMXT)
19129 arm_init_iwmmxt_builtins ();
19131 if (TARGET_NEON)
19132 arm_init_neon_builtins ();
19134 if (arm_fp16_format)
19135 arm_init_fp16_builtins ();
19138 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19140 static const char *
19141 arm_invalid_parameter_type (const_tree t)
19143 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19144 return N_("function parameters cannot have __fp16 type");
19145 return NULL;
19148 /* Implement TARGET_INVALID_RETURN_TYPE. */
19150 static const char *
19151 arm_invalid_return_type (const_tree t)
19153 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19154 return N_("functions cannot return __fp16 type");
19155 return NULL;
19158 /* Implement TARGET_PROMOTED_TYPE. */
19160 static tree
19161 arm_promoted_type (const_tree t)
19163 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19164 return float_type_node;
19165 return NULL_TREE;
19168 /* Implement TARGET_CONVERT_TO_TYPE.
19169 Specifically, this hook implements the peculiarity of the ARM
19170 half-precision floating-point C semantics that requires conversions
19171 between __fp16 and double to go through an intermediate conversion to float. */
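/* For example, converting an __fp16 value H to double is expanded as
   (double) (float) H, and narrowing a double to __fp16 likewise goes
   through float first; conversions between __fp16 and float themselves
   are left to the normal path (the hook returns NULL_TREE for them).  */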
19173 static tree
19174 arm_convert_to_type (tree type, tree expr)
19176 tree fromtype = TREE_TYPE (expr);
19177 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
19178 return NULL_TREE;
19179 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
19180 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
19181 return convert (type, convert (float_type_node, expr));
19182 return NULL_TREE;
19185 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19186 This simply adds HFmode as a supported mode; even though we don't
19187 implement arithmetic on this type directly, it's supported by
19188 optabs conversions, much the way the double-word arithmetic is
19189 special-cased in the default hook. */
19191 static bool
19192 arm_scalar_mode_supported_p (enum machine_mode mode)
19194 if (mode == HFmode)
19195 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
19196 else
19197 return default_scalar_mode_supported_p (mode);
19200 /* Errors in the source file can cause expand_expr to return const0_rtx
19201 where we expect a vector. To avoid crashing, use one of the vector
19202 clear instructions. */
19204 static rtx
19205 safe_vector_operand (rtx x, enum machine_mode mode)
19207 if (x != const0_rtx)
19208 return x;
19209 x = gen_reg_rtx (mode);
19211 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
19212 : gen_rtx_SUBREG (DImode, x, 0)));
19213 return x;
19216 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19218 static rtx
19219 arm_expand_binop_builtin (enum insn_code icode,
19220 tree exp, rtx target)
19222 rtx pat;
19223 tree arg0 = CALL_EXPR_ARG (exp, 0);
19224 tree arg1 = CALL_EXPR_ARG (exp, 1);
19225 rtx op0 = expand_normal (arg0);
19226 rtx op1 = expand_normal (arg1);
19227 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19228 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19229 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19231 if (VECTOR_MODE_P (mode0))
19232 op0 = safe_vector_operand (op0, mode0);
19233 if (VECTOR_MODE_P (mode1))
19234 op1 = safe_vector_operand (op1, mode1);
19236 if (! target
19237 || GET_MODE (target) != tmode
19238 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19239 target = gen_reg_rtx (tmode);
19241 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
19243 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19244 op0 = copy_to_mode_reg (mode0, op0);
19245 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19246 op1 = copy_to_mode_reg (mode1, op1);
19248 pat = GEN_FCN (icode) (target, op0, op1);
19249 if (! pat)
19250 return 0;
19251 emit_insn (pat);
19252 return target;
19255 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19257 static rtx
19258 arm_expand_unop_builtin (enum insn_code icode,
19259 tree exp, rtx target, int do_load)
19261 rtx pat;
19262 tree arg0 = CALL_EXPR_ARG (exp, 0);
19263 rtx op0 = expand_normal (arg0);
19264 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19265 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19267 if (! target
19268 || GET_MODE (target) != tmode
19269 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19270 target = gen_reg_rtx (tmode);
19271 if (do_load)
19272 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19273 else
19275 if (VECTOR_MODE_P (mode0))
19276 op0 = safe_vector_operand (op0, mode0);
19278 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19279 op0 = copy_to_mode_reg (mode0, op0);
19282 pat = GEN_FCN (icode) (target, op0);
19283 if (! pat)
19284 return 0;
19285 emit_insn (pat);
19286 return target;
19289 static int
19290 neon_builtin_compare (const void *a, const void *b)
19292 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
19293 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
19294 unsigned int soughtcode = key->base_fcode;
19296 if (soughtcode >= memb->base_fcode
19297 && soughtcode < memb->base_fcode + memb->num_vars)
19298 return 0;
19299 else if (soughtcode < memb->base_fcode)
19300 return -1;
19301 else
19302 return 1;
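/* Thus a key whose base_fcode lies anywhere in the half-open range
   [memb->base_fcode, memb->base_fcode + memb->num_vars) compares equal,
   so the bsearch in locate_neon_builtin_icode below finds the entry that
   covers a whole block of function codes rather than a single code.  */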
19305 static enum insn_code
19306 locate_neon_builtin_icode (int fcode, neon_itype *itype)
19308 neon_builtin_datum key
19309 = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
19310 neon_builtin_datum *found;
19311 int idx;
19313 key.base_fcode = fcode;
19314 found = (neon_builtin_datum *)
19315 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
19316 sizeof (neon_builtin_data[0]), neon_builtin_compare);
19317 gcc_assert (found);
19318 idx = fcode - (int) found->base_fcode;
19319 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
19321 if (itype)
19322 *itype = found->itype;
19324 return found->codes[idx];
19327 typedef enum {
19328 NEON_ARG_COPY_TO_REG,
19329 NEON_ARG_CONSTANT,
19330 NEON_ARG_STOP
19331 } builtin_arg;
19333 #define NEON_MAX_BUILTIN_ARGS 5
19335 /* Expand the operands of a Neon builtin and emit the matching insn.  The
   trailing arguments are a list of builtin_arg codes, terminated by
   NEON_ARG_STOP, describing how each operand of EXP is to be handled. */
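/* A typical call, as made from arm_expand_neon_builtin below, looks
   roughly like:
     arm_expand_neon_args (target, icode, 1, exp,
                           NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
                           NEON_ARG_STOP);
   i.e. one register operand and one constant operand, with the list
   terminated by NEON_ARG_STOP.  */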
19336 static rtx
19337 arm_expand_neon_args (rtx target, int icode, int have_retval,
19338 tree exp, ...)
19340 va_list ap;
19341 rtx pat;
19342 tree arg[NEON_MAX_BUILTIN_ARGS];
19343 rtx op[NEON_MAX_BUILTIN_ARGS];
19344 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19345 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
19346 int argc = 0;
19348 if (have_retval
19349 && (!target
19350 || GET_MODE (target) != tmode
19351 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
19352 target = gen_reg_rtx (tmode);
19354 va_start (ap, exp);
19356 for (;;)
19358 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
19360 if (thisarg == NEON_ARG_STOP)
19361 break;
19362 else
19364 arg[argc] = CALL_EXPR_ARG (exp, argc);
19365 op[argc] = expand_normal (arg[argc]);
19366 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
19368 switch (thisarg)
19370 case NEON_ARG_COPY_TO_REG:
19371 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19372 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19373 (op[argc], mode[argc]))
19374 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
19375 break;
19377 case NEON_ARG_CONSTANT:
19378 /* FIXME: This error message is somewhat unhelpful. */
19379 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19380 (op[argc], mode[argc]))
19381 error ("argument must be a constant");
19382 break;
19384 case NEON_ARG_STOP:
19385 gcc_unreachable ();
19388 argc++;
19392 va_end (ap);
19394 if (have_retval)
19395 switch (argc)
19397 case 1:
19398 pat = GEN_FCN (icode) (target, op[0]);
19399 break;
19401 case 2:
19402 pat = GEN_FCN (icode) (target, op[0], op[1]);
19403 break;
19405 case 3:
19406 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
19407 break;
19409 case 4:
19410 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19411 break;
19413 case 5:
19414 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19415 break;
19417 default:
19418 gcc_unreachable ();
19420 else
19421 switch (argc)
19423 case 1:
19424 pat = GEN_FCN (icode) (op[0]);
19425 break;
19427 case 2:
19428 pat = GEN_FCN (icode) (op[0], op[1]);
19429 break;
19431 case 3:
19432 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19433 break;
19435 case 4:
19436 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19437 break;
19439 case 5:
19440 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19441 break;
19443 default:
19444 gcc_unreachable ();
19447 if (!pat)
19448 return 0;
19450 emit_insn (pat);
19452 return target;
19455 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19456 constants defined per-instruction or per instruction-variant. Instead, the
19457 required info is looked up in the table neon_builtin_data. */
19458 static rtx
19459 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
19461 neon_itype itype;
19462 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
19464 switch (itype)
19466 case NEON_UNOP:
19467 case NEON_CONVERT:
19468 case NEON_DUPLANE:
19469 return arm_expand_neon_args (target, icode, 1, exp,
19470 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19472 case NEON_BINOP:
19473 case NEON_SETLANE:
19474 case NEON_SCALARMUL:
19475 case NEON_SCALARMULL:
19476 case NEON_SCALARMULH:
19477 case NEON_SHIFTINSERT:
19478 case NEON_LOGICBINOP:
19479 return arm_expand_neon_args (target, icode, 1, exp,
19480 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19481 NEON_ARG_STOP);
19483 case NEON_TERNOP:
19484 return arm_expand_neon_args (target, icode, 1, exp,
19485 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19486 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19488 case NEON_GETLANE:
19489 case NEON_FIXCONV:
19490 case NEON_SHIFTIMM:
19491 return arm_expand_neon_args (target, icode, 1, exp,
19492 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19493 NEON_ARG_STOP);
19495 case NEON_CREATE:
19496 return arm_expand_neon_args (target, icode, 1, exp,
19497 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19499 case NEON_DUP:
19500 case NEON_SPLIT:
19501 case NEON_REINTERP:
19502 return arm_expand_neon_args (target, icode, 1, exp,
19503 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19505 case NEON_COMBINE:
19506 case NEON_VTBL:
19507 return arm_expand_neon_args (target, icode, 1, exp,
19508 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19510 case NEON_RESULTPAIR:
19511 return arm_expand_neon_args (target, icode, 0, exp,
19512 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19513 NEON_ARG_STOP);
19515 case NEON_LANEMUL:
19516 case NEON_LANEMULL:
19517 case NEON_LANEMULH:
19518 return arm_expand_neon_args (target, icode, 1, exp,
19519 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19520 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19522 case NEON_LANEMAC:
19523 return arm_expand_neon_args (target, icode, 1, exp,
19524 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19525 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19527 case NEON_SHIFTACC:
19528 return arm_expand_neon_args (target, icode, 1, exp,
19529 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19530 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19532 case NEON_SCALARMAC:
19533 return arm_expand_neon_args (target, icode, 1, exp,
19534 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19535 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19537 case NEON_SELECT:
19538 case NEON_VTBX:
19539 return arm_expand_neon_args (target, icode, 1, exp,
19540 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19541 NEON_ARG_STOP);
19543 case NEON_LOAD1:
19544 case NEON_LOADSTRUCT:
19545 return arm_expand_neon_args (target, icode, 1, exp,
19546 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19548 case NEON_LOAD1LANE:
19549 case NEON_LOADSTRUCTLANE:
19550 return arm_expand_neon_args (target, icode, 1, exp,
19551 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19552 NEON_ARG_STOP);
19554 case NEON_STORE1:
19555 case NEON_STORESTRUCT:
19556 return arm_expand_neon_args (target, icode, 0, exp,
19557 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19559 case NEON_STORE1LANE:
19560 case NEON_STORESTRUCTLANE:
19561 return arm_expand_neon_args (target, icode, 0, exp,
19562 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19563 NEON_ARG_STOP);
19566 gcc_unreachable ();
19569 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19570 void
19571 neon_reinterpret (rtx dest, rtx src)
19573 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
19576 /* Emit code to place a Neon pair result in memory locations (with equal
19577 registers). */
19578 void
19579 neon_emit_pair_result_insn (enum machine_mode mode,
19580 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19581 rtx op1, rtx op2)
19583 rtx mem = gen_rtx_MEM (mode, destaddr);
19584 rtx tmp1 = gen_reg_rtx (mode);
19585 rtx tmp2 = gen_reg_rtx (mode);
19587 emit_insn (intfn (tmp1, op1, tmp2, op2));
19589 emit_move_insn (mem, tmp1);
19590 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19591 emit_move_insn (mem, tmp2);
19594 /* Set up operands for a register copy from src to dest, taking care not to
19595 clobber registers in the process.
19596 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19597 be called with a large N, so that should be OK. */
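/* For instance, given the two copies d1 := d0 and d0 := d2, emitting
   d0 := d2 first would clobber the source of the other copy; the scan
   below therefore places the d1 := d0 pair into OPERANDS first and only
   then d0 := d2.  */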
19599 void
19600 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
19602 unsigned int copied = 0, opctr = 0;
19603 unsigned int done = (1 << count) - 1;
19604 unsigned int i, j;
19606 while (copied != done)
19608 for (i = 0; i < count; i++)
19610 int good = 1;
19612 for (j = 0; good && j < count; j++)
19613 if (i != j && (copied & (1 << j)) == 0
19614 && reg_overlap_mentioned_p (src[j], dest[i]))
19615 good = 0;
19617 if (good)
19619 operands[opctr++] = dest[i];
19620 operands[opctr++] = src[i];
19621 copied |= 1 << i;
19626 gcc_assert (opctr == count * 2);
19629 /* Expand an expression EXP that calls a built-in function,
19630 with result going to TARGET if that's convenient
19631 (and in mode MODE if that's convenient).
19632 SUBTARGET may be used as the target for computing one of EXP's operands.
19633 IGNORE is nonzero if the value is to be ignored. */
19635 static rtx
19636 arm_expand_builtin (tree exp,
19637 rtx target,
19638 rtx subtarget ATTRIBUTE_UNUSED,
19639 enum machine_mode mode ATTRIBUTE_UNUSED,
19640 int ignore ATTRIBUTE_UNUSED)
19642 const struct builtin_description * d;
19643 enum insn_code icode;
19644 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19645 tree arg0;
19646 tree arg1;
19647 tree arg2;
19648 rtx op0;
19649 rtx op1;
19650 rtx op2;
19651 rtx pat;
19652 int fcode = DECL_FUNCTION_CODE (fndecl);
19653 size_t i;
19654 enum machine_mode tmode;
19655 enum machine_mode mode0;
19656 enum machine_mode mode1;
19657 enum machine_mode mode2;
19659 if (fcode >= ARM_BUILTIN_NEON_BASE)
19660 return arm_expand_neon_builtin (fcode, exp, target);
19662 switch (fcode)
19664 case ARM_BUILTIN_TEXTRMSB:
19665 case ARM_BUILTIN_TEXTRMUB:
19666 case ARM_BUILTIN_TEXTRMSH:
19667 case ARM_BUILTIN_TEXTRMUH:
19668 case ARM_BUILTIN_TEXTRMSW:
19669 case ARM_BUILTIN_TEXTRMUW:
19670 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19671 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19672 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19673 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19674 : CODE_FOR_iwmmxt_textrmw);
19676 arg0 = CALL_EXPR_ARG (exp, 0);
19677 arg1 = CALL_EXPR_ARG (exp, 1);
19678 op0 = expand_normal (arg0);
19679 op1 = expand_normal (arg1);
19680 tmode = insn_data[icode].operand[0].mode;
19681 mode0 = insn_data[icode].operand[1].mode;
19682 mode1 = insn_data[icode].operand[2].mode;
19684 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19685 op0 = copy_to_mode_reg (mode0, op0);
19686 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19688 /* @@@ better error message */
19689 error ("selector must be an immediate");
19690 return gen_reg_rtx (tmode);
19692 if (target == 0
19693 || GET_MODE (target) != tmode
19694 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19695 target = gen_reg_rtx (tmode);
19696 pat = GEN_FCN (icode) (target, op0, op1);
19697 if (! pat)
19698 return 0;
19699 emit_insn (pat);
19700 return target;
19702 case ARM_BUILTIN_TINSRB:
19703 case ARM_BUILTIN_TINSRH:
19704 case ARM_BUILTIN_TINSRW:
19705 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19706 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19707 : CODE_FOR_iwmmxt_tinsrw);
19708 arg0 = CALL_EXPR_ARG (exp, 0);
19709 arg1 = CALL_EXPR_ARG (exp, 1);
19710 arg2 = CALL_EXPR_ARG (exp, 2);
19711 op0 = expand_normal (arg0);
19712 op1 = expand_normal (arg1);
19713 op2 = expand_normal (arg2);
19714 tmode = insn_data[icode].operand[0].mode;
19715 mode0 = insn_data[icode].operand[1].mode;
19716 mode1 = insn_data[icode].operand[2].mode;
19717 mode2 = insn_data[icode].operand[3].mode;
19719 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19720 op0 = copy_to_mode_reg (mode0, op0);
19721 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19722 op1 = copy_to_mode_reg (mode1, op1);
19723 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19725 /* @@@ better error message */
19726 error ("selector must be an immediate");
19727 return const0_rtx;
19729 if (target == 0
19730 || GET_MODE (target) != tmode
19731 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19732 target = gen_reg_rtx (tmode);
19733 pat = GEN_FCN (icode) (target, op0, op1, op2);
19734 if (! pat)
19735 return 0;
19736 emit_insn (pat);
19737 return target;
19739 case ARM_BUILTIN_SETWCX:
19740 arg0 = CALL_EXPR_ARG (exp, 0);
19741 arg1 = CALL_EXPR_ARG (exp, 1);
19742 op0 = force_reg (SImode, expand_normal (arg0));
19743 op1 = expand_normal (arg1);
19744 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19745 return 0;
19747 case ARM_BUILTIN_GETWCX:
19748 arg0 = CALL_EXPR_ARG (exp, 0);
19749 op0 = expand_normal (arg0);
19750 target = gen_reg_rtx (SImode);
19751 emit_insn (gen_iwmmxt_tmrc (target, op0));
19752 return target;
19754 case ARM_BUILTIN_WSHUFH:
19755 icode = CODE_FOR_iwmmxt_wshufh;
19756 arg0 = CALL_EXPR_ARG (exp, 0);
19757 arg1 = CALL_EXPR_ARG (exp, 1);
19758 op0 = expand_normal (arg0);
19759 op1 = expand_normal (arg1);
19760 tmode = insn_data[icode].operand[0].mode;
19761 mode1 = insn_data[icode].operand[1].mode;
19762 mode2 = insn_data[icode].operand[2].mode;
19764 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19765 op0 = copy_to_mode_reg (mode1, op0);
19766 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19768 /* @@@ better error message */
19769 error ("mask must be an immediate");
19770 return const0_rtx;
19772 if (target == 0
19773 || GET_MODE (target) != tmode
19774 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19775 target = gen_reg_rtx (tmode);
19776 pat = GEN_FCN (icode) (target, op0, op1);
19777 if (! pat)
19778 return 0;
19779 emit_insn (pat);
19780 return target;
19782 case ARM_BUILTIN_WSADB:
19783 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19784 case ARM_BUILTIN_WSADH:
19785 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19786 case ARM_BUILTIN_WSADBZ:
19787 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19788 case ARM_BUILTIN_WSADHZ:
19789 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19791 /* Several three-argument builtins. */
19792 case ARM_BUILTIN_WMACS:
19793 case ARM_BUILTIN_WMACU:
19794 case ARM_BUILTIN_WALIGN:
19795 case ARM_BUILTIN_TMIA:
19796 case ARM_BUILTIN_TMIAPH:
19797 case ARM_BUILTIN_TMIATT:
19798 case ARM_BUILTIN_TMIATB:
19799 case ARM_BUILTIN_TMIABT:
19800 case ARM_BUILTIN_TMIABB:
19801 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19802 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19803 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19804 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19805 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19806 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19807 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19808 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19809 : CODE_FOR_iwmmxt_walign);
19810 arg0 = CALL_EXPR_ARG (exp, 0);
19811 arg1 = CALL_EXPR_ARG (exp, 1);
19812 arg2 = CALL_EXPR_ARG (exp, 2);
19813 op0 = expand_normal (arg0);
19814 op1 = expand_normal (arg1);
19815 op2 = expand_normal (arg2);
19816 tmode = insn_data[icode].operand[0].mode;
19817 mode0 = insn_data[icode].operand[1].mode;
19818 mode1 = insn_data[icode].operand[2].mode;
19819 mode2 = insn_data[icode].operand[3].mode;
19821 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19822 op0 = copy_to_mode_reg (mode0, op0);
19823 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19824 op1 = copy_to_mode_reg (mode1, op1);
19825 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19826 op2 = copy_to_mode_reg (mode2, op2);
19827 if (target == 0
19828 || GET_MODE (target) != tmode
19829 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19830 target = gen_reg_rtx (tmode);
19831 pat = GEN_FCN (icode) (target, op0, op1, op2);
19832 if (! pat)
19833 return 0;
19834 emit_insn (pat);
19835 return target;
19837 case ARM_BUILTIN_WZERO:
19838 target = gen_reg_rtx (DImode);
19839 emit_insn (gen_iwmmxt_clrdi (target));
19840 return target;
19842 case ARM_BUILTIN_THREAD_POINTER:
19843 return arm_load_tp (target);
19845 default:
19846 break;
19849 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19850 if (d->code == (const enum arm_builtins) fcode)
19851 return arm_expand_binop_builtin (d->icode, exp, target);
19853 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19854 if (d->code == (const enum arm_builtins) fcode)
19855 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19857 /* @@@ Should really do something sensible here. */
19858 return NULL_RTX;
19861 /* Return the number (counting from 0) of
19862 the least significant set bit in MASK. */
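/* E.g. number_of_first_bit_set (0x28) is 3, since bit 3 is the lowest
   bit set in 0b101000.  MASK is assumed to be nonzero.  */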
19864 inline static int
19865 number_of_first_bit_set (unsigned mask)
19867 int bit;
19869 for (bit = 0;
19870 (mask & (1 << bit)) == 0;
19871 ++bit)
19872 continue;
19874 return bit;
19877 /* Emit code to push or pop registers to or from the stack. F is the
19878 assembly file. MASK is the registers to push or pop. PUSH is
19879 nonzero if we should push, and zero if we should pop. For debugging
19880 output, if pushing, adjust CFA_OFFSET by the amount of space added
19881 to the stack. REAL_REGS should have the same number of bits set as
19882 MASK, and will be used instead (in the same order) to describe which
19883 registers were saved - this is used to mark the save slots when we
19884 push high registers after moving them to low registers. */
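/* As a rough illustration, thumb_pushpop (f, 0x4080, 1, &offset, 0x4080)
   emits "push {r7, lr}" (preceded by a ".save {r7, lr}" directive when
   EABI unwind tables are requested) and, when frame debug info is being
   generated, advances *offset by 8 for the two words pushed.  */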
19885 static void
19886 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19887 unsigned long real_regs)
19889 int regno;
19890 int lo_mask = mask & 0xFF;
19891 int pushed_words = 0;
19893 gcc_assert (mask);
19895 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19897 /* Special case. Do not generate a POP PC statement here; do it in
19898 thumb_exit ().  */
19899 thumb_exit (f, -1);
19900 return;
19903 if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
19905 fprintf (f, "\t.save\t{");
19906 for (regno = 0; regno < 15; regno++)
19908 if (real_regs & (1 << regno))
19910 if (real_regs & ((1 << regno) -1))
19911 fprintf (f, ", ");
19912 asm_fprintf (f, "%r", regno);
19915 fprintf (f, "}\n");
19918 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19920 /* Look at the low registers first. */
19921 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19923 if (lo_mask & 1)
19925 asm_fprintf (f, "%r", regno);
19927 if ((lo_mask & ~1) != 0)
19928 fprintf (f, ", ");
19930 pushed_words++;
19934 if (push && (mask & (1 << LR_REGNUM)))
19936 /* Catch pushing the LR. */
19937 if (mask & 0xFF)
19938 fprintf (f, ", ");
19940 asm_fprintf (f, "%r", LR_REGNUM);
19942 pushed_words++;
19944 else if (!push && (mask & (1 << PC_REGNUM)))
19946 /* Catch popping the PC. */
19947 if (TARGET_INTERWORK || TARGET_BACKTRACE
19948 || crtl->calls_eh_return)
19950 /* The PC is never popped directly; instead
19951 it is popped into r3 and then BX is used. */
19952 fprintf (f, "}\n");
19954 thumb_exit (f, -1);
19956 return;
19958 else
19960 if (mask & 0xFF)
19961 fprintf (f, ", ");
19963 asm_fprintf (f, "%r", PC_REGNUM);
19967 fprintf (f, "}\n");
19969 if (push && pushed_words && dwarf2out_do_frame ())
19971 char *l = dwarf2out_cfi_label (false);
19972 int pushed_mask = real_regs;
19974 *cfa_offset += pushed_words * 4;
19975 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19977 pushed_words = 0;
19978 pushed_mask = real_regs;
19979 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19981 if (pushed_mask & 1)
19982 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
19987 /* Generate code to return from a thumb function.
19988 If 'reg_containing_return_addr' is -1, then the return address is
19989 actually on the stack, at the stack pointer. */
19990 static void
19991 thumb_exit (FILE *f, int reg_containing_return_addr)
19993 unsigned regs_available_for_popping;
19994 unsigned regs_to_pop;
19995 int pops_needed;
19996 unsigned available;
19997 unsigned required;
19998 int mode;
19999 int size;
20000 int restore_a4 = FALSE;
20002 /* Compute the registers we need to pop. */
20003 regs_to_pop = 0;
20004 pops_needed = 0;
20006 if (reg_containing_return_addr == -1)
20008 regs_to_pop |= 1 << LR_REGNUM;
20009 ++pops_needed;
20012 if (TARGET_BACKTRACE)
20014 /* Restore the (ARM) frame pointer and stack pointer. */
20015 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
20016 pops_needed += 2;
20019 /* If there is nothing to pop then just emit the BX instruction and
20020 return. */
20021 if (pops_needed == 0)
20023 if (crtl->calls_eh_return)
20024 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20026 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20027 return;
20029 /* Otherwise if we are not supporting interworking and we have not created
20030 a backtrace structure and the function was not entered in ARM mode then
20031 just pop the return address straight into the PC. */
20032 else if (!TARGET_INTERWORK
20033 && !TARGET_BACKTRACE
20034 && !is_called_in_ARM_mode (current_function_decl)
20035 && !crtl->calls_eh_return)
20037 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
20038 return;
20041 /* Find out how many of the (return) argument registers we can corrupt. */
20042 regs_available_for_popping = 0;
20044 /* If returning via __builtin_eh_return, the bottom three registers
20045 all contain information needed for the return. */
20046 if (crtl->calls_eh_return)
20047 size = 12;
20048 else
20050 /* We can deduce the registers used from the function's
20051 return value. This is more reliable than examining
20052 df_regs_ever_live_p () because that will be set if the register is
20053 ever used in the function, not just if the register is used
20054 to hold a return value. */
20056 if (crtl->return_rtx != 0)
20057 mode = GET_MODE (crtl->return_rtx);
20058 else
20059 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20061 size = GET_MODE_SIZE (mode);
20063 if (size == 0)
20065 /* In a void function we can use any argument register.
20066 In a function that returns a structure on the stack
20067 we can use the second and third argument registers. */
20068 if (mode == VOIDmode)
20069 regs_available_for_popping =
20070 (1 << ARG_REGISTER (1))
20071 | (1 << ARG_REGISTER (2))
20072 | (1 << ARG_REGISTER (3));
20073 else
20074 regs_available_for_popping =
20075 (1 << ARG_REGISTER (2))
20076 | (1 << ARG_REGISTER (3));
20078 else if (size <= 4)
20079 regs_available_for_popping =
20080 (1 << ARG_REGISTER (2))
20081 | (1 << ARG_REGISTER (3));
20082 else if (size <= 8)
20083 regs_available_for_popping =
20084 (1 << ARG_REGISTER (3));
20087 /* Match registers to be popped with registers into which we pop them. */
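/* Each iteration below clears the lowest set bit of both masks
   (x & -x isolates that bit), so pops_needed drops by one for every
   register that can be paired with an available popping register.  */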
20088 for (available = regs_available_for_popping,
20089 required = regs_to_pop;
20090 required != 0 && available != 0;
20091 available &= ~(available & - available),
20092 required &= ~(required & - required))
20093 -- pops_needed;
20095 /* If we have any popping registers left over, remove them. */
20096 if (available > 0)
20097 regs_available_for_popping &= ~available;
20099 /* Otherwise if we need another popping register we can use
20100 the fourth argument register. */
20101 else if (pops_needed)
20103 /* If we have not found any free argument registers and
20104 reg a4 contains the return address, we must move it. */
20105 if (regs_available_for_popping == 0
20106 && reg_containing_return_addr == LAST_ARG_REGNUM)
20108 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20109 reg_containing_return_addr = LR_REGNUM;
20111 else if (size > 12)
20113 /* Register a4 is being used to hold part of the return value,
20114 but we have dire need of a free, low register. */
20115 restore_a4 = TRUE;
20117 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
20120 if (reg_containing_return_addr != LAST_ARG_REGNUM)
20122 /* The fourth argument register is available. */
20123 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
20125 --pops_needed;
20129 /* Pop as many registers as we can. */
20130 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20131 regs_available_for_popping);
20133 /* Process the registers we popped. */
20134 if (reg_containing_return_addr == -1)
20136 /* The return address was popped into the lowest numbered register. */
20137 regs_to_pop &= ~(1 << LR_REGNUM);
20139 reg_containing_return_addr =
20140 number_of_first_bit_set (regs_available_for_popping);
20142 /* Remove this register from the mask of available registers, so that
20143 the return address will not be corrupted by further pops. */
20144 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
20147 /* If we popped other registers then handle them here. */
20148 if (regs_available_for_popping)
20150 int frame_pointer;
20152 /* Work out which register currently contains the frame pointer. */
20153 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20155 /* Move it into the correct place. */
20156 asm_fprintf (f, "\tmov\t%r, %r\n",
20157 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20159 /* (Temporarily) remove it from the mask of popped registers. */
20160 regs_available_for_popping &= ~(1 << frame_pointer);
20161 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20163 if (regs_available_for_popping)
20165 int stack_pointer;
20167 /* We popped the stack pointer as well;
20168 find the register that contains it. */
20169 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20171 /* Move it into the stack register. */
20172 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20174 /* At this point we have popped all necessary registers, so
20175 do not worry about restoring regs_available_for_popping
20176 to its correct value:
20178 assert (pops_needed == 0)
20179 assert (regs_available_for_popping == (1 << frame_pointer))
20180 assert (regs_to_pop == (1 << STACK_POINTER)) */
20182 else
20184 /* Since we have just moved the popped value into the frame
20185 pointer, the popping register is available for reuse, and
20186 we know that we still have the stack pointer left to pop. */
20187 regs_available_for_popping |= (1 << frame_pointer);
20191 /* If we still have registers left on the stack, but we no longer have
20192 any registers into which we can pop them, then we must move the return
20193 address into the link register and make available the register that
20194 contained it. */
20195 if (regs_available_for_popping == 0 && pops_needed > 0)
20197 regs_available_for_popping |= 1 << reg_containing_return_addr;
20199 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20200 reg_containing_return_addr);
20202 reg_containing_return_addr = LR_REGNUM;
20205 /* If we have registers left on the stack then pop some more.
20206 We know that at most we will want to pop FP and SP. */
20207 if (pops_needed > 0)
20209 int popped_into;
20210 int move_to;
20212 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20213 regs_available_for_popping);
20215 /* We have popped either FP or SP.
20216 Move whichever one it is into the correct register. */
20217 popped_into = number_of_first_bit_set (regs_available_for_popping);
20218 move_to = number_of_first_bit_set (regs_to_pop);
20220 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20222 regs_to_pop &= ~(1 << move_to);
20224 --pops_needed;
20227 /* If we still have not popped everything then we must have only
20228 had one register available to us and we are now popping the SP. */
20229 if (pops_needed > 0)
20231 int popped_into;
20233 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20234 regs_available_for_popping);
20236 popped_into = number_of_first_bit_set (regs_available_for_popping);
20238 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
20240 /* assert (regs_to_pop == (1 << STACK_POINTER))
20241 assert (pops_needed == 1) */
20245 /* If necessary restore the a4 register. */
20246 if (restore_a4)
20248 if (reg_containing_return_addr != LR_REGNUM)
20250 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20251 reg_containing_return_addr = LR_REGNUM;
20254 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
20257 if (crtl->calls_eh_return)
20258 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20260 /* Return to caller. */
20261 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20264 /* Scan INSN just before assembler is output for it.
20265 For Thumb-1, we track the status of the condition codes; this
20266 information is used in the cbranchsi4_insn pattern. */
20267 void
20268 thumb1_final_prescan_insn (rtx insn)
20270 if (flag_print_asm_name)
20271 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
20272 INSN_ADDRESSES (INSN_UID (insn)));
20273 /* Don't overwrite the previous setter when we get to a cbranch. */
20274 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20276 enum attr_conds conds;
20278 if (cfun->machine->thumb1_cc_insn)
20280 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20281 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20282 CC_STATUS_INIT;
20284 conds = get_attr_conds (insn);
20285 if (conds == CONDS_SET)
20287 rtx set = single_set (insn);
20288 cfun->machine->thumb1_cc_insn = insn;
20289 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20290 cfun->machine->thumb1_cc_op1 = const0_rtx;
20291 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20292 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20294 rtx src1 = XEXP (SET_SRC (set), 1);
20295 if (src1 == const0_rtx)
20296 cfun->machine->thumb1_cc_mode = CCmode;
20299 else if (conds != CONDS_NOCOND)
20300 cfun->machine->thumb1_cc_insn = NULL_RTX;
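/* Return 1 if VAL, viewed as a 32-bit constant, consists of an 8-bit
   value shifted left by 0 to 24 bits, i.e. a constant that can be built
   from an 8-bit immediate and a shift; e.g. 0x00ff0000 qualifies
   (0xff << 16) but 0x00ff00ff does not.  Return 0 otherwise.  */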
20305 int thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20307 unsigned HOST_WIDE_INT mask = 0xff;
20308 int i;
20310 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20311 if (val == 0) /* XXX */
20312 return 0;
20314 for (i = 0; i < 25; i++)
20315 if ((val & (mask << i)) == val)
20316 return 1;
20318 return 0;
20321 /* Returns nonzero if the current function contains,
20322 or might contain, a far jump. */
20323 static int
20324 thumb_far_jump_used_p (void)
20326 rtx insn;
20328 /* This test is only important for leaf functions. */
20329 /* assert (!leaf_function_p ()); */
20331 /* If we have already decided that far jumps may be used,
20332 do not bother checking again, and always return true even if
20333 it turns out that they are not being used. Once we have made
20334 the decision that far jumps are present (and that hence the link
20335 register will be pushed onto the stack) we cannot go back on it. */
20336 if (cfun->machine->far_jump_used)
20337 return 1;
20339 /* If this function is not being called from the prologue/epilogue
20340 generation code then it must be being called from the
20341 INITIAL_ELIMINATION_OFFSET macro. */
20342 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20344 /* In this case we know that we are being asked about the elimination
20345 of the arg pointer register. If that register is not being used,
20346 then there are no arguments on the stack, and we do not have to
20347 worry that a far jump might force the prologue to push the link
20348 register, changing the stack offsets. In this case we can just
20349 return false, since the presence of far jumps in the function will
20350 not affect stack offsets.
20352 If the arg pointer is live (or if it was live, but has now been
20353 eliminated and so set to dead) then we do have to test to see if
20354 the function might contain a far jump. This test can lead to some
20355 false negatives, since before reload is completed the length of
20356 branch instructions is not known, so gcc defaults to returning their
20357 longest length, which in turn sets the far jump attribute to true.
20359 A false negative will not result in bad code being generated, but it
20360 will result in a needless push and pop of the link register. We
20361 hope that this does not occur too often.
20363 If we need doubleword stack alignment this could affect the other
20364 elimination offsets so we can't risk getting it wrong. */
20365 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20366 cfun->machine->arg_pointer_live = 1;
20367 else if (!cfun->machine->arg_pointer_live)
20368 return 0;
20371 /* Check to see if the function contains a branch
20372 insn with the far jump attribute set. */
20373 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20375 if (GET_CODE (insn) == JUMP_INSN
20376 /* Ignore tablejump patterns. */
20377 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20378 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20379 && get_attr_far_jump (insn) == FAR_JUMP_YES
20382 /* Record the fact that we have decided that
20383 the function does use far jumps. */
20384 cfun->machine->far_jump_used = 1;
20385 return 1;
20389 return 0;
20392 /* Return nonzero if FUNC must be entered in ARM mode. */
20394 int is_called_in_ARM_mode (tree func)
20396 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20398 /* Ignore the problem about functions whose address is taken. */
20399 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20400 return TRUE;
20402 #ifdef ARM_PE
20403 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20404 #else
20405 return FALSE;
20406 #endif
20409 /* Given the stack offsets and register mask in OFFSETS, decide how
20410 many additional registers to push instead of subtracting a constant
20411 from SP. For epilogues the principle is the same except we use pop.
20412 FOR_PROLOGUE indicates which we're generating. */
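/* For example, with a frame of exactly 512 bytes and one suitable free
   low register, pushing that one extra register shrinks the remaining
   adjustment to 508 bytes, which fits a single immediate stack add/sub,
   so this function returns (512 - 508) / 4 == 1.  */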
20413 static int
20414 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20416 HOST_WIDE_INT amount;
20417 unsigned long live_regs_mask = offsets->saved_regs_mask;
20418 /* Extract a mask of the ones we can give to the Thumb's push/pop
20419 instruction. */
20420 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20421 /* Then count how many other high registers will need to be pushed. */
20422 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20423 int n_free, reg_base;
20425 if (!for_prologue && frame_pointer_needed)
20426 amount = offsets->locals_base - offsets->saved_regs;
20427 else
20428 amount = offsets->outgoing_args - offsets->saved_regs;
20430 /* If the stack frame size is 512 exactly, we can save one load
20431 instruction, which should make this a win even when optimizing
20432 for speed. */
20433 if (!optimize_size && amount != 512)
20434 return 0;
20436 /* Can't do this if there are high registers to push. */
20437 if (high_regs_pushed != 0)
20438 return 0;
20440 /* Shouldn't do it in the prologue if no registers would normally
20441 be pushed at all. In the epilogue, also allow it if we'll have
20442 a pop insn for the PC. */
20443 if (l_mask == 0
20444 && (for_prologue
20445 || TARGET_BACKTRACE
20446 || (live_regs_mask & 1 << LR_REGNUM) == 0
20447 || TARGET_INTERWORK
20448 || crtl->args.pretend_args_size != 0))
20449 return 0;
20451 /* Don't do this if thumb_expand_prologue wants to emit instructions
20452 between the push and the stack frame allocation. */
20453 if (for_prologue
20454 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20455 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20456 return 0;
20458 reg_base = 0;
20459 n_free = 0;
20460 if (!for_prologue)
20462 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20463 live_regs_mask >>= reg_base;
20466 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20467 && (for_prologue || call_used_regs[reg_base + n_free]))
20469 live_regs_mask >>= 1;
20470 n_free++;
20473 if (n_free == 0)
20474 return 0;
20475 gcc_assert (amount / 4 * 4 == amount);
20477 if (amount >= 512 && (amount - n_free * 4) < 512)
20478 return (amount - 508) / 4;
20479 if (amount <= n_free * 4)
20480 return amount / 4;
20481 return 0;
20484 /* The bits which aren't usefully expanded as rtl. */
20485 const char *
20486 thumb_unexpanded_epilogue (void)
20488 arm_stack_offsets *offsets;
20489 int regno;
20490 unsigned long live_regs_mask = 0;
20491 int high_regs_pushed = 0;
20492 int extra_pop;
20493 int had_to_push_lr;
20494 int size;
20496 if (cfun->machine->return_used_this_function != 0)
20497 return "";
20499 if (IS_NAKED (arm_current_func_type ()))
20500 return "";
20502 offsets = arm_get_frame_offsets ();
20503 live_regs_mask = offsets->saved_regs_mask;
20504 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20506 /* We can deduce the registers used from the function's return value.
20507 This is more reliable than examining df_regs_ever_live_p () because that
20508 will be set if the register is ever used in the function, not just if
20509 the register is used to hold a return value. */
20510 size = arm_size_return_regs ();
20512 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20513 if (extra_pop > 0)
20515 unsigned long extra_mask = (1 << extra_pop) - 1;
20516 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20519 /* The prologue may have pushed some high registers to use as
20520 work registers, e.g. the testsuite file:
20521 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20522 compiles to produce:
20523 push {r4, r5, r6, r7, lr}
20524 mov r7, r9
20525 mov r6, r8
20526 push {r6, r7}
20527 as part of the prologue. We have to undo that pushing here. */
20529 if (high_regs_pushed)
20531 unsigned long mask = live_regs_mask & 0xff;
20532 int next_hi_reg;
20534 /* The available low registers depend on the size of the value we are
20535 returning. */
20536 if (size <= 12)
20537 mask |= 1 << 3;
20538 if (size <= 8)
20539 mask |= 1 << 2;
20541 if (mask == 0)
20542 /* Oh dear! We have no low registers into which we can pop
20543 high registers! */
20544 internal_error
20545 ("no low registers available for popping high registers");
20547 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20548 if (live_regs_mask & (1 << next_hi_reg))
20549 break;
20551 while (high_regs_pushed)
20553 /* Find lo register(s) into which the high register(s) can
20554 be popped. */
20555 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20557 if (mask & (1 << regno))
20558 high_regs_pushed--;
20559 if (high_regs_pushed == 0)
20560 break;
20563 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20565 /* Pop the values into the low register(s). */
20566 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20568 /* Move the value(s) into the high registers. */
20569 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20571 if (mask & (1 << regno))
20573 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20574 regno);
20576 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20577 if (live_regs_mask & (1 << next_hi_reg))
20578 break;
20582 live_regs_mask &= ~0x0f00;
20585 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20586 live_regs_mask &= 0xff;
20588 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20590 /* Pop the return address into the PC. */
20591 if (had_to_push_lr)
20592 live_regs_mask |= 1 << PC_REGNUM;
20594 /* Either no argument registers were pushed or a backtrace
20595 structure was created which includes an adjusted stack
20596 pointer, so just pop everything. */
20597 if (live_regs_mask)
20598 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20599 live_regs_mask);
20601 /* We have either just popped the return address into the
20602 PC or it was kept in LR for the entire function.
20603 Note that thumb_pushpop has already called thumb_exit if the
20604 PC was in the list. */
20605 if (!had_to_push_lr)
20606 thumb_exit (asm_out_file, LR_REGNUM);
20608 else
20610 /* Pop everything but the return address. */
20611 if (live_regs_mask)
20612 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20613 live_regs_mask);
20615 if (had_to_push_lr)
20617 if (size > 12)
20619 /* We have no free low regs, so save one. */
20620 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20621 LAST_ARG_REGNUM);
20624 /* Get the return address into a temporary register. */
20625 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20626 1 << LAST_ARG_REGNUM);
20628 if (size > 12)
20630 /* Move the return address to lr. */
20631 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20632 LAST_ARG_REGNUM);
20633 /* Restore the low register. */
20634 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20635 IP_REGNUM);
20636 regno = LR_REGNUM;
20638 else
20639 regno = LAST_ARG_REGNUM;
20641 else
20642 regno = LR_REGNUM;
20644 /* Remove the argument registers that were pushed onto the stack. */
20645 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20646 SP_REGNUM, SP_REGNUM,
20647 crtl->args.pretend_args_size);
20649 thumb_exit (asm_out_file, regno);
20652 return "";
20655 /* Functions to save and restore machine-specific function data. */
20656 static struct machine_function *
20657 arm_init_machine_status (void)
20659 struct machine_function *machine;
20660 machine = ggc_alloc_cleared_machine_function ();
20662 #if ARM_FT_UNKNOWN != 0
20663 machine->func_type = ARM_FT_UNKNOWN;
20664 #endif
20665 return machine;
20668 /* Return an RTX indicating where the return address to the
20669 calling function can be found. */
20671 rtx arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20673 if (count != 0)
20674 return NULL_RTX;
20676 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20679 /* Do anything needed before RTL is emitted for each function. */
20680 void
20681 arm_init_expanders (void)
20683 /* Arrange to initialize and mark the machine per-function status. */
20684 init_machine_status = arm_init_machine_status;
20686 /* This is to stop the combine pass optimizing away the alignment
20687 adjustment of va_arg. */
20688 /* ??? It is claimed that this should not be necessary. */
20689 if (cfun)
20690 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20694 /* Like arm_compute_initial_elimination offset. Simpler because there
20695 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20696 to point at the base of the local variables after static stack
20697 space for a function has been allocated. */
20699 HOST_WIDE_INT
20700 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20702 arm_stack_offsets *offsets;
20704 offsets = arm_get_frame_offsets ();
20706 switch (from)
20708 case ARG_POINTER_REGNUM:
20709 switch (to)
20711 case STACK_POINTER_REGNUM:
20712 return offsets->outgoing_args - offsets->saved_args;
20714 case FRAME_POINTER_REGNUM:
20715 return offsets->soft_frame - offsets->saved_args;
20717 case ARM_HARD_FRAME_POINTER_REGNUM:
20718 return offsets->saved_regs - offsets->saved_args;
20720 case THUMB_HARD_FRAME_POINTER_REGNUM:
20721 return offsets->locals_base - offsets->saved_args;
20723 default:
20724 gcc_unreachable ();
20726 break;
20728 case FRAME_POINTER_REGNUM:
20729 switch (to)
20731 case STACK_POINTER_REGNUM:
20732 return offsets->outgoing_args - offsets->soft_frame;
20734 case ARM_HARD_FRAME_POINTER_REGNUM:
20735 return offsets->saved_regs - offsets->soft_frame;
20737 case THUMB_HARD_FRAME_POINTER_REGNUM:
20738 return offsets->locals_base - offsets->soft_frame;
20740 default:
20741 gcc_unreachable ();
20743 break;
20745 default:
20746 gcc_unreachable ();
20750 /* Generate the rest of a function's prologue. */
20751 void
20752 thumb1_expand_prologue (void)
20754 rtx insn, dwarf;
20756 HOST_WIDE_INT amount;
20757 arm_stack_offsets *offsets;
20758 unsigned long func_type;
20759 int regno;
20760 unsigned long live_regs_mask;
20762 func_type = arm_current_func_type ();
20764 /* Naked functions don't have prologues. */
20765 if (IS_NAKED (func_type))
20766 return;
20768 if (IS_INTERRUPT (func_type))
20770 error ("interrupt Service Routines cannot be coded in Thumb mode");
20771 return;
20774 offsets = arm_get_frame_offsets ();
20775 live_regs_mask = offsets->saved_regs_mask;
20776 /* Load the pic register before setting the frame pointer,
20777 so we can use r7 as a temporary work register. */
20778 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20779 arm_load_pic_register (live_regs_mask);
20781 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20782 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20783 stack_pointer_rtx);
20785 if (flag_stack_usage)
20786 current_function_static_stack_size
20787 = offsets->outgoing_args - offsets->saved_args;
20789 amount = offsets->outgoing_args - offsets->saved_regs;
20790 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20791 if (amount)
20793 if (amount < 512)
20795 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20796 GEN_INT (- amount)));
20797 RTX_FRAME_RELATED_P (insn) = 1;
20799 else
20801 rtx reg;
20803 /* The stack decrement is too big for an immediate value in a single
20804 insn. In theory we could issue multiple subtracts, but after
20805 three of them it becomes more space efficient to place the full
20806 value in the constant pool and load into a register. (Also the
20807 ARM debugger really likes to see only one stack decrement per
20808 function). So instead we look for a scratch register into which
20809 we can load the decrement, and then we subtract this from the
20810 stack pointer. Unfortunately on the thumb the only available
20811 scratch registers are the argument registers, and we cannot use
20812 these as they may hold arguments to the function. Instead we
20813 attempt to locate a call preserved register which is used by this
20814 function. If we can find one, then we know that it will have
20815 been pushed at the start of the prologue and so we can corrupt
20816 it now. */
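/* So, to drop the stack by e.g. 1024 bytes, the code below loads -1024
   into a call-saved low register that has already been pushed (on
   Thumb-1 the move becomes a literal-pool load), adds it to the stack
   pointer, and attaches a REG_FRAME_RELATED_EXPR note describing the
   net sp = sp - 1024 effect for the unwinder.  */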
20817 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20818 if (live_regs_mask & (1 << regno))
20819 break;
20821 gcc_assert(regno <= LAST_LO_REGNUM);
20823 reg = gen_rtx_REG (SImode, regno);
20825 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20827 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20828 stack_pointer_rtx, reg));
20829 RTX_FRAME_RELATED_P (insn) = 1;
20830 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20831 plus_constant (stack_pointer_rtx,
20832 -amount));
20833 RTX_FRAME_RELATED_P (dwarf) = 1;
20834 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20838 if (frame_pointer_needed)
20839 thumb_set_frame_pointer (offsets);
20841 /* If we are profiling, make sure no instructions are scheduled before
20842 the call to mcount. Similarly if the user has requested no
20843 scheduling in the prolog. Similarly if we want non-call exceptions
20844 using the EABI unwinder, to prevent faulting instructions from being
20845 swapped with a stack adjustment. */
20846 if (crtl->profile || !TARGET_SCHED_PROLOG
20847 || (arm_except_unwind_info (&global_options) == UI_TARGET
20848 && cfun->can_throw_non_call_exceptions))
20849 emit_insn (gen_blockage ());
20851 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20852 if (live_regs_mask & 0xff)
20853 cfun->machine->lr_save_eliminated = 0;
20857 void
20858 thumb1_expand_epilogue (void)
20860 HOST_WIDE_INT amount;
20861 arm_stack_offsets *offsets;
20862 int regno;
20864 /* Naked functions don't have epilogues. */
20865 if (IS_NAKED (arm_current_func_type ()))
20866 return;
20868 offsets = arm_get_frame_offsets ();
20869 amount = offsets->outgoing_args - offsets->saved_regs;
20871 if (frame_pointer_needed)
20873 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20874 amount = offsets->locals_base - offsets->saved_regs;
20876 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20878 gcc_assert (amount >= 0);
20879 if (amount)
20881 if (amount < 512)
20882 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20883 GEN_INT (amount)));
20884 else
20886 /* r3 is always free in the epilogue. */
20887 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20889 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20890 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20894 /* Emit a USE (stack_pointer_rtx), so that
20895 the stack adjustment will not be deleted. */
20896 emit_insn (gen_prologue_use (stack_pointer_rtx));
20898 if (crtl->profile || !TARGET_SCHED_PROLOG)
20899 emit_insn (gen_blockage ());
20901 /* Emit a clobber for each insn that will be restored in the epilogue,
20902 so that flow2 will get register lifetimes correct. */
20903 for (regno = 0; regno < 13; regno++)
20904 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20905 emit_clobber (gen_rtx_REG (SImode, regno));
20907 if (! df_regs_ever_live_p (LR_REGNUM))
20908 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20911 static void
20912 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20914 arm_stack_offsets *offsets;
20915 unsigned long live_regs_mask = 0;
20916 unsigned long l_mask;
20917 unsigned high_regs_pushed = 0;
20918 int cfa_offset = 0;
20919 int regno;
20921 if (IS_NAKED (arm_current_func_type ()))
20922 return;
20924 if (is_called_in_ARM_mode (current_function_decl))
20926 const char * name;
20928 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20929 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20930 == SYMBOL_REF);
20931 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20933 /* Generate a code sequence to switch us into Thumb mode. */
20934 /* The .code 32 directive has already been emitted by
20935 ASM_DECLARE_FUNCTION_NAME. */
20936 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20937 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20939 /* Generate a label, so that the debugger will notice the
20940 change in instruction sets. This label is also used by
20941 the assembler to bypass the ARM code when this function
20942 is called from a Thumb encoded function elsewhere in the
20943 same file. Hence the definition of STUB_NAME here must
20944 agree with the definition in gas/config/tc-arm.c. */
20946 #define STUB_NAME ".real_start_of"
20948 fprintf (f, "\t.code\t16\n");
20949 #ifdef ARM_PE
20950 if (arm_dllexport_name_p (name))
20951 name = arm_strip_name_encoding (name);
20952 #endif
20953 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20954 fprintf (f, "\t.thumb_func\n");
20955 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20958 if (crtl->args.pretend_args_size)
20960 /* Output unwind directive for the stack adjustment. */
20961 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20962 fprintf (f, "\t.pad #%d\n",
20963 crtl->args.pretend_args_size);
20965 if (cfun->machine->uses_anonymous_args)
20967 int num_pushes;
20969 fprintf (f, "\tpush\t{");
20971 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20973 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20974 regno <= LAST_ARG_REGNUM;
20975 regno++)
20976 asm_fprintf (f, "%r%s", regno,
20977 regno == LAST_ARG_REGNUM ? "" : ", ");
20979 fprintf (f, "}\n");
20981 else
20982 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20983 SP_REGNUM, SP_REGNUM,
20984 crtl->args.pretend_args_size);
20986 /* We don't need to record the stores for unwinding (would it
20987 help the debugger any if we did?), but record the change in
20988 the stack pointer. */
20989 if (dwarf2out_do_frame ())
20991 char *l = dwarf2out_cfi_label (false);
20993 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20994 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20998 /* Get the registers we are going to push. */
20999 offsets = arm_get_frame_offsets ();
21000 live_regs_mask = offsets->saved_regs_mask;
21001 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21002 l_mask = live_regs_mask & 0x40ff;
21003 /* Then count how many other high registers will need to be pushed. */
21004 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21006 if (TARGET_BACKTRACE)
21008 unsigned offset;
21009 unsigned work_register;
21011 /* We have been asked to create a stack backtrace structure.
21012 The code looks like this:
21014 0 .align 2
21015 0 func:
21016 0 sub SP, #16 Reserve space for 4 registers.
21017 2 push {R7} Push low registers.
21018 4 add R7, SP, #20 Get the stack pointer before the push.
21019 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
21020 8 mov R7, PC Get hold of the start of this code plus 12.
21021 10 str R7, [SP, #16] Store it.
21022 12 mov R7, FP Get hold of the current frame pointer.
21023 14 str R7, [SP, #4] Store it.
21024 16 mov R7, LR Get hold of the current return address.
21025 18 str R7, [SP, #12] Store it.
21026 20 add R7, SP, #16 Point at the start of the backtrace structure.
21027 22 mov FP, R7 Put this value into the frame pointer. */
21029 work_register = thumb_find_work_register (live_regs_mask);
21031 if (arm_except_unwind_info (&global_options) == UI_TARGET)
21032 asm_fprintf (f, "\t.pad #16\n");
21034 asm_fprintf
21035 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
21036 SP_REGNUM, SP_REGNUM);
21038 if (dwarf2out_do_frame ())
21040 char *l = dwarf2out_cfi_label (false);
21042 cfa_offset = cfa_offset + 16;
21043 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
21046 if (l_mask)
21048 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
21049 offset = bit_count (l_mask) * UNITS_PER_WORD;
21051 else
21052 offset = 0;
21054 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21055 offset + 16 + crtl->args.pretend_args_size);
21057 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21058 offset + 4);
21060 /* Make sure that the instruction fetching the PC is in the right place
21061 to calculate "start of backtrace creation code + 12". */
21062 if (l_mask)
21064 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21065 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21066 offset + 12);
21067 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21068 ARM_HARD_FRAME_POINTER_REGNUM);
21069 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21070 offset);
21072 else
21074 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21075 ARM_HARD_FRAME_POINTER_REGNUM);
21076 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21077 offset);
21078 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21079 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21080 offset + 12);
21083 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
21084 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21085 offset + 8);
21086 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21087 offset + 12);
21088 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
21089 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
21091 /* Optimization: If we are not pushing any low registers but we are going
21092 to push some high registers then delay our first push. This will just
21093 be a push of LR and we can combine it with the push of the first high
21094 register. */
21095 else if ((l_mask & 0xff) != 0
21096 || (high_regs_pushed == 0 && l_mask))
21098 unsigned long mask = l_mask;
21099 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
21100 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
21103 if (high_regs_pushed)
21105 unsigned pushable_regs;
21106 unsigned next_hi_reg;
21108 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
21109 if (live_regs_mask & (1 << next_hi_reg))
21110 break;
21112 pushable_regs = l_mask & 0xff;
21114 if (pushable_regs == 0)
21115 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
21117 while (high_regs_pushed > 0)
21119 unsigned long real_regs_mask = 0;
21121 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
21123 if (pushable_regs & (1 << regno))
21125 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
21127 high_regs_pushed --;
21128 real_regs_mask |= (1 << next_hi_reg);
21130 if (high_regs_pushed)
21132 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
21133 next_hi_reg --)
21134 if (live_regs_mask & (1 << next_hi_reg))
21135 break;
21137 else
21139 pushable_regs &= ~((1 << regno) - 1);
21140 break;
21145 /* If we had to find a work register and we have not yet
21146 saved the LR then add it to the list of regs to push. */
21147 if (l_mask == (1 << LR_REGNUM))
21149 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
21150 1, &cfa_offset,
21151 real_regs_mask | (1 << LR_REGNUM));
21152 l_mask = 0;
21154 else
21155 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
21160 /* Handle the case of a double word load into a low register from
21161 a computed memory address. The computed address may involve a
21162 register which is overwritten by the load. */
21163 const char *
21164 thumb_load_double_from_address (rtx *operands)
21166 rtx addr;
21167 rtx base;
21168 rtx offset;
21169 rtx arg1;
21170 rtx arg2;
21172 gcc_assert (GET_CODE (operands[0]) == REG);
21173 gcc_assert (GET_CODE (operands[1]) == MEM);
21175 /* Get the memory address. */
21176 addr = XEXP (operands[1], 0);
21178 /* Work out how the memory address is computed. */
21179 switch (GET_CODE (addr))
21181 case REG:
21182 operands[2] = adjust_address (operands[1], SImode, 4);
21184 if (REGNO (operands[0]) == REGNO (addr))
21186 output_asm_insn ("ldr\t%H0, %2", operands);
21187 output_asm_insn ("ldr\t%0, %1", operands);
21189 else
21191 output_asm_insn ("ldr\t%0, %1", operands);
21192 output_asm_insn ("ldr\t%H0, %2", operands);
21194 break;
21196 case CONST:
21197 /* Compute <address> + 4 for the high order load. */
21198 operands[2] = adjust_address (operands[1], SImode, 4);
21200 output_asm_insn ("ldr\t%0, %1", operands);
21201 output_asm_insn ("ldr\t%H0, %2", operands);
21202 break;
21204 case PLUS:
21205 arg1 = XEXP (addr, 0);
21206 arg2 = XEXP (addr, 1);
21208 if (CONSTANT_P (arg1))
21209 base = arg2, offset = arg1;
21210 else
21211 base = arg1, offset = arg2;
21213 gcc_assert (GET_CODE (base) == REG);
21215 /* Catch the case of <address> = <reg> + <reg> */
21216 if (GET_CODE (offset) == REG)
21218 int reg_offset = REGNO (offset);
21219 int reg_base = REGNO (base);
21220 int reg_dest = REGNO (operands[0]);
21222 /* Add the base and offset registers together into the
21223 higher destination register. */
21224 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21225 reg_dest + 1, reg_base, reg_offset);
21227 /* Load the lower destination register from the address in
21228 the higher destination register. */
21229 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21230 reg_dest, reg_dest + 1);
21232 /* Load the higher destination register from its own address
21233 plus 4. */
21234 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21235 reg_dest + 1, reg_dest + 1);
21237 else
21239 /* Compute <address> + 4 for the high order load. */
21240 operands[2] = adjust_address (operands[1], SImode, 4);
21242 /* If the computed address is held in the low order register
21243 then load the high order register first, otherwise always
21244 load the low order register first. */
21245 if (REGNO (operands[0]) == REGNO (base))
21247 output_asm_insn ("ldr\t%H0, %2", operands);
21248 output_asm_insn ("ldr\t%0, %1", operands);
21250 else
21252 output_asm_insn ("ldr\t%0, %1", operands);
21253 output_asm_insn ("ldr\t%H0, %2", operands);
21256 break;
21258 case LABEL_REF:
21259 /* With no registers to worry about we can just load the value
21260 directly. */
21261 operands[2] = adjust_address (operands[1], SImode, 4);
21263 output_asm_insn ("ldr\t%H0, %2", operands);
21264 output_asm_insn ("ldr\t%0, %1", operands);
21265 break;
21267 default:
21268 gcc_unreachable ();
21271 return "";
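/* Output the ldmia/stmia pair used to copy N words (N is 2 or 3) for a
   Thumb block move, first sorting the scratch registers into ascending
   order so that the words are transferred in the correct order.  */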
21274 const char *
21275 thumb_output_move_mem_multiple (int n, rtx *operands)
21277 rtx tmp;
21279 switch (n)
21281 case 2:
21282 if (REGNO (operands[4]) > REGNO (operands[5]))
21284 tmp = operands[4];
21285 operands[4] = operands[5];
21286 operands[5] = tmp;
21288 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21289 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21290 break;
21292 case 3:
21293 if (REGNO (operands[4]) > REGNO (operands[5]))
21295 tmp = operands[4];
21296 operands[4] = operands[5];
21297 operands[5] = tmp;
21299 if (REGNO (operands[5]) > REGNO (operands[6]))
21301 tmp = operands[5];
21302 operands[5] = operands[6];
21303 operands[6] = tmp;
21305 if (REGNO (operands[4]) > REGNO (operands[5]))
21307 tmp = operands[4];
21308 operands[4] = operands[5];
21309 operands[5] = tmp;
21312 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21313 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21314 break;
21316 default:
21317 gcc_unreachable ();
21320 return "";
21323 /* Output a call-via instruction for thumb state. */
21324 const char *
21325 thumb_call_via_reg (rtx reg)
21327 int regno = REGNO (reg);
21328 rtx *labelp;
21330 gcc_assert (regno < LR_REGNUM);
21332 /* If we are in the normal text section we can use a single instance
21333 per compilation unit. If we are doing function sections, then we need
21334 an entry per section, since we can't rely on reachability. */
21335 if (in_section == text_section)
21337 thumb_call_reg_needed = 1;
21339 if (thumb_call_via_label[regno] == NULL)
21340 thumb_call_via_label[regno] = gen_label_rtx ();
21341 labelp = thumb_call_via_label + regno;
21343 else
21345 if (cfun->machine->call_via[regno] == NULL)
21346 cfun->machine->call_via[regno] = gen_label_rtx ();
21347 labelp = cfun->machine->call_via + regno;
21350 output_asm_insn ("bl\t%a0", labelp);
21351 return "";
21354 /* Routines for generating rtl. */
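/* Expand a Thumb block copy (movmemqi): copy the constant number of bytes
   in operands[2] from the memory at operands[1] to the memory at
   operands[0], using 12-byte and 8-byte load/store-multiple chunks and
   then word, half-word and byte moves for any remainder.  */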
21355 void
21356 thumb_expand_movmemqi (rtx *operands)
21358 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21359 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21360 HOST_WIDE_INT len = INTVAL (operands[2]);
21361 HOST_WIDE_INT offset = 0;
21363 while (len >= 12)
21365 emit_insn (gen_movmem12b (out, in, out, in));
21366 len -= 12;
21369 if (len >= 8)
21371 emit_insn (gen_movmem8b (out, in, out, in));
21372 len -= 8;
21375 if (len >= 4)
21377 rtx reg = gen_reg_rtx (SImode);
21378 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21379 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21380 len -= 4;
21381 offset += 4;
21384 if (len >= 2)
21386 rtx reg = gen_reg_rtx (HImode);
21387 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21388 plus_constant (in, offset))));
21389 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21390 reg));
21391 len -= 2;
21392 offset += 2;
21395 if (len)
21397 rtx reg = gen_reg_rtx (QImode);
21398 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21399 plus_constant (in, offset))));
21400 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
21401 reg));
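/* Handle storing a half-word to memory during reload, using operands[2]
   as a scratch register.  */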
21405 void
21406 thumb_reload_out_hi (rtx *operands)
21408 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21411 /* Handle reading a half-word from memory during reload. */
21412 void
21413 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21415 gcc_unreachable ();
21418 /* Return the length of a function name prefix
21419 that starts with the character 'c'. */
21420 static int
21421 arm_get_strip_length (int c)
21423 switch (c)
21425 ARM_NAME_ENCODING_LENGTHS
21426 default: return 0;
21430 /* Return a pointer to a function's name with any
21431 and all prefix encodings stripped from it. */
21432 const char *
21433 arm_strip_name_encoding (const char *name)
21435 int skip;
21437 while ((skip = arm_get_strip_length (* name)))
21438 name += skip;
21440 return name;
21443 /* If there is a '*' anywhere in the name's prefix, then
21444 emit the stripped name verbatim, otherwise prepend an
21445 underscore if leading underscores are being used. */
21446 void
21447 arm_asm_output_labelref (FILE *stream, const char *name)
21449 int skip;
21450 int verbatim = 0;
21452 while ((skip = arm_get_strip_length (* name)))
21454 verbatim |= (*name == '*');
21455 name += skip;
21458 if (verbatim)
21459 fputs (name, stream);
21460 else
21461 asm_fprintf (stream, "%U%s", name);
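/* Emit assembler directives at the start of the output file: the unified
   syntax directive, the .arch/.cpu and .fpu selections, and the EABI
   object attributes describing the floating-point and ABI configuration
   in use.  */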
21464 static void
21465 arm_file_start (void)
21467 int val;
21469 if (TARGET_UNIFIED_ASM)
21470 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21472 if (TARGET_BPABI)
21474 const char *fpu_name;
21475 if (arm_selected_arch)
21476 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21477 else
21478 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21480 if (TARGET_SOFT_FLOAT)
21482 if (TARGET_VFP)
21483 fpu_name = "softvfp";
21484 else
21485 fpu_name = "softfpa";
21487 else
21489 fpu_name = arm_fpu_desc->name;
21490 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21492 if (TARGET_HARD_FLOAT)
21493 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21494 if (TARGET_HARD_FLOAT_ABI)
21495 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21498 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21500 /* Some of these attributes only apply when the corresponding features
21501 are used. However, we don't have any easy way of figuring this out.
21502 Conservatively record the setting that would have been used. */
21504 /* Tag_ABI_FP_rounding. */
21505 if (flag_rounding_math)
21506 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21507 if (!flag_unsafe_math_optimizations)
21509 /* Tag_ABI_FP_denormal. */
21510 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21511 /* Tag_ABI_FP_exceptions. */
21512 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21514 /* Tag_ABI_FP_user_exceptions. */
21515 if (flag_signaling_nans)
21516 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21517 /* Tag_ABI_FP_number_model. */
21518 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21519 flag_finite_math_only ? 1 : 3);
21521 /* Tag_ABI_align8_needed. */
21522 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21523 /* Tag_ABI_align8_preserved. */
21524 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21525 /* Tag_ABI_enum_size. */
21526 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21527 flag_short_enums ? 1 : 2);
21529 /* Tag_ABI_optimization_goals. */
21530 if (optimize_size)
21531 val = 4;
21532 else if (optimize >= 2)
21533 val = 2;
21534 else if (optimize)
21535 val = 1;
21536 else
21537 val = 6;
21538 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21540 /* Tag_ABI_FP_16bit_format. */
21541 if (arm_fp16_format)
21542 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21543 (int)arm_fp16_format);
21545 if (arm_lang_output_object_attributes_hook)
21546 arm_lang_output_object_attributes_hook();
21548 default_file_start();
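/* Emit anything that must appear at the end of the assembly file, in
   particular the .note.GNU-stack marker and the call-via-register
   trampolines used for indirect Thumb calls.  */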
21551 static void
21552 arm_file_end (void)
21554 int regno;
21556 if (NEED_INDICATE_EXEC_STACK)
21557 /* Add .note.GNU-stack. */
21558 file_end_indicate_exec_stack ();
21560 if (! thumb_call_reg_needed)
21561 return;
21563 switch_to_section (text_section);
21564 asm_fprintf (asm_out_file, "\t.code 16\n");
21565 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21567 for (regno = 0; regno < LR_REGNUM; regno++)
21569 rtx label = thumb_call_via_label[regno];
21571 if (label != 0)
21573 targetm.asm_out.internal_label (asm_out_file, "L",
21574 CODE_LABEL_NUMBER (label));
21575 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21580 #ifndef ARM_PE
21581 /* Symbols in the text segment can be accessed without indirecting via the
21582 constant pool; it may take an extra binary operation, but this is still
21583 faster than indirecting via memory. Don't do this when not optimizing,
21584 since we won't be calculating all of the offsets necessary to do this
21585 simplification. */
21587 static void
21588 arm_encode_section_info (tree decl, rtx rtl, int first)
21590 if (optimize > 0 && TREE_CONSTANT (decl))
21591 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21593 default_encode_section_info (decl, rtl, first);
21595 #endif /* !ARM_PE */
21597 static void
21598 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21600 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21601 && !strcmp (prefix, "L"))
21603 arm_ccfsm_state = 0;
21604 arm_target_insn = NULL;
21606 default_internal_label (stream, prefix, labelno);
21609 /* Output code to add DELTA to the first argument, and then jump
21610 to FUNCTION. Used for C++ multiple inheritance. */
21611 static void
21612 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21613 HOST_WIDE_INT delta,
21614 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21615 tree function)
21617 static int thunk_label = 0;
21618 char label[256];
21619 char labelpc[256];
21620 int mi_delta = delta;
21621 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21622 int shift = 0;
21623 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21624 ? 1 : 0);
21625 if (mi_delta < 0)
21626 mi_delta = - mi_delta;
21628 if (TARGET_THUMB1)
21630 int labelno = thunk_label++;
21631 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21632 /* Thunks are entered in ARM mode when available. */
21633 if (TARGET_THUMB1_ONLY)
21635 /* push r3 so we can use it as a temporary. */
21636 /* TODO: Omit this save if r3 is not used. */
21637 fputs ("\tpush {r3}\n", file);
21638 fputs ("\tldr\tr3, ", file);
21640 else
21642 fputs ("\tldr\tr12, ", file);
21644 assemble_name (file, label);
21645 fputc ('\n', file);
21646 if (flag_pic)
21648 /* If we are generating PIC, the ldr instruction below loads
21649 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21650 the address of the add + 8, so we have:
21652 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21653 = target + 1.
21655 Note that we have "+ 1" because some versions of GNU ld
21656 don't set the low bit of the result for R_ARM_REL32
21657 relocations against thumb function symbols.
21658 On ARMv6M this is +4, not +8. */
21659 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21660 assemble_name (file, labelpc);
21661 fputs (":\n", file);
21662 if (TARGET_THUMB1_ONLY)
21664 /* This is 2 insns after the start of the thunk, so we know it
21665 is 4-byte aligned. */
21666 fputs ("\tadd\tr3, pc, r3\n", file);
21667 fputs ("\tmov r12, r3\n", file);
21669 else
21670 fputs ("\tadd\tr12, pc, r12\n", file);
21672 else if (TARGET_THUMB1_ONLY)
21673 fputs ("\tmov r12, r3\n", file);
21675 if (TARGET_THUMB1_ONLY)
21677 if (mi_delta > 255)
21679 fputs ("\tldr\tr3, ", file);
21680 assemble_name (file, label);
21681 fputs ("+4\n", file);
21682 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21683 mi_op, this_regno, this_regno);
21685 else if (mi_delta != 0)
21687 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21688 mi_op, this_regno, this_regno,
21689 mi_delta);
21692 else
21694 /* TODO: Use movw/movt for large constants when available. */
21695 while (mi_delta != 0)
21697 if ((mi_delta & (3 << shift)) == 0)
21698 shift += 2;
21699 else
21701 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21702 mi_op, this_regno, this_regno,
21703 mi_delta & (0xff << shift));
21704 mi_delta &= ~(0xff << shift);
21705 shift += 8;
21709 if (TARGET_THUMB1)
21711 if (TARGET_THUMB1_ONLY)
21712 fputs ("\tpop\t{r3}\n", file);
21714 fprintf (file, "\tbx\tr12\n");
21715 ASM_OUTPUT_ALIGN (file, 2);
21716 assemble_name (file, label);
21717 fputs (":\n", file);
21718 if (flag_pic)
21720 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21721 rtx tem = XEXP (DECL_RTL (function), 0);
21722 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21723 tem = gen_rtx_MINUS (GET_MODE (tem),
21724 tem,
21725 gen_rtx_SYMBOL_REF (Pmode,
21726 ggc_strdup (labelpc)));
21727 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21729 else
21730 /* Output ".word .LTHUNKn". */
21731 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21733 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21734 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21736 else
21738 fputs ("\tb\t", file);
21739 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21740 if (NEED_PLT_RELOC)
21741 fputs ("(PLT)", file);
21742 fputc ('\n', file);
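/* Print the CONST_VECTOR X to FILE as a single hexadecimal constant,
   emitting each element in turn.  Returns 1 on success; used from
   arm_output_addr_const_extra.  */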
21746 int
21747 arm_emit_vector_const (FILE *file, rtx x)
21749 int i;
21750 const char * pattern;
21752 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21754 switch (GET_MODE (x))
21756 case V2SImode: pattern = "%08x"; break;
21757 case V4HImode: pattern = "%04x"; break;
21758 case V8QImode: pattern = "%02x"; break;
21759 default: gcc_unreachable ();
21762 fprintf (file, "0x");
21763 for (i = CONST_VECTOR_NUNITS (x); i--;)
21765 rtx element;
21767 element = CONST_VECTOR_ELT (x, i);
21768 fprintf (file, pattern, INTVAL (element));
21771 return 1;
21774 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
21775 HFmode constant pool entries are actually loaded with ldr. */
21776 void
21777 arm_emit_fp16_const (rtx c)
21779 REAL_VALUE_TYPE r;
21780 long bits;
21782 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21783 bits = real_to_target (NULL, &r, HFmode);
21784 if (WORDS_BIG_ENDIAN)
21785 assemble_zeros (2);
21786 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21787 if (!WORDS_BIG_ENDIAN)
21788 assemble_zeros (2);
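/* Output the assembly for a load into an iWMMXt GR (wCGR) register.
   For addresses within range a single wldrw is used; otherwise the base
   register is pushed, the value loaded into it with ldr, transferred with
   tmcr, and the base register restored from the stack.  */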
21791 const char *
21792 arm_output_load_gr (rtx *operands)
21794 rtx reg;
21795 rtx offset;
21796 rtx wcgr;
21797 rtx sum;
21799 if (GET_CODE (operands [1]) != MEM
21800 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21801 || GET_CODE (reg = XEXP (sum, 0)) != REG
21802 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21803 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21804 return "wldrw%?\t%0, %1";
21806 /* Fix up an out-of-range load of a GR register. */
21807 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21808 wcgr = operands[0];
21809 operands[0] = reg;
21810 output_asm_insn ("ldr%?\t%0, %1", operands);
21812 operands[0] = wcgr;
21813 operands[1] = reg;
21814 output_asm_insn ("tmcr%?\t%0, %1", operands);
21815 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21817 return "";
21820 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21822 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21823 named arg and all anonymous args onto the stack.
21824 XXX I know the prologue shouldn't be pushing registers, but it is faster
21825 that way. */
21827 static void
21828 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21829 enum machine_mode mode,
21830 tree type,
21831 int *pretend_size,
21832 int second_time ATTRIBUTE_UNUSED)
21834 int nregs;
21836 cfun->machine->uses_anonymous_args = 1;
21837 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21839 nregs = pcum->aapcs_ncrn;
21840 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21841 nregs++;
21843 else
21844 nregs = pcum->nregs;
21846 if (nregs < NUM_ARG_REGS)
21847 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
21850 /* Return nonzero if the CONSUMER instruction (a store) does not need
21851 PRODUCER's value to calculate the address. */
21853 int
21854 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21856 rtx value = PATTERN (producer);
21857 rtx addr = PATTERN (consumer);
21859 if (GET_CODE (value) == COND_EXEC)
21860 value = COND_EXEC_CODE (value);
21861 if (GET_CODE (value) == PARALLEL)
21862 value = XVECEXP (value, 0, 0);
21863 value = XEXP (value, 0);
21864 if (GET_CODE (addr) == COND_EXEC)
21865 addr = COND_EXEC_CODE (addr);
21866 if (GET_CODE (addr) == PARALLEL)
21867 addr = XVECEXP (addr, 0, 0);
21868 addr = XEXP (addr, 0);
21870 return !reg_overlap_mentioned_p (value, addr);
21873 /* Return nonzero if the CONSUMER instruction (a store) does need
21874 PRODUCER's value to calculate the address. */
21876 int
21877 arm_early_store_addr_dep (rtx producer, rtx consumer)
21879 return !arm_no_early_store_addr_dep (producer, consumer);
21882 /* Return nonzero if the CONSUMER instruction (a load) does need
21883 PRODUCER's value to calculate the address. */
21885 int
21886 arm_early_load_addr_dep (rtx producer, rtx consumer)
21888 rtx value = PATTERN (producer);
21889 rtx addr = PATTERN (consumer);
21891 if (GET_CODE (value) == COND_EXEC)
21892 value = COND_EXEC_CODE (value);
21893 if (GET_CODE (value) == PARALLEL)
21894 value = XVECEXP (value, 0, 0);
21895 value = XEXP (value, 0);
21896 if (GET_CODE (addr) == COND_EXEC)
21897 addr = COND_EXEC_CODE (addr);
21898 if (GET_CODE (addr) == PARALLEL)
21899 addr = XVECEXP (addr, 0, 0);
21900 addr = XEXP (addr, 1);
21902 return reg_overlap_mentioned_p (value, addr);
21905 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21906 have an early register shift value or amount dependency on the
21907 result of PRODUCER. */
21909 int
21910 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21912 rtx value = PATTERN (producer);
21913 rtx op = PATTERN (consumer);
21914 rtx early_op;
21916 if (GET_CODE (value) == COND_EXEC)
21917 value = COND_EXEC_CODE (value);
21918 if (GET_CODE (value) == PARALLEL)
21919 value = XVECEXP (value, 0, 0);
21920 value = XEXP (value, 0);
21921 if (GET_CODE (op) == COND_EXEC)
21922 op = COND_EXEC_CODE (op);
21923 if (GET_CODE (op) == PARALLEL)
21924 op = XVECEXP (op, 0, 0);
21925 op = XEXP (op, 1);
21927 early_op = XEXP (op, 0);
21928 /* This is either an actual independent shift, or a shift applied to
21929 the first operand of another operation. We want the whole shift
21930 operation. */
21931 if (GET_CODE (early_op) == REG)
21932 early_op = op;
21934 return !reg_overlap_mentioned_p (value, early_op);
21937 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21938 have an early register shift value dependency on the result of
21939 PRODUCER. */
21941 int
21942 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21944 rtx value = PATTERN (producer);
21945 rtx op = PATTERN (consumer);
21946 rtx early_op;
21948 if (GET_CODE (value) == COND_EXEC)
21949 value = COND_EXEC_CODE (value);
21950 if (GET_CODE (value) == PARALLEL)
21951 value = XVECEXP (value, 0, 0);
21952 value = XEXP (value, 0);
21953 if (GET_CODE (op) == COND_EXEC)
21954 op = COND_EXEC_CODE (op);
21955 if (GET_CODE (op) == PARALLEL)
21956 op = XVECEXP (op, 0, 0);
21957 op = XEXP (op, 1);
21959 early_op = XEXP (op, 0);
21961 /* This is either an actual independent shift, or a shift applied to
21962 the first operand of another operation. We want the value being
21963 shifted, in either case. */
21964 if (GET_CODE (early_op) != REG)
21965 early_op = XEXP (early_op, 0);
21967 return !reg_overlap_mentioned_p (value, early_op);
21970 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21971 have an early register mult dependency on the result of
21972 PRODUCER. */
21974 int
21975 arm_no_early_mul_dep (rtx producer, rtx consumer)
21977 rtx value = PATTERN (producer);
21978 rtx op = PATTERN (consumer);
21980 if (GET_CODE (value) == COND_EXEC)
21981 value = COND_EXEC_CODE (value);
21982 if (GET_CODE (value) == PARALLEL)
21983 value = XVECEXP (value, 0, 0);
21984 value = XEXP (value, 0);
21985 if (GET_CODE (op) == COND_EXEC)
21986 op = COND_EXEC_CODE (op);
21987 if (GET_CODE (op) == PARALLEL)
21988 op = XVECEXP (op, 0, 0);
21989 op = XEXP (op, 1);
21991 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
21993 if (GET_CODE (XEXP (op, 0)) == MULT)
21994 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
21995 else
21996 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
21999 return 0;
22002 /* We can't rely on the caller doing the proper promotion when
22003 using APCS or ATPCS. */
22005 static bool
22006 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
22008 return !TARGET_AAPCS_BASED;
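/* Implement the TARGET_PROMOTE_FUNCTION_MODE hook: promote integer
   arguments and return values narrower than a word to SImode.  */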
22011 static enum machine_mode
22012 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
22013 enum machine_mode mode,
22014 int *punsignedp ATTRIBUTE_UNUSED,
22015 const_tree fntype ATTRIBUTE_UNUSED,
22016 int for_return ATTRIBUTE_UNUSED)
22018 if (GET_MODE_CLASS (mode) == MODE_INT
22019 && GET_MODE_SIZE (mode) < 4)
22020 return SImode;
22022 return mode;
22025 /* AAPCS based ABIs use short enums by default. */
22027 static bool
22028 arm_default_short_enums (void)
22030 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
22034 /* AAPCS requires that anonymous bitfields affect structure alignment. */
22036 static bool
22037 arm_align_anon_bitfield (void)
22039 return TARGET_AAPCS_BASED;
22043 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
22045 static tree
22046 arm_cxx_guard_type (void)
22048 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
22051 /* Return non-zero if the consumer (a multiply-accumulate instruction)
22052 has an accumulator dependency on the result of the producer (a
22053 multiplication instruction) and no other dependency on that result. */
22054 int
22055 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
22057 rtx mul = PATTERN (producer);
22058 rtx mac = PATTERN (consumer);
22059 rtx mul_result;
22060 rtx mac_op0, mac_op1, mac_acc;
22062 if (GET_CODE (mul) == COND_EXEC)
22063 mul = COND_EXEC_CODE (mul);
22064 if (GET_CODE (mac) == COND_EXEC)
22065 mac = COND_EXEC_CODE (mac);
22067 /* Check that mul is of the form (set (...) (mult ...))
22068 and mla is of the form (set (...) (plus (mult ...) (...))). */
22069 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
22070 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
22071 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
22072 return 0;
22074 mul_result = XEXP (mul, 0);
22075 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
22076 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
22077 mac_acc = XEXP (XEXP (mac, 1), 1);
22079 return (reg_overlap_mentioned_p (mul_result, mac_acc)
22080 && !reg_overlap_mentioned_p (mul_result, mac_op0)
22081 && !reg_overlap_mentioned_p (mul_result, mac_op1));
22085 /* The EABI says test the least significant bit of a guard variable. */
22087 static bool
22088 arm_cxx_guard_mask_bit (void)
22090 return TARGET_AAPCS_BASED;
22094 /* The EABI specifies that all array cookies are 8 bytes long. */
22096 static tree
22097 arm_get_cookie_size (tree type)
22099 tree size;
22101 if (!TARGET_AAPCS_BASED)
22102 return default_cxx_get_cookie_size (type);
22104 size = build_int_cst (sizetype, 8);
22105 return size;
22109 /* The EABI says that array cookies should also contain the element size. */
22111 static bool
22112 arm_cookie_has_size (void)
22114 return TARGET_AAPCS_BASED;
22118 /* The EABI says constructors and destructors should return a pointer to
22119 the object constructed/destroyed. */
22121 static bool
22122 arm_cxx_cdtor_returns_this (void)
22124 return TARGET_AAPCS_BASED;
22127 /* The EABI says that an inline function may never be the key
22128 method. */
22130 static bool
22131 arm_cxx_key_method_may_be_inline (void)
22133 return !TARGET_AAPCS_BASED;
22136 static void
22137 arm_cxx_determine_class_data_visibility (tree decl)
22139 if (!TARGET_AAPCS_BASED
22140 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
22141 return;
22143 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22144 is exported. However, on systems without dynamic vague linkage,
22145 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22146 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
22147 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
22148 else
22149 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
22150 DECL_VISIBILITY_SPECIFIED (decl) = 1;
22153 static bool
22154 arm_cxx_class_data_always_comdat (void)
22156 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22157 vague linkage if the class has no key function. */
22158 return !TARGET_AAPCS_BASED;
22162 /* The EABI says __aeabi_atexit should be used to register static
22163 destructors. */
22165 static bool
22166 arm_cxx_use_aeabi_atexit (void)
22168 return TARGET_AAPCS_BASED;
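/* Set the return address of the current function to SOURCE: directly in LR
   if LR was not saved, otherwise in the stack slot where the prologue saved
   it, using SCRATCH to form the address when the offset is too large.  */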
22172 void
22173 arm_set_return_address (rtx source, rtx scratch)
22175 arm_stack_offsets *offsets;
22176 HOST_WIDE_INT delta;
22177 rtx addr;
22178 unsigned long saved_regs;
22180 offsets = arm_get_frame_offsets ();
22181 saved_regs = offsets->saved_regs_mask;
22183 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22184 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22185 else
22187 if (frame_pointer_needed)
22188 addr = plus_constant(hard_frame_pointer_rtx, -4);
22189 else
22191 /* LR will be the first saved register. */
22192 delta = offsets->outgoing_args - (offsets->frame + 4);
22195 if (delta >= 4096)
22197 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22198 GEN_INT (delta & ~4095)));
22199 addr = scratch;
22200 delta &= 4095;
22202 else
22203 addr = stack_pointer_rtx;
22205 addr = plus_constant (addr, delta);
22207 emit_move_insn (gen_frame_mem (Pmode, addr), source);
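/* Thumb counterpart of arm_set_return_address: store SOURCE into LR or
   into the stack slot holding the saved return address, using SCRATCH
   when the offset exceeds the reach of the addressing mode.  */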
22212 void
22213 thumb_set_return_address (rtx source, rtx scratch)
22215 arm_stack_offsets *offsets;
22216 HOST_WIDE_INT delta;
22217 HOST_WIDE_INT limit;
22218 int reg;
22219 rtx addr;
22220 unsigned long mask;
22222 emit_use (source);
22224 offsets = arm_get_frame_offsets ();
22225 mask = offsets->saved_regs_mask;
22226 if (mask & (1 << LR_REGNUM))
22228 limit = 1024;
22229 /* Find the saved regs. */
22230 if (frame_pointer_needed)
22232 delta = offsets->soft_frame - offsets->saved_args;
22233 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
22234 if (TARGET_THUMB1)
22235 limit = 128;
22237 else
22239 delta = offsets->outgoing_args - offsets->saved_args;
22240 reg = SP_REGNUM;
22242 /* Allow for the stack frame. */
22243 if (TARGET_THUMB1 && TARGET_BACKTRACE)
22244 delta -= 16;
22245 /* The link register is always the first saved register. */
22246 delta -= 4;
22248 /* Construct the address. */
22249 addr = gen_rtx_REG (SImode, reg);
22250 if (delta > limit)
22252 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
22253 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
22254 addr = scratch;
22256 else
22257 addr = plus_constant (addr, delta);
22259 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22261 else
22262 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22265 /* Implements target hook vector_mode_supported_p. */
22266 bool
22267 arm_vector_mode_supported_p (enum machine_mode mode)
22269 /* Neon also supports V2SImode, etc. listed in the clause below. */
22270 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
22271 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
22272 return true;
22274 if ((TARGET_NEON || TARGET_IWMMXT)
22275 && ((mode == V2SImode)
22276 || (mode == V4HImode)
22277 || (mode == V8QImode)))
22278 return true;
22280 return false;
22283 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22284 registers when autovectorizing for Neon, at least until multiple vector
22285 widths are supported properly by the middle-end. */
22287 static enum machine_mode
22288 arm_preferred_simd_mode (enum machine_mode mode)
22290 if (TARGET_NEON)
22291 switch (mode)
22293 case SFmode:
22294 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22295 case SImode:
22296 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22297 case HImode:
22298 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22299 case QImode:
22300 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22301 case DImode:
22302 if (TARGET_NEON_VECTORIZE_QUAD)
22303 return V2DImode;
22304 break;
22306 default:;
22309 if (TARGET_REALLY_IWMMXT)
22310 switch (mode)
22312 case SImode:
22313 return V2SImode;
22314 case HImode:
22315 return V4HImode;
22316 case QImode:
22317 return V8QImode;
22319 default:;
22322 return word_mode;
22325 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22327 We need to define this for LO_REGS on thumb. Otherwise we can end up
22328 using r0-r4 for function arguments and r7 for the stack frame, leaving
22329 too few registers to do doubleword arithmetic.
22331 static bool
22332 arm_class_likely_spilled_p (reg_class_t rclass)
22334 if ((TARGET_THUMB && rclass == LO_REGS)
22335 || rclass == CC_REG)
22336 return true;
22338 return false;
22341 /* Implements target hook small_register_classes_for_mode_p. */
22342 bool
22343 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22345 return TARGET_THUMB1;
22348 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22349 ARM insns and therefore guarantee that the shift count is truncated modulo 256.
22350 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22351 guarantee no particular behavior for out-of-range counts. */
22353 static unsigned HOST_WIDE_INT
22354 arm_shift_truncation_mask (enum machine_mode mode)
22356 return mode == SImode ? 255 : 0;
22360 /* Map internal gcc register numbers to DWARF2 register numbers. */
22362 unsigned int
22363 arm_dbx_register_number (unsigned int regno)
22365 if (regno < 16)
22366 return regno;
22368 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22369 compatibility. The EABI defines them as registers 96-103. */
22370 if (IS_FPA_REGNUM (regno))
22371 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22373 if (IS_VFP_REGNUM (regno))
22375 /* See comment in arm_dwarf_register_span. */
22376 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22377 return 64 + regno - FIRST_VFP_REGNUM;
22378 else
22379 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22382 if (IS_IWMMXT_GR_REGNUM (regno))
22383 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22385 if (IS_IWMMXT_REGNUM (regno))
22386 return 112 + regno - FIRST_IWMMXT_REGNUM;
22388 gcc_unreachable ();
22391 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22392 GCC models them as 64 32-bit registers, so we need to describe this to
22393 the DWARF generation code. Other registers can use the default. */
22394 static rtx
22395 arm_dwarf_register_span (rtx rtl)
22397 unsigned regno;
22398 int nregs;
22399 int i;
22400 rtx p;
22402 regno = REGNO (rtl);
22403 if (!IS_VFP_REGNUM (regno))
22404 return NULL_RTX;
22406 /* XXX FIXME: The EABI defines two VFP register ranges:
22407 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22408 256-287: D0-D31
22409 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22410 corresponding D register. Until GDB supports this, we shall use the
22411 legacy encodings. We also use these encodings for D0-D15 for
22412 compatibility with older debuggers. */
22413 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22414 return NULL_RTX;
22416 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22417 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22418 regno = (regno - FIRST_VFP_REGNUM) / 2;
22419 for (i = 0; i < nregs; i++)
22420 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
22422 return p;
22425 #if ARM_UNWIND_INFO
22426 /* Emit unwind directives for a store-multiple instruction or stack pointer
22427 push during alignment.
22428 These should only ever be generated by the function prologue code, so
22429 expect them to have a particular form. */
22431 static void
22432 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22434 int i;
22435 HOST_WIDE_INT offset;
22436 HOST_WIDE_INT nregs;
22437 int reg_size;
22438 unsigned reg;
22439 unsigned lastreg;
22440 rtx e;
22442 e = XVECEXP (p, 0, 0);
22443 if (GET_CODE (e) != SET)
22444 abort ();
22446 /* First insn will adjust the stack pointer. */
22447 if (GET_CODE (e) != SET
22448 || GET_CODE (XEXP (e, 0)) != REG
22449 || REGNO (XEXP (e, 0)) != SP_REGNUM
22450 || GET_CODE (XEXP (e, 1)) != PLUS)
22451 abort ();
22453 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22454 nregs = XVECLEN (p, 0) - 1;
22456 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
22457 if (reg < 16)
22459 /* The function prologue may also push pc, but does not annotate it, as it
22460 is never restored. We turn this into a stack pointer adjustment. */
22461 if (nregs * 4 == offset - 4)
22463 fprintf (asm_out_file, "\t.pad #4\n");
22464 offset -= 4;
22466 reg_size = 4;
22467 fprintf (asm_out_file, "\t.save {");
22469 else if (IS_VFP_REGNUM (reg))
22471 reg_size = 8;
22472 fprintf (asm_out_file, "\t.vsave {");
22474 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22476 /* FPA registers are done differently. */
22477 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22478 return;
22480 else
22481 /* Unknown register type. */
22482 abort ();
22484 /* If the stack increment doesn't match the size of the saved registers,
22485 something has gone horribly wrong. */
22486 if (offset != nregs * reg_size)
22487 abort ();
22489 offset = 0;
22490 lastreg = 0;
22491 /* The remaining insns will describe the stores. */
22492 for (i = 1; i <= nregs; i++)
22494 /* Expect (set (mem <addr>) (reg)).
22495 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22496 e = XVECEXP (p, 0, i);
22497 if (GET_CODE (e) != SET
22498 || GET_CODE (XEXP (e, 0)) != MEM
22499 || GET_CODE (XEXP (e, 1)) != REG)
22500 abort ();
22502 reg = REGNO (XEXP (e, 1));
22503 if (reg < lastreg)
22504 abort ();
22506 if (i != 1)
22507 fprintf (asm_out_file, ", ");
22508 /* We can't use %r for vfp because we need to use the
22509 double precision register names. */
22510 if (IS_VFP_REGNUM (reg))
22511 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22512 else
22513 asm_fprintf (asm_out_file, "%r", reg);
22515 #ifdef ENABLE_CHECKING
22516 /* Check that the addresses are consecutive. */
22517 e = XEXP (XEXP (e, 0), 0);
22518 if (GET_CODE (e) == PLUS)
22520 offset += reg_size;
22521 if (GET_CODE (XEXP (e, 0)) != REG
22522 || REGNO (XEXP (e, 0)) != SP_REGNUM
22523 || GET_CODE (XEXP (e, 1)) != CONST_INT
22524 || offset != INTVAL (XEXP (e, 1)))
22525 abort ();
22527 else if (i != 1
22528 || GET_CODE (e) != REG
22529 || REGNO (e) != SP_REGNUM)
22530 abort ();
22531 #endif
22533 fprintf (asm_out_file, "}\n");
22536 /* Emit unwind directives for a SET. */
22538 static void
22539 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22541 rtx e0;
22542 rtx e1;
22543 unsigned reg;
22545 e0 = XEXP (p, 0);
22546 e1 = XEXP (p, 1);
22547 switch (GET_CODE (e0))
22549 case MEM:
22550 /* Pushing a single register. */
22551 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22552 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22553 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22554 abort ();
22556 asm_fprintf (asm_out_file, "\t.save ");
22557 if (IS_VFP_REGNUM (REGNO (e1)))
22558 asm_fprintf(asm_out_file, "{d%d}\n",
22559 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
22560 else
22561 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
22562 break;
22564 case REG:
22565 if (REGNO (e0) == SP_REGNUM)
22567 /* A stack increment. */
22568 if (GET_CODE (e1) != PLUS
22569 || GET_CODE (XEXP (e1, 0)) != REG
22570 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22571 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22572 abort ();
22574 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22575 -INTVAL (XEXP (e1, 1)));
22577 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22579 HOST_WIDE_INT offset;
22581 if (GET_CODE (e1) == PLUS)
22583 if (GET_CODE (XEXP (e1, 0)) != REG
22584 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22585 abort ();
22586 reg = REGNO (XEXP (e1, 0));
22587 offset = INTVAL (XEXP (e1, 1));
22588 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22589 HARD_FRAME_POINTER_REGNUM, reg,
22590 offset);
22592 else if (GET_CODE (e1) == REG)
22594 reg = REGNO (e1);
22595 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22596 HARD_FRAME_POINTER_REGNUM, reg);
22598 else
22599 abort ();
22601 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22603 /* Move from sp to reg. */
22604 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22606 else if (GET_CODE (e1) == PLUS
22607 && GET_CODE (XEXP (e1, 0)) == REG
22608 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22609 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22611 /* Set reg to offset from sp. */
22612 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
22613 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
22615 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22617 /* Stack pointer save before alignment. */
22618 reg = REGNO (e0);
22619 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22620 reg + 0x90, reg);
22622 else
22623 abort ();
22624 break;
22626 default:
22627 abort ();
22632 /* Emit unwind directives for the given insn. */
22634 static void
22635 arm_unwind_emit (FILE * asm_out_file, rtx insn)
22637 rtx pat;
22639 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22640 return;
22642 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22643 && (TREE_NOTHROW (current_function_decl)
22644 || crtl->all_throwers_are_sibcalls))
22645 return;
22647 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
22648 return;
22650 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
22651 if (pat)
22652 pat = XEXP (pat, 0);
22653 else
22654 pat = PATTERN (insn);
22656 switch (GET_CODE (pat))
22658 case SET:
22659 arm_unwind_emit_set (asm_out_file, pat);
22660 break;
22662 case SEQUENCE:
22663 /* Store multiple. */
22664 arm_unwind_emit_sequence (asm_out_file, pat);
22665 break;
22667 default:
22668 abort();
22673 /* Output a reference from a function exception table to the type_info
22674 object X. The EABI specifies that the symbol should be relocated by
22675 an R_ARM_TARGET2 relocation. */
22677 static bool
22678 arm_output_ttype (rtx x)
22680 fputs ("\t.word\t", asm_out_file);
22681 output_addr_const (asm_out_file, x);
22682 /* Use special relocations for symbol references. */
22683 if (GET_CODE (x) != CONST_INT)
22684 fputs ("(TARGET2)", asm_out_file);
22685 fputc ('\n', asm_out_file);
22687 return TRUE;
22690 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22692 static void
22693 arm_asm_emit_except_personality (rtx personality)
22695 fputs ("\t.personality\t", asm_out_file);
22696 output_addr_const (asm_out_file, personality);
22697 fputc ('\n', asm_out_file);
22700 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22702 static void
22703 arm_asm_init_sections (void)
22705 exception_section = get_unnamed_section (0, output_section_asm_op,
22706 "\t.handlerdata");
22708 #endif /* ARM_UNWIND_INFO */
22710 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22712 static enum unwind_info_type
22713 arm_except_unwind_info (struct gcc_options *opts)
22715 /* Honor the --enable-sjlj-exceptions configure switch. */
22716 #ifdef CONFIG_SJLJ_EXCEPTIONS
22717 if (CONFIG_SJLJ_EXCEPTIONS)
22718 return UI_SJLJ;
22719 #endif
22721 /* If not using ARM EABI unwind tables... */
22722 if (ARM_UNWIND_INFO)
22724 /* For simplicity elsewhere in this file, indicate that all unwind
22725 info is disabled if we're not emitting unwind tables. */
22726 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
22727 return UI_NONE;
22728 else
22729 return UI_TARGET;
22732 /* ... we use sjlj exceptions for backwards compatibility. */
22733 return UI_SJLJ;
22737 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22738 stack alignment. */
22740 static void
22741 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22743 rtx unspec = SET_SRC (pattern);
22744 gcc_assert (GET_CODE (unspec) == UNSPEC);
22746 switch (index)
22748 case UNSPEC_STACK_ALIGN:
22749 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22750 put anything on the stack, so hopefully it won't matter.
22751 CFA = SP will be correct after alignment. */
22752 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22753 SET_DEST (pattern));
22754 break;
22755 default:
22756 gcc_unreachable ();
22761 /* Output unwind directives for the start/end of a function. */
22763 void
22764 arm_output_fn_unwind (FILE * f, bool prologue)
22766 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22767 return;
22769 if (prologue)
22770 fputs ("\t.fnstart\n", f);
22771 else
22773 /* If this function will never be unwound, then mark it as such.
22774 The same condition is used in arm_unwind_emit to suppress
22775 the frame annotations. */
22776 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22777 && (TREE_NOTHROW (current_function_decl)
22778 || crtl->all_throwers_are_sibcalls))
22779 fputs("\t.cantunwind\n", f);
22781 fputs ("\t.fnend\n", f);
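/* Output the TLS relocation decoration for the UNSPEC_TLS expression X:
   the symbol followed by the relocation name and, for the GD/LDM/IE
   forms, the PC-relative correction term.  */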
22785 static bool
22786 arm_emit_tls_decoration (FILE *fp, rtx x)
22788 enum tls_reloc reloc;
22789 rtx val;
22791 val = XVECEXP (x, 0, 0);
22792 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22794 output_addr_const (fp, val);
22796 switch (reloc)
22798 case TLS_GD32:
22799 fputs ("(tlsgd)", fp);
22800 break;
22801 case TLS_LDM32:
22802 fputs ("(tlsldm)", fp);
22803 break;
22804 case TLS_LDO32:
22805 fputs ("(tlsldo)", fp);
22806 break;
22807 case TLS_IE32:
22808 fputs ("(gottpoff)", fp);
22809 break;
22810 case TLS_LE32:
22811 fputs ("(tpoff)", fp);
22812 break;
22813 default:
22814 gcc_unreachable ();
22817 switch (reloc)
22819 case TLS_GD32:
22820 case TLS_LDM32:
22821 case TLS_IE32:
22822 fputs (" + (. - ", fp);
22823 output_addr_const (fp, XVECEXP (x, 0, 2));
22824 fputs (" - ", fp);
22825 output_addr_const (fp, XVECEXP (x, 0, 3));
22826 fputc (')', fp);
22827 break;
22828 default:
22829 break;
22832 return TRUE;
22835 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22837 static void
22838 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22840 gcc_assert (size == 4);
22841 fputs ("\t.word\t", file);
22842 output_addr_const (file, x);
22843 fputs ("(tlsldo)", file);
22846 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22848 static bool
22849 arm_output_addr_const_extra (FILE *fp, rtx x)
22851 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22852 return arm_emit_tls_decoration (fp, x);
22853 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22855 char label[256];
22856 int labelno = INTVAL (XVECEXP (x, 0, 0));
22858 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22859 assemble_name_raw (fp, label);
22861 return TRUE;
22863 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22865 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22866 if (GOT_PCREL)
22867 fputs ("+.", fp);
22868 fputs ("-(", fp);
22869 output_addr_const (fp, XVECEXP (x, 0, 0));
22870 fputc (')', fp);
22871 return TRUE;
22873 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22875 output_addr_const (fp, XVECEXP (x, 0, 0));
22876 if (GOT_PCREL)
22877 fputs ("+.", fp);
22878 fputs ("-(", fp);
22879 output_addr_const (fp, XVECEXP (x, 0, 1));
22880 fputc (')', fp);
22881 return TRUE;
22883 else if (GET_CODE (x) == CONST_VECTOR)
22884 return arm_emit_vector_const (fp, x);
22886 return FALSE;
22889 /* Output assembly for a shift instruction.
22890 SET_FLAGS determines how the instruction modifies the condition codes.
22891 0 - Do not set condition codes.
22892 1 - Set condition codes.
22893 2 - Use smallest instruction. */
22894 const char *
22895 arm_output_shift(rtx * operands, int set_flags)
22897 char pattern[100];
22898 static const char flag_chars[3] = {'?', '.', '!'};
22899 const char *shift;
22900 HOST_WIDE_INT val;
22901 char c;
22903 c = flag_chars[set_flags];
22904 if (TARGET_UNIFIED_ASM)
22906 shift = shift_op(operands[3], &val);
22907 if (shift)
22909 if (val != -1)
22910 operands[2] = GEN_INT(val);
22911 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
22913 else
22914 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
22916 else
22917 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
22918 output_asm_insn (pattern, operands);
22919 return "";
22922 /* Output a Thumb-1 casesi dispatch sequence. */
22923 const char *
22924 thumb1_output_casesi (rtx *operands)
22926 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
22928 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22930 switch (GET_MODE(diff_vec))
22932 case QImode:
22933 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22934 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
22935 case HImode:
22936 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22937 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
22938 case SImode:
22939 return "bl\t%___gnu_thumb1_case_si";
22940 default:
22941 gcc_unreachable ();
22945 /* Output a Thumb-2 casesi instruction. */
22946 const char *
22947 thumb2_output_casesi (rtx *operands)
22949 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
22951 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22953 output_asm_insn ("cmp\t%0, %1", operands);
22954 output_asm_insn ("bhi\t%l3", operands);
22955 switch (GET_MODE(diff_vec))
22957 case QImode:
22958 return "tbb\t[%|pc, %0]";
22959 case HImode:
22960 return "tbh\t[%|pc, %0, lsl #1]";
22961 case SImode:
22962 if (flag_pic)
22964 output_asm_insn ("adr\t%4, %l2", operands);
22965 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
22966 output_asm_insn ("add\t%4, %4, %5", operands);
22967 return "bx\t%4";
22969 else
22971 output_asm_insn ("adr\t%4, %l2", operands);
22972 return "ldr\t%|pc, [%4, %0, lsl #2]";
22974 default:
22975 gcc_unreachable ();
22979 /* Most ARM cores are single issue, but some newer ones can dual issue.
22980 The scheduler descriptions rely on this being correct. */
22981 static int
22982 arm_issue_rate (void)
22984 switch (arm_tune)
22986 case cortexr4:
22987 case cortexr4f:
22988 case cortexa5:
22989 case cortexa8:
22990 case cortexa9:
22991 case fa726te:
22992 return 2;
22994 default:
22995 return 1;
22999 /* A table and a function to perform ARM-specific name mangling for
23000 NEON vector types in order to conform to the AAPCS (see "Procedure
23001 Call Standard for the ARM Architecture", Appendix A). To qualify
23002 for emission with the mangled names defined in that document, a
23003 vector type must not only be of the correct mode but also be
23004 composed of NEON vector element types (e.g. __builtin_neon_qi). */
23005 typedef struct
23007 enum machine_mode mode;
23008 const char *element_type_name;
23009 const char *aapcs_name;
23010 } arm_mangle_map_entry;
23012 static arm_mangle_map_entry arm_mangle_map[] = {
23013 /* 64-bit containerized types. */
23014 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
23015 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
23016 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
23017 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
23018 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
23019 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
23020 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
23021 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
23022 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
23023 /* 128-bit containerized types. */
23024 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
23025 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
23026 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
23027 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
23028 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
23029 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
23030 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
23031 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
23032 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
23033 { VOIDmode, NULL, NULL }
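/* For example, a 64-bit vector of __builtin_neon_qi elements (V8QImode,
   i.e. the int8x8_t of arm_neon.h) maps to "15__simd64_int8_t" above, so a
   declaration such as void f (int8x8_t) would mangle along the lines of
   _Z1f15__simd64_int8_t.  The function name here is illustrative only.  */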
23036 const char *
23037 arm_mangle_type (const_tree type)
23039 arm_mangle_map_entry *pos = arm_mangle_map;
23041 /* The ARM ABI documents (10th October 2008) say that "__va_list"
23042 has to be mangled as if it were in the "std" namespace. */
23043 if (TARGET_AAPCS_BASED
23044 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
23046 static bool warned;
23047 if (!warned && warn_psabi && !in_system_header)
23049 warned = true;
23050 inform (input_location,
23051 "the mangling of %<va_list%> has changed in GCC 4.4");
23053 return "St9__va_list";
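/* So on an AAPCS target a declaration such as void g (va_list) mangles as
   if the type were std::__va_list, i.e. roughly _Z1gSt9__va_list; the
   function name is illustrative only.  */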
23056 /* Half-precision float. */
23057 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
23058 return "Dh";
23060 if (TREE_CODE (type) != VECTOR_TYPE)
23061 return NULL;
23063 /* Check the mode of the vector type, and the name of the vector
23064 element type, against the table. */
23065 while (pos->mode != VOIDmode)
23067 tree elt_type = TREE_TYPE (type);
23069 if (pos->mode == TYPE_MODE (type)
23070 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
23071 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
23072 pos->element_type_name))
23073 return pos->aapcs_name;
23075 pos++;
23078 /* Use the default mangling for unrecognized (possibly user-defined)
23079 vector types. */
23080 return NULL;
23083 /* Order of allocation of core registers for Thumb: this allocation is
23084 written over the corresponding initial entries of the array
23085 initialized with REG_ALLOC_ORDER. We allocate all low registers
23086 first. Saving and restoring a low register is usually cheaper than
23087 using a call-clobbered high register. */
23089 static const int thumb_core_reg_alloc_order[] =
23091 3, 2, 1, 0, 4, 5, 6, 7,
23092 14, 12, 8, 9, 10, 11, 13, 15
23095 /* Adjust register allocation order when compiling for Thumb. */
23097 void
23098 arm_order_regs_for_local_alloc (void)
23100 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
23101 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
23102 if (TARGET_THUMB)
23103 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
23104 sizeof (thumb_core_reg_alloc_order));
23107 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23109 bool
23110 arm_frame_pointer_required (void)
23112 return (cfun->has_nonlocal_label
23113 || SUBTARGET_FRAME_POINTER_REQUIRED
23114 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
23117 /* Thumb-1 is the only target without conditional execution support, so
23118 return true unless the target is Thumb-1. */
23119 static bool
23120 arm_have_conditional_execution (void)
23122 return !TARGET_THUMB1;
23125 /* Legitimize a memory reference for a sync primitive implemented using
23126 ldrex / strex. We currently force the form of the reference to be
23127 indirect without offset. We do not yet support the indirect offset
23128 addressing supported by some ARM targets for these
23129 instructions. */
23130 static rtx
23131 arm_legitimize_sync_memory (rtx memory)
23133 rtx addr = force_reg (Pmode, XEXP (memory, 0));
23134 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
23136 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
23137 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
23138 return legitimate_memory;
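/* For instance, an operand such as (mem:SI (plus:SI (reg:SI sp) (const_int 8)))
   would have its address forced into a fresh register first, yielding
   (mem:SI (reg:SI Rn)), since the ldrex/strex forms emitted below only use a
   plain register-indirect address.  The exact RTL shown is illustrative.  */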
23141 /* An instruction emitter. */
23142 typedef void (* emit_f) (int label, const char *, rtx *);
23144 /* An instruction emitter that emits via the conventional
23145 output_asm_insn. */
23146 static void
23147 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23149 output_asm_insn (pattern, operands);
23152 /* Count the number of emitted synchronization instructions. */
23153 static unsigned arm_insn_count;
23155 /* An emitter that counts emitted instructions but does not actually
23156 emit instructions into the instruction stream. */
23157 static void
23158 arm_count (int label,
23159 const char *pattern ATTRIBUTE_UNUSED,
23160 rtx *operands ATTRIBUTE_UNUSED)
23162 if (! label)
23163 ++ arm_insn_count;
23166 /* Construct a pattern using conventional output formatting and feed
23167 it to output_asm_insn. Provides a mechanism to construct the
23168 output pattern on the fly. Note the hard limit on the pattern
23169 buffer size. */
23170 static void ATTRIBUTE_PRINTF_4
23171 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23172 const char *pattern, ...)
23174 va_list ap;
23175 char buffer[256];
23177 va_start (ap, pattern);
23178 vsprintf (buffer, pattern, ap);
23179 va_end (ap);
23180 emit (label, buffer, operands);
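/* Typical use (as seen further below): a call such as
   arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix)
   first formats "%s" with the mode suffix and then hands the resulting
   template, e.g. "ldrexb\t%0, %C1", to the emitter.  */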
23183 /* Emit the memory barrier instruction, if any, provided by this
23184 target to a specified emitter. */
23185 static void
23186 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23188 if (TARGET_HAVE_DMB)
23190 /* Note we issue a system level barrier. We should consider
23191 issuing an inner shareability zone barrier here instead, i.e.
23192 "DMB ISH". */
23193 emit (0, "dmb\tsy", operands);
23194 return;
23197 if (TARGET_HAVE_DMB_MCR)
23199 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23200 return;
23203 gcc_unreachable ();
23206 /* Emit the memory barrier instruction, if any, provided by this
23207 target. */
23208 const char *
23209 arm_output_memory_barrier (rtx *operands)
23211 arm_process_output_memory_barrier (arm_emit, operands);
23212 return "";
23215 /* Helper to figure out the instruction suffix required on ldrex/strex
23216 for operations on an object of the specified mode. */
23217 static const char *
23218 arm_ldrex_suffix (enum machine_mode mode)
23220 switch (mode)
23222 case QImode: return "b";
23223 case HImode: return "h";
23224 case SImode: return "";
23225 case DImode: return "d";
23226 default:
23227 gcc_unreachable ();
23229 return "";
23232 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23233 mode. */
23234 static void
23235 arm_output_ldrex (emit_f emit,
23236 enum machine_mode mode,
23237 rtx target,
23238 rtx memory)
23240 const char *suffix = arm_ldrex_suffix (mode);
23241 rtx operands[2];
23243 operands[0] = target;
23244 operands[1] = memory;
23245 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
23248 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23249 mode. */
23250 static void
23251 arm_output_strex (emit_f emit,
23252 enum machine_mode mode,
23253 const char *cc,
23254 rtx result,
23255 rtx value,
23256 rtx memory)
23258 const char *suffix = arm_ldrex_suffix (mode);
23259 rtx operands[3];
23261 operands[0] = result;
23262 operands[1] = value;
23263 operands[2] = memory;
23264 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23265 cc);
23268 /* Helper to emit a two operand instruction. */
23269 static void
23270 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23272 rtx operands[2];
23274 operands[0] = d;
23275 operands[1] = s;
23276 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23279 /* Helper to emit a three operand instruction. */
23280 static void
23281 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23283 rtx operands[3];
23285 operands[0] = d;
23286 operands[1] = a;
23287 operands[2] = b;
23288 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23291 /* Emit a load/store-exclusive synchronization loop.
23294 old_value = [mem]
23295 if old_value != required_value
23296 break;
23297 t1 = sync_op (old_value, new_value)
23298 [mem] = t1, t2 = [0|1]
23299 while ! t2
23301 Note:
23302 t1 == t2 is not permitted
23303 t1 == old_value is permitted
23305 required_value:
23307 RTX register or const_int representing the required old_value for
23308 the modify to continue; if NULL, no comparison is performed. */
23309 static void
23310 arm_output_sync_loop (emit_f emit,
23311 enum machine_mode mode,
23312 rtx old_value,
23313 rtx memory,
23314 rtx required_value,
23315 rtx new_value,
23316 rtx t1,
23317 rtx t2,
23318 enum attr_sync_op sync_op,
23319 int early_barrier_required)
23321 rtx operands[1];
23323 gcc_assert (t1 != t2);
23325 if (early_barrier_required)
23326 arm_process_output_memory_barrier (emit, NULL);
23328 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23330 arm_output_ldrex (emit, mode, old_value, memory);
23332 if (required_value)
23334 rtx operands[2];
23336 operands[0] = old_value;
23337 operands[1] = required_value;
23338 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23339 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23342 switch (sync_op)
23344 case SYNC_OP_ADD:
23345 arm_output_op3 (emit, "add", t1, old_value, new_value);
23346 break;
23348 case SYNC_OP_SUB:
23349 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23350 break;
23352 case SYNC_OP_IOR:
23353 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23354 break;
23356 case SYNC_OP_XOR:
23357 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23358 break;
23360 case SYNC_OP_AND:
23361 arm_output_op3 (emit,"and", t1, old_value, new_value);
23362 break;
23364 case SYNC_OP_NAND:
23365 arm_output_op3 (emit, "and", t1, old_value, new_value);
23366 arm_output_op2 (emit, "mvn", t1, t1);
23367 break;
23369 case SYNC_OP_NONE:
23370 t1 = new_value;
23371 break;
23374 if (t2)
23376 arm_output_strex (emit, mode, "", t2, t1, memory);
23377 operands[0] = t2;
23378 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23379 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23380 LOCAL_LABEL_PREFIX);
23382 else
23384 /* Use old_value for the return value because for some operations
23385 the old_value can easily be restored. This saves one register. */
23386 arm_output_strex (emit, mode, "", old_value, t1, memory);
23387 operands[0] = old_value;
23388 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23389 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23390 LOCAL_LABEL_PREFIX);
23392 switch (sync_op)
23394 case SYNC_OP_ADD:
23395 arm_output_op3 (emit, "sub", old_value, t1, new_value);
23396 break;
23398 case SYNC_OP_SUB:
23399 arm_output_op3 (emit, "add", old_value, t1, new_value);
23400 break;
23402 case SYNC_OP_XOR:
23403 arm_output_op3 (emit, "eor", old_value, t1, new_value);
23404 break;
23406 case SYNC_OP_NONE:
23407 arm_output_op2 (emit, "mov", old_value, required_value);
23408 break;
23410 default:
23411 gcc_unreachable ();
23415 arm_process_output_memory_barrier (emit, NULL);
23416 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
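/* By way of illustration, for an SImode add with no required_value and a
   spare t2 register the loop above comes out roughly as follows (register
   numbers and the local label number are illustrative):

       dmb     sy
   .LSYT8:
       ldrex   r0, [r2]
       add     r3, r0, r1
       strex   r4, r3, [r2]
       teq     r4, #0
       bne     .LSYT8
       dmb     sy
   .LSYB8:          @ always emitted; bail-out target when required_value is given  */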
23419 static rtx
23420 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23422 if (index > 0)
23423 default_value = operands[index - 1];
23425 return default_value;
23428 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23429 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23431 /* Extract the operands for a synchronization instruction from the
23432 instruction's attributes and emit the instruction. */
23433 static void
23434 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23436 rtx result, memory, required_value, new_value, t1, t2;
23437 int early_barrier;
23438 enum machine_mode mode;
23439 enum attr_sync_op sync_op;
23441 result = FETCH_SYNC_OPERAND(result, 0);
23442 memory = FETCH_SYNC_OPERAND(memory, 0);
23443 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23444 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23445 t1 = FETCH_SYNC_OPERAND(t1, 0);
23446 t2 = FETCH_SYNC_OPERAND(t2, 0);
23447 early_barrier =
23448 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23449 sync_op = get_attr_sync_op (insn);
23450 mode = GET_MODE (memory);
23452 arm_output_sync_loop (emit, mode, result, memory, required_value,
23453 new_value, t1, t2, sync_op, early_barrier);
23456 /* Emit a synchronization instruction loop. */
23457 const char *
23458 arm_output_sync_insn (rtx insn, rtx *operands)
23460 arm_process_output_sync_insn (arm_emit, insn, operands);
23461 return "";
23464 /* Count the number of machine instructions that will be emitted for a
23465 synchronization instruction. Note that the emitter used does not
23466 emit instructions; it just counts them, being careful not to count
23467 labels. */
23468 unsigned int
23469 arm_sync_loop_insns (rtx insn, rtx *operands)
23471 arm_insn_count = 0;
23472 arm_process_output_sync_insn (arm_count, insn, operands);
23473 return arm_insn_count;
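/* A caller can turn this count into a byte size (e.g. for a length
   attribute): in ARM state, where every instruction is four bytes, that
   would simply be arm_sync_loop_insns (insn, operands) * 4.  */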
23476 /* Helper to call a target sync instruction generator, dealing with
23477 the variation in operands required by the different generators. */
23478 static rtx
23479 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23480 rtx memory, rtx required_value, rtx new_value)
23482 switch (generator->op)
23484 case arm_sync_generator_omn:
23485 gcc_assert (! required_value);
23486 return generator->u.omn (old_value, memory, new_value);
23488 case arm_sync_generator_omrn:
23489 gcc_assert (required_value);
23490 return generator->u.omrn (old_value, memory, required_value, new_value);
23493 return NULL;
23496 /* Expand a synchronization loop. The synchronization loop is expanded
23497 as an opaque block of instructions in order to ensure that we do
23498 not subsequently get extraneous memory accesses inserted within the
23499 critical region. The exclusive access property of ldrex/strex is
23500 only guaranteed if there are no intervening memory accesses. */
23501 void
23502 arm_expand_sync (enum machine_mode mode,
23503 struct arm_sync_generator *generator,
23504 rtx target, rtx memory, rtx required_value, rtx new_value)
23506 if (target == NULL)
23507 target = gen_reg_rtx (mode);
23509 memory = arm_legitimize_sync_memory (memory);
23510 if (mode != SImode)
23512 rtx load_temp = gen_reg_rtx (SImode);
23514 if (required_value)
23515 required_value = convert_modes (SImode, mode, required_value, true);
23517 new_value = convert_modes (SImode, mode, new_value, true);
23518 emit_insn (arm_call_generator (generator, load_temp, memory,
23519 required_value, new_value));
23520 emit_move_insn (target, gen_lowpart (mode, load_temp));
23522 else
23524 emit_insn (arm_call_generator (generator, target, memory, required_value,
23525 new_value));
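/* As a sketch of the intended flow: a __sync_val_compare_and_swap on an int
   reaches this point with required_value set and mode == SImode, so the
   generator is called directly on the target register; the same builtin on a
   short would instead have both values zero-extended to SImode above and the
   final result narrowed back with gen_lowpart.  */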
23529 static bool
23530 arm_vector_alignment_reachable (const_tree type, bool is_packed)
23532 /* Vectors which aren't in packed structures will not be less aligned than
23533 the natural alignment of their element type, so this is safe. */
23534 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23535 return !is_packed;
23537 return default_builtin_vector_alignment_reachable (type, is_packed);
23540 static bool
23541 arm_builtin_support_vector_misalignment (enum machine_mode mode,
23542 const_tree type, int misalignment,
23543 bool is_packed)
23545 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23547 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
23549 if (is_packed)
23550 return align == 1;
23552 /* If the misalignment is unknown, we should be able to handle the access
23553 so long as it is not to a member of a packed data structure. */
23554 if (misalignment == -1)
23555 return true;
23557 /* Return true if the misalignment is a multiple of the natural alignment
23558 of the vector's element type. This is probably always going to be
23559 true in practice, since we've already established that this isn't a
23560 packed access. */
23561 return ((misalignment % align) == 0);
23564 return default_builtin_support_vector_misalignment (mode, type, misalignment,
23565 is_packed);
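/* Worked example for the NEON little-endian path above: with an alignment
   unit of 4 bytes, a known misalignment of 8 passes the check (8 % 4 == 0)
   while a misalignment of 2 does not; an unknown misalignment (-1) is
   accepted, and a packed access is accepted only when the alignment unit is
   a single byte.  */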
23568 static void
23569 arm_conditional_register_usage (void)
23571 int regno;
23573 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
23575 for (regno = FIRST_FPA_REGNUM;
23576 regno <= LAST_FPA_REGNUM; ++regno)
23577 fixed_regs[regno] = call_used_regs[regno] = 1;
23580 if (TARGET_THUMB1 && optimize_size)
23582 /* When optimizing for size on Thumb-1, it's better not
23583 to use the HI regs, because of the overhead of
23584 stacking them. */
23585 for (regno = FIRST_HI_REGNUM;
23586 regno <= LAST_HI_REGNUM; ++regno)
23587 fixed_regs[regno] = call_used_regs[regno] = 1;
23590 /* The link register can be clobbered by any branch insn,
23591 but we have no way to track that at present, so mark
23592 it as unavailable. */
23593 if (TARGET_THUMB1)
23594 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
23596 if (TARGET_32BIT && TARGET_HARD_FLOAT)
23598 if (TARGET_MAVERICK)
23600 for (regno = FIRST_FPA_REGNUM;
23601 regno <= LAST_FPA_REGNUM; ++ regno)
23602 fixed_regs[regno] = call_used_regs[regno] = 1;
23603 for (regno = FIRST_CIRRUS_FP_REGNUM;
23604 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
23606 fixed_regs[regno] = 0;
23607 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
23610 if (TARGET_VFP)
23612 /* VFPv3 registers are disabled when earlier VFP
23613 versions are selected due to the definition of
23614 LAST_VFP_REGNUM. */
23615 for (regno = FIRST_VFP_REGNUM;
23616 regno <= LAST_VFP_REGNUM; ++ regno)
23618 fixed_regs[regno] = 0;
23619 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
23620 || regno >= FIRST_VFP_REGNUM + 32;
23625 if (TARGET_REALLY_IWMMXT)
23627 regno = FIRST_IWMMXT_GR_REGNUM;
23628 /* The 2002/10/09 revision of the XScale ABI has wCG0
23629 and wCG1 as call-preserved registers. The 2002/11/21
23630 revision changed this so that all wCG registers are
23631 scratch registers. */
23632 for (regno = FIRST_IWMMXT_GR_REGNUM;
23633 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
23634 fixed_regs[regno] = 0;
23635 /* The XScale ABI has wR0 - wR9 as scratch registers,
23636 the rest as call-preserved registers. */
23637 for (regno = FIRST_IWMMXT_REGNUM;
23638 regno <= LAST_IWMMXT_REGNUM; ++ regno)
23640 fixed_regs[regno] = 0;
23641 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
23645 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
23647 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23648 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23650 else if (TARGET_APCS_STACK)
23652 fixed_regs[10] = 1;
23653 call_used_regs[10] = 1;
23655 /* -mcaller-super-interworking reserves r11 for calls to
23656 _interwork_r11_call_via_rN(). Making the register global
23657 is an easy way of ensuring that it remains valid for all
23658 calls. */
23659 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
23660 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
23662 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23663 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23664 if (TARGET_CALLER_INTERWORKING)
23665 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23667 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23670 static reg_class_t
23671 arm_preferred_rename_class (reg_class_t rclass)
23673 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
23674 using GENERAL_REGS. During the register renaming pass we therefore
23675 prefer LO_REGS, so that code size can be reduced. */
23676 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
23677 return LO_REGS;
23678 else
23679 return NO_REGS;
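/* The saving comes from the 16-bit Thumb-2 encodings, which are generally
   only available when all operands are low registers; renaming a value from,
   say, r8 into r3 can therefore shrink a data-processing instruction from
   four bytes to two.  Register numbers here are illustrative.  */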
23682 #include "gt-arm.h"