/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "recog.h"
#include "cgraph.h"
#include "ggc.h"
#include "except.h"
#include "c-pragma.h"
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "df.h"
#include "intl.h"
#include "libfuncs.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;
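
/* Hook that a language front end may set to emit extra, language-specific
   object attributes; it is left NULL when no such attributes are needed.  */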
void (*arm_lang_output_object_attributes_hook)(void);

/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static rtx is_jump_table (rtx);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);

static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);

/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.
  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef  TARGET_HELP
#define TARGET_HELP arm_target_help

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The default processor used if not overridden by commandline.  */
static enum processor_type arm_default_cpu = arm_none;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
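
/* Cumulative flag sets for each architecture level; each entry builds on
   the flags of the architecture it extends.  */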
#define FL_FOR_ARCH2      FL_NOTM
#define FL_FOR_ARCH3      (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M     (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4      (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T     (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5      (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T     (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E     (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE    (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ   FL_FOR_ARCH5TE
#define FL_FOR_ARCH6      (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J     FL_FOR_ARCH6
#define FL_FOR_ARCH6K     (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z     FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK    FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2    (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M     (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7      (FL_FOR_ARCH6T2 & ~FL_NOTM)
#define FL_FOR_ARCH7A     (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R     (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M     (FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM    (FL_FOR_ARCH7M | FL_ARCH7EM)

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
   must report the mode of the memory reference from PRINT_OPERAND to
   PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

/* The maximum number of insns to be used when loading a constant.  */
static int arm_constant_limit = 3;
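
/* The default procedure calling standard in effect for the current
   compilation.  */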
static enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)
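
/* The low registers (r0-r7) available as work registers in Thumb-2,
   excluding the hard frame pointer, stack pointer, program counter and
   the PIC offset table register.  */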
#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify rtx_costs here as it will be figured out
     from the core.  */

  {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",  arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",   arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",  arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",   arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",  arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",   arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",  arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",  arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",   arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",  arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",  mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",  arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m", cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",   cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a", cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r", cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m", cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"armv7e-m", cortexm3,  "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
  {"ep9312",  ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",  iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2", iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};

struct arm_cpu_select
{
  const char *              string;
  const char *              name;
  const struct processors * processors;
};

/* This is a magic structure.  The 'string' field is magically filled in
   with a pointer to the value specified by the user on the command line
   assuming that the user has specified such a value.  */

static struct arm_cpu_select arm_select[] =
{
  /* string       name             processors  */
  { NULL,         "-mcpu=",        all_cores  },
  { NULL,         "-march=",       all_architectures },
  { NULL,         "-mtune=",       all_cores }
};

/* Defines representing the indexes into the above table.  */
#define ARM_OPT_SET_CPU 0
#define ARM_OPT_SET_ARCH 1
#define ARM_OPT_SET_TUNE 2

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

/* Available values for -mfpu=.  */
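/* Each entry gives, in order, the option name, the floating-point model,
   the model revision, the register layout, and whether the Neon and
   half-precision (fp16) extensions are available.  */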
static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa",            ARM_FP_MODEL_FPA,      0, VFP_NONE,       false, false},
  {"fpe2",           ARM_FP_MODEL_FPA,      2, VFP_NONE,       false, false},
  {"fpe3",           ARM_FP_MODEL_FPA,      3, VFP_NONE,       false, false},
  {"maverick",       ARM_FP_MODEL_MAVERICK, 0, VFP_NONE,       false, false},
  {"vfp",            ARM_FP_MODEL_VFP,      2, VFP_REG_D16,    false, false},
  {"vfpv3",          ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
  {"vfpv3-fp16",     ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, true},
  {"vfpv3-d16",      ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, false},
  {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, true},
  {"vfpv3xd",        ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16",   ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, true},
  {"neon",           ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true,  false},
  {"neon-fp16",      ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true,  true},
  {"vfpv4",          ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    false, true},
  {"vfpv4-d16",      ARM_FP_MODEL_VFP,      4, VFP_REG_D16,    false, true},
  {"fpv4-sp-d16",    ARM_FP_MODEL_VFP,      4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4",     ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    true,  true},
  /* Compatibility aliases.  */
  {"vfp3",           ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
};

struct float_abi
{
  const char * name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",   ARM_FLOAT_ABI_SOFT},
  {"softfp", ARM_FLOAT_ABI_SOFTFP},
  {"hard",   ARM_FLOAT_ABI_HARD}
};

struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */

static const struct fp16_format all_fp16_formats[] =
{
  {"none",        ARM_FP16_FORMAT_NONE},
  {"ieee",        ARM_FP16_FORMAT_IEEE},
  {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};

/* Supported TLS relocations.  */
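/* GD = global dynamic, LDM/LDO = local dynamic (module and offset),
   IE = initial exec, LE = local exec.  */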
enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
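/* This uses the classic "clear the lowest set bit" loop, so it iterates
   once per bit that is set in VALUE.  */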
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}

/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
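  /* There is no dedicated "not equal" helper; leaving the optab NULL makes
     the compiler fall back on the eq libcall and invert the result.  */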
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
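
  /* On AAPCS targets the memory barrier libcall is provided by the
     __sync_synchronize routine.  */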
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Implement TARGET_HANDLE_OPTION.  */

static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_select[1].string = arg;
      return true;

    case OPT_mcpu_:
      arm_select[0].string = arg;
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_select[2].string = arg;
      return true;
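
    /* Any other option is left to the generic option machinery; returning
       true tells it the switch is valid.  */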
    default:
      return true;
    }
}

static void
arm_target_help (void)
{
  int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
        {
          int value = atoi (p);

          if (value > 0)
            columns = value;
        }

      if (columns == 0)
        /* Use a reasonable default.  */
        columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;
  gcc_assert (i > 0);
  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (i--)
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_cores[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_cores[i].name);
          remaining = columns - (len + 4);
        }
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;
  gcc_assert (i > 0);

  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (i--)
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_architectures[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_architectures[i].name);
          remaining = columns - (len + 4);
        }
    }
  printf ("\n");
}

/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;
  enum processor_type target_arch_cpu = arm_none;
  enum processor_type selected_cpu = arm_none;

  /* Set up the flags based on the cpu/architecture selected by the user.  */
  for (i = ARRAY_SIZE (arm_select); i--;)
    {
      struct arm_cpu_select * ptr = arm_select + i;

      if (ptr->string != NULL && ptr->string[0] != '\0')
        {
          const struct processors * sel;

          for (sel = ptr->processors; sel->name != NULL; sel++)
            if (streq (ptr->string, sel->name))
              {
                /* Set the architecture define.  */
                if (i != ARM_OPT_SET_TUNE)
                  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

                /* Determine the processor core for which we should
                   tune code-generation.  */
                if (/* -mcpu= is a sensible default.  */
                    i == ARM_OPT_SET_CPU
                    /* -mtune= overrides -mcpu= and -march=.  */
                    || i == ARM_OPT_SET_TUNE)
                  arm_tune = (enum processor_type) (sel - ptr->processors);

                /* Remember the CPU associated with this architecture.
                   If no other option is used to set the CPU type,
                   we'll use this to guess the most suitable tuning
                   options.  */
                if (i == ARM_OPT_SET_ARCH)
                  target_arch_cpu = sel->core;

                if (i == ARM_OPT_SET_CPU)
                  selected_cpu = (enum processor_type) (sel - ptr->processors);

                if (i != ARM_OPT_SET_TUNE)
                  {
                    /* If we have been given an architecture and a processor
                       make sure that they are compatible.  We only generate
                       a warning though, and we prefer the CPU over the
                       architecture.  */
                    if (insn_flags != 0 && (insn_flags ^ sel->flags))
                      warning (0, "switch -mcpu=%s conflicts with -march= switch",
                               ptr->string);

                    insn_flags = sel->flags;
                  }

                break;
              }

          if (sel->name == NULL)
            error ("bad value (%s) for %s switch", ptr->string, ptr->name);
        }
    }

  /* Guess the tuning options from the architecture if necessary.  */
  if (arm_tune == arm_none)
    arm_tune = target_arch_cpu;

  /* If the user did not specify a processor, choose one for them.  */
  if (insn_flags == 0)
    {
      const struct processors * sel;
      unsigned int sought;

      selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
      if (selected_cpu == arm_none)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
#endif
          /* Default to ARM6.  */
          if (selected_cpu == arm_none)
            selected_cpu = arm6;
        }
      sel = &all_cores[selected_cpu];

      insn_flags = sel->flags;

      /* Now check to see if the user has specified any command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors * best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          insn_flags = sel->flags;
        }
      sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
      arm_default_cpu = (enum processor_type) (sel - all_cores);
      if (arm_tune == arm_none)
        arm_tune = arm_default_cpu;
    }

  /* The processor for which we should tune should now have been
     chosen.  */
  gcc_assert (arm_tune != arm_none);

  tune_flags = all_cores[(int)arm_tune].flags;

  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
        {
          if (streq (all_fp16_formats[i].name, target_fp16_format_name))
            {
              arm_fp16_format = all_fp16_formats[i].fp16_format_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_fp16_formats))
        error ("invalid __fp16 format option: -mfp16-format=%s",
               target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
        {
          if (streq (arm_all_abis[i].name, target_abi_name))
            {
              arm_abi = arm_all_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (arm_all_abis))
        error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
1550 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1551 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1552 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1553 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1554 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1555 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1556 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1557 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1558 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1559 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1560 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1561 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1563 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1564 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1565 thumb_code = (TARGET_ARM == 0);
1566 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1567 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1568 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1569 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1570 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1572 /* If we are not using the default (ARM mode) section anchor offset
1573 ranges, then set the correct ranges now. */
1574 if (TARGET_THUMB1)
1576 /* Thumb-1 LDR instructions cannot have negative offsets.
1577 Permissible positive offset ranges are 5-bit (for byte loads),
1578 6-bit (for halfword loads), or 7-bit (for word loads).
1579 Empirical results suggest a 7-bit anchor range gives the best
1580 overall code size. */
1581 targetm.min_anchor_offset = 0;
1582 targetm.max_anchor_offset = 127;
1584 else if (TARGET_THUMB2)
1586 /* The minimum is set such that the total size of the block
1587 for a particular anchor is 248 + 1 + 4095 bytes, which is
1588 divisible by eight, ensuring natural spacing of anchors. */
1589 targetm.min_anchor_offset = -248;
1590 targetm.max_anchor_offset = 4095;
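/* Illustrative sketch (not part of the original file): with
   -fsection-anchors, nearby globals can share a single anchor address,
   so only one address needs to be loaded from the constant pool.
   Given

       int a, b;
       void f (void) { a = 1; b = 2; }

   a Thumb-1 compiler might emit something like

       ldr   r3, .Lanchor_addr   @ one base address for the whole block
       movs  r2, #1
       str   r2, [r3]            @ a at offset 0
       movs  r2, #2
       str   r2, [r3, #4]        @ b at offset 4

   where every offset must lie in the [min_anchor_offset,
   max_anchor_offset] range chosen above (0..127 for Thumb-1 word
   accesses).  */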
1593 /* V5 code we generate is completely interworking capable, so we turn off
1594 TARGET_INTERWORK here to avoid many tests later on. */
1596 /* XXX However, we must pass the right pre-processor defines to CPP
1597 or GLD can get confused. This is a hack. */
1598 if (TARGET_INTERWORK)
1599 arm_cpp_interwork = 1;
1601 if (arm_arch5)
1602 target_flags &= ~MASK_INTERWORK;
1604 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1605 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1607 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1608 error ("iwmmxt abi requires an iwmmxt capable cpu");
1610 if (target_fpu_name == NULL && target_fpe_name != NULL)
1612 if (streq (target_fpe_name, "2"))
1613 target_fpu_name = "fpe2";
1614 else if (streq (target_fpe_name, "3"))
1615 target_fpu_name = "fpe3";
1616 else
1617 error ("invalid floating point emulation option: -mfpe=%s",
1618 target_fpe_name);
1621 if (target_fpu_name == NULL)
1623 #ifdef FPUTYPE_DEFAULT
1624 target_fpu_name = FPUTYPE_DEFAULT;
1625 #else
1626 if (arm_arch_cirrus)
1627 target_fpu_name = "maverick";
1628 else
1629 target_fpu_name = "fpe2";
1630 #endif
1633 arm_fpu_desc = NULL;
1634 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1636 if (streq (all_fpus[i].name, target_fpu_name))
1638 arm_fpu_desc = &all_fpus[i];
1639 break;
1643 if (!arm_fpu_desc)
1645 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1646 return;
1649 switch (arm_fpu_desc->model)
1651 case ARM_FP_MODEL_FPA:
1652 if (arm_fpu_desc->rev == 2)
1653 arm_fpu_attr = FPU_FPE2;
1654 else if (arm_fpu_desc->rev == 3)
1655 arm_fpu_attr = FPU_FPE3;
1656 else
1657 arm_fpu_attr = FPU_FPA;
1658 break;
1660 case ARM_FP_MODEL_MAVERICK:
1661 arm_fpu_attr = FPU_MAVERICK;
1662 break;
1664 case ARM_FP_MODEL_VFP:
1665 arm_fpu_attr = FPU_VFP;
1666 break;
1668 default:
1669 gcc_unreachable();
1672 if (target_float_abi_name != NULL)
1674 /* The user specified a FP ABI. */
1675 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1677 if (streq (all_float_abis[i].name, target_float_abi_name))
1679 arm_float_abi = all_float_abis[i].abi_type;
1680 break;
1683 if (i == ARRAY_SIZE (all_float_abis))
1684 error ("invalid floating point abi: -mfloat-abi=%s",
1685 target_float_abi_name);
1687 else
1688 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1690 if (TARGET_AAPCS_BASED
1691 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1692 error ("FPA is unsupported in the AAPCS");
1694 if (TARGET_AAPCS_BASED)
1696 if (TARGET_CALLER_INTERWORKING)
1697 error ("AAPCS does not support -mcaller-super-interworking");
1698 else
1699 if (TARGET_CALLEE_INTERWORKING)
1700 error ("AAPCS does not support -mcallee-super-interworking");
1703 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1704 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1705 will ever exist. GCC makes no attempt to support this combination. */
1706 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1707 sorry ("iWMMXt and hardware floating point");
1709 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1710 if (TARGET_THUMB2 && TARGET_IWMMXT)
1711 sorry ("Thumb-2 iWMMXt");
1713 /* __fp16 support currently assumes the core has ldrh. */
1714 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1715 sorry ("__fp16 and no ldrh");
1717 /* If soft-float is specified then don't use FPU. */
1718 if (TARGET_SOFT_FLOAT)
1719 arm_fpu_attr = FPU_NONE;
1721 if (TARGET_AAPCS_BASED)
1723 if (arm_abi == ARM_ABI_IWMMXT)
1724 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1725 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1726 && TARGET_HARD_FLOAT
1727 && TARGET_VFP)
1728 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1729 else
1730 arm_pcs_default = ARM_PCS_AAPCS;
1732 else
1734 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1735 sorry ("-mfloat-abi=hard and VFP");
1737 if (arm_abi == ARM_ABI_APCS)
1738 arm_pcs_default = ARM_PCS_APCS;
1739 else
1740 arm_pcs_default = ARM_PCS_ATPCS;
1743 /* For arm2/3 there is no need to do any scheduling if there is only
1744 a floating point emulator, or we are doing software floating-point. */
1745 if ((TARGET_SOFT_FLOAT
1746 || (TARGET_FPA && arm_fpu_desc->rev))
1747 && (tune_flags & FL_MODE32) == 0)
1748 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1750 if (target_thread_switch)
1752 if (strcmp (target_thread_switch, "soft") == 0)
1753 target_thread_pointer = TP_SOFT;
1754 else if (strcmp (target_thread_switch, "auto") == 0)
1755 target_thread_pointer = TP_AUTO;
1756 else if (strcmp (target_thread_switch, "cp15") == 0)
1757 target_thread_pointer = TP_CP15;
1758 else
1759 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1762 /* Use the cp15 method if it is available. */
1763 if (target_thread_pointer == TP_AUTO)
1765 if (arm_arch6k && !TARGET_THUMB1)
1766 target_thread_pointer = TP_CP15;
1767 else
1768 target_thread_pointer = TP_SOFT;
1771 if (TARGET_HARD_TP && TARGET_THUMB1)
1772 error ("can not use -mtp=cp15 with 16-bit Thumb");
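/* Illustrative sketch (not part of the original file): the -mtp=
   settings handled above translate to the following ways of reading
   the thread pointer:

       -mtp=cp15   mrc  p15, 0, <reg>, c13, c0, 3   @ TPIDRURO
       -mtp=soft   bl   __aeabi_read_tp             @ library helper
       -mtp=auto   cp15 when the core supports it (ARMv6K or later and
                   not Thumb-1), otherwise soft.  */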
1774 /* Override the default structure alignment for AAPCS ABI. */
1775 if (TARGET_AAPCS_BASED)
1776 arm_structure_size_boundary = 8;
1778 if (structure_size_string != NULL)
1780 int size = strtol (structure_size_string, NULL, 0);
1782 if (size == 8 || size == 32
1783 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1784 arm_structure_size_boundary = size;
1785 else
1786 warning (0, "structure size boundary can only be set to %s",
1787 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
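/* Worked example (illustrative, not part of the original file): the
   boundary chosen above controls structure padding.  Given

       struct s { char c; };

   the default boundary of 8 gives sizeof (struct s) == 1, whereas
   -mstructure-size-boundary=32 rounds the size up to a 32-bit
   multiple, giving sizeof (struct s) == 4.  AAPCS-based targets are
   forced to a boundary of 8 a few lines earlier.  */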
1790 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1792 error ("RTP PIC is incompatible with Thumb");
1793 flag_pic = 0;
1796 /* If stack checking is disabled, we can use r10 as the PIC register,
1797 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1798 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1800 if (TARGET_VXWORKS_RTP)
1801 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1802 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1805 if (flag_pic && TARGET_VXWORKS_RTP)
1806 arm_pic_register = 9;
1808 if (arm_pic_register_string != NULL)
1810 int pic_register = decode_reg_name (arm_pic_register_string);
1812 if (!flag_pic)
1813 warning (0, "-mpic-register= is useless without -fpic");
1815 /* Prevent the user from choosing an obviously stupid PIC register. */
1816 else if (pic_register < 0 || call_used_regs[pic_register]
1817 || pic_register == HARD_FRAME_POINTER_REGNUM
1818 || pic_register == STACK_POINTER_REGNUM
1819 || pic_register >= PC_REGNUM
1820 || (TARGET_VXWORKS_RTP
1821 && (unsigned int) pic_register != arm_pic_register))
1822 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1823 else
1824 arm_pic_register = pic_register;
1827 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1828 if (fix_cm3_ldrd == 2)
1830 if (selected_cpu == cortexm3)
1831 fix_cm3_ldrd = 1;
1832 else
1833 fix_cm3_ldrd = 0;
1836 if (TARGET_THUMB1 && flag_schedule_insns)
1838 /* Don't warn since it's on by default in -O2. */
1839 flag_schedule_insns = 0;
1842 if (optimize_size)
1844 arm_constant_limit = 1;
1846 /* If optimizing for size, bump the number of instructions that we
1847 are prepared to conditionally execute (even on a StrongARM). */
1848 max_insns_skipped = 6;
1850 else
1852 /* For processors with load scheduling, it never costs more than
1853 2 cycles to load a constant, and the load scheduler may well
1854 reduce that to 1. */
1855 if (arm_ld_sched)
1856 arm_constant_limit = 1;
1858 /* On XScale the longer latency of a load makes it more difficult
1859 to achieve a good schedule, so it's faster to synthesize
1860 constants that can be done in two insns. */
1861 if (arm_tune_xscale)
1862 arm_constant_limit = 2;
1864 /* StrongARM has early execution of branches, so a sequence
1865 that is worth skipping is shorter. */
1866 if (arm_tune_strongarm)
1867 max_insns_skipped = 3;
1870 /* Hot/Cold partitioning is not currently supported, since we can't
1871 handle literal pool placement in that case. */
1872 if (flag_reorder_blocks_and_partition)
1874 inform (input_location,
1875 "-freorder-blocks-and-partition not supported on this architecture");
1876 flag_reorder_blocks_and_partition = 0;
1877 flag_reorder_blocks = 1;
1880 /* Ideally we would want to use CFI directives to generate
1881 debug info. However this also creates the .eh_frame
1882 section, so disable them until GAS can handle
1883 this properly. See PR40521. */
1884 if (TARGET_AAPCS_BASED)
1885 flag_dwarf2_cfi_asm = 0;
1887 /* Register global variables with the garbage collector. */
1888 arm_add_gc_roots ();
1891 static void
1892 arm_add_gc_roots (void)
1894 gcc_obstack_init(&minipool_obstack);
1895 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1898 /* A table of known ARM exception types.
1899 For use with the interrupt function attribute. */
1901 typedef struct
1903 const char *const arg;
1904 const unsigned long return_value;
1906 isr_attribute_arg;
1908 static const isr_attribute_arg isr_attribute_args [] =
1910 { "IRQ", ARM_FT_ISR },
1911 { "irq", ARM_FT_ISR },
1912 { "FIQ", ARM_FT_FIQ },
1913 { "fiq", ARM_FT_FIQ },
1914 { "ABORT", ARM_FT_ISR },
1915 { "abort", ARM_FT_ISR },
1916 { "ABORT", ARM_FT_ISR },
1917 { "abort", ARM_FT_ISR },
1918 { "UNDEF", ARM_FT_EXCEPTION },
1919 { "undef", ARM_FT_EXCEPTION },
1920 { "SWI", ARM_FT_EXCEPTION },
1921 { "swi", ARM_FT_EXCEPTION },
1922 { NULL, ARM_FT_NORMAL }
1925 /* Returns the (interrupt) function type of the current
1926 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1928 static unsigned long
1929 arm_isr_value (tree argument)
1931 const isr_attribute_arg * ptr;
1932 const char * arg;
1934 if (!arm_arch_notm)
1935 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1937 /* No argument - default to IRQ. */
1938 if (argument == NULL_TREE)
1939 return ARM_FT_ISR;
1941 /* Get the value of the argument. */
1942 if (TREE_VALUE (argument) == NULL_TREE
1943 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1944 return ARM_FT_UNKNOWN;
1946 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1948 /* Check it against the list of known arguments. */
1949 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1950 if (streq (arg, ptr->arg))
1951 return ptr->return_value;
1953 /* An unrecognized interrupt type. */
1954 return ARM_FT_UNKNOWN;
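/* Usage sketch (hypothetical handler names): the strings in the table
   above are the arguments users may give to the "isr"/"interrupt"
   function attribute, e.g.

       void __attribute__ ((interrupt ("IRQ"))) uart_irq (void);
       void __attribute__ ((interrupt ("FIQ"))) fast_irq (void);
       void __attribute__ ((interrupt)) timer_isr (void);

   With no argument the function is treated as ARM_FT_ISR; an
   unrecognized string yields ARM_FT_UNKNOWN.  */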
1957 /* Computes the type of the current function. */
1959 static unsigned long
1960 arm_compute_func_type (void)
1962 unsigned long type = ARM_FT_UNKNOWN;
1963 tree a;
1964 tree attr;
1966 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1968 /* Decide if the current function is volatile. Such functions
1969 never return, and many memory cycles can be saved by not storing
1970 register values that will never be needed again. This optimization
1971 was added to speed up context switching in a kernel application. */
1972 if (optimize > 0
1973 && (TREE_NOTHROW (current_function_decl)
1974 || !(flag_unwind_tables
1975 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1976 && TREE_THIS_VOLATILE (current_function_decl))
1977 type |= ARM_FT_VOLATILE;
1979 if (cfun->static_chain_decl != NULL)
1980 type |= ARM_FT_NESTED;
1982 attr = DECL_ATTRIBUTES (current_function_decl);
1984 a = lookup_attribute ("naked", attr);
1985 if (a != NULL_TREE)
1986 type |= ARM_FT_NAKED;
1988 a = lookup_attribute ("isr", attr);
1989 if (a == NULL_TREE)
1990 a = lookup_attribute ("interrupt", attr);
1992 if (a == NULL_TREE)
1993 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1994 else
1995 type |= arm_isr_value (TREE_VALUE (a));
1997 return type;
2000 /* Returns the type of the current function. */
2002 unsigned long
2003 arm_current_func_type (void)
2005 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2006 cfun->machine->func_type = arm_compute_func_type ();
2008 return cfun->machine->func_type;
2011 bool
2012 arm_allocate_stack_slots_for_args (void)
2014 /* Naked functions should not allocate stack slots for arguments. */
2015 return !IS_NAKED (arm_current_func_type ());
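/* Usage sketch (hypothetical declaration): a "naked" function gets
   neither prologue nor epilogue, so the hook above also prevents GCC
   from allocating stack slots for its incoming arguments:

       void __attribute__ ((naked)) reset_handler (void);  */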
2019 /* Output assembler code for a block containing the constant parts
2020 of a trampoline, leaving space for the variable parts.
2022 On the ARM, (if r8 is the static chain regnum, and remembering that
2023 referencing pc adds an offset of 8) the trampoline looks like:
2024 ldr r8, [pc, #0]
2025 ldr pc, [pc]
2026 .word static chain value
2027 .word function's address
2028 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2030 static void
2031 arm_asm_trampoline_template (FILE *f)
2033 if (TARGET_ARM)
2035 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2036 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2038 else if (TARGET_THUMB2)
2040 /* The Thumb-2 trampoline is similar to the arm implementation.
2041 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2042 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2043 STATIC_CHAIN_REGNUM, PC_REGNUM);
2044 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2046 else
2048 ASM_OUTPUT_ALIGN (f, 2);
2049 fprintf (f, "\t.code\t16\n");
2050 fprintf (f, ".Ltrampoline_start:\n");
2051 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2052 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2053 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2054 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2055 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2056 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2058 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2059 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2062 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2064 static void
2065 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2067 rtx fnaddr, mem, a_tramp;
2069 emit_block_move (m_tramp, assemble_trampoline_template (),
2070 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2072 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2073 emit_move_insn (mem, chain_value);
2075 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2076 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2077 emit_move_insn (mem, fnaddr);
2079 a_tramp = XEXP (m_tramp, 0);
2080 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2081 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2082 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2085 /* Thumb trampolines should be entered in thumb mode, so set
2086 the bottom bit of the address. */
2088 static rtx
2089 arm_trampoline_adjust_address (rtx addr)
2091 if (TARGET_THUMB)
2092 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2093 NULL, 0, OPTAB_LIB_WIDEN);
2094 return addr;
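/* Usage sketch (GNU C nested functions, hypothetical names): a
   trampoline is only materialized when the address of a nested
   function escapes, e.g.

       int outer (int x)
       {
         int inner (int y) { return x + y; }
         int (*fp) (int) = inner;   -- taking the address forces a trampoline
         return fp (1);
       }

   The template above loads the static chain register and jumps to
   inner's real code; arm_trampoline_init fills in the two .word slots
   and then calls __clear_cache on the block.  */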
2097 /* Return 1 if it is possible to return using a single instruction.
2098 If SIBLING is non-null, this is a test for a return before a sibling
2099 call. SIBLING is the call insn, so we can examine its register usage. */
2102 use_return_insn (int iscond, rtx sibling)
2104 int regno;
2105 unsigned int func_type;
2106 unsigned long saved_int_regs;
2107 unsigned HOST_WIDE_INT stack_adjust;
2108 arm_stack_offsets *offsets;
2110 /* Never use a return instruction before reload has run. */
2111 if (!reload_completed)
2112 return 0;
2114 func_type = arm_current_func_type ();
2116 /* Naked, volatile and stack alignment functions need special
2117 consideration. */
2118 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2119 return 0;
2121 /* So do interrupt functions that use the frame pointer and Thumb
2122 interrupt functions. */
2123 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2124 return 0;
2126 offsets = arm_get_frame_offsets ();
2127 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2129 /* As do variadic functions. */
2130 if (crtl->args.pretend_args_size
2131 || cfun->machine->uses_anonymous_args
2132 /* Or if the function calls __builtin_eh_return () */
2133 || crtl->calls_eh_return
2134 /* Or if the function calls alloca */
2135 || cfun->calls_alloca
2136 /* Or if there is a stack adjustment. However, if the stack pointer
2137 is saved on the stack, we can use a pre-incrementing stack load. */
2138 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2139 && stack_adjust == 4)))
2140 return 0;
2142 saved_int_regs = offsets->saved_regs_mask;
2144 /* Unfortunately, the insn
2146 ldmib sp, {..., sp, ...}
2148 triggers a bug on most SA-110 based devices, such that the stack
2149 pointer won't be correctly restored if the instruction takes a
2150 page fault. We work around this problem by popping r3 along with
2151 the other registers, since that is never slower than executing
2152 another instruction.
2154 We test for !arm_arch5 here, because code for any architecture
2155 less than this could potentially be run on one of the buggy
2156 chips. */
2157 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2159 /* Validate that r3 is a call-clobbered register (always true in
2160 the default abi) ... */
2161 if (!call_used_regs[3])
2162 return 0;
2164 /* ... that it isn't being used for a return value ... */
2165 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2166 return 0;
2168 /* ... or for a tail-call argument ... */
2169 if (sibling)
2171 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2173 if (find_regno_fusage (sibling, USE, 3))
2174 return 0;
2177 /* ... and that there are no call-saved registers in r0-r2
2178 (always true in the default ABI). */
2179 if (saved_int_regs & 0x7)
2180 return 0;
2183 /* Can't be done if interworking with Thumb, and any registers have been
2184 stacked. */
2185 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2186 return 0;
2188 /* On StrongARM, conditional returns are expensive if they aren't
2189 taken and multiple registers have been stacked. */
2190 if (iscond && arm_tune_strongarm)
2192 /* Conditional return when just the LR is stored is a simple
2193 conditional-load instruction, that's not expensive. */
2194 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2195 return 0;
2197 if (flag_pic
2198 && arm_pic_register != INVALID_REGNUM
2199 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2200 return 0;
2203 /* If there are saved registers but the LR isn't saved, then we need
2204 two instructions for the return. */
2205 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2206 return 0;
2208 /* Can't be done if any of the FPA regs are pushed,
2209 since this also requires an insn. */
2210 if (TARGET_HARD_FLOAT && TARGET_FPA)
2211 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2212 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2213 return 0;
2215 /* Likewise VFP regs. */
2216 if (TARGET_HARD_FLOAT && TARGET_VFP)
2217 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2218 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2219 return 0;
2221 if (TARGET_REALLY_IWMMXT)
2222 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2223 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2224 return 0;
2226 return 1;
2229 /* Return TRUE if int I is a valid immediate ARM constant. */
2232 const_ok_for_arm (HOST_WIDE_INT i)
2234 int lowbit;
2236 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2237 be all zero, or all one. */
2238 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2239 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2240 != ((~(unsigned HOST_WIDE_INT) 0)
2241 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2242 return FALSE;
2244 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2246 /* Fast return for 0 and small values. We must do this for zero, since
2247 the code below can't handle that one case. */
2248 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2249 return TRUE;
2251 /* Get the number of trailing zeros. */
2252 lowbit = ffs((int) i) - 1;
2254 /* Only even shifts are allowed in ARM mode so round down to the
2255 nearest even number. */
2256 if (TARGET_ARM)
2257 lowbit &= ~1;
2259 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2260 return TRUE;
2262 if (TARGET_ARM)
2264 /* Allow rotated constants in ARM mode. */
2265 if (lowbit <= 4
2266 && ((i & ~0xc000003f) == 0
2267 || (i & ~0xf000000f) == 0
2268 || (i & ~0xfc000003) == 0))
2269 return TRUE;
2271 else
2273 HOST_WIDE_INT v;
2275 /* Allow repeated pattern. */
2276 v = i & 0xff;
2277 v |= v << 16;
2278 if (i == v || i == (v | (v << 8)))
2279 return TRUE;
2282 return FALSE;
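/* Standalone sketch (illustrative only, not the routine above): in
   ARM mode a valid immediate is any 8-bit value rotated right by an
   even amount within a 32-bit word, which can also be tested by brute
   force:

       #include <stdint.h>

       static int
       arm_immediate_p (uint32_t x)
       {
         int rot;
         for (rot = 0; rot < 32; rot += 2)
           {
             /* Rotate X left by ROT to undo a rotate-right encoding.  */
             uint32_t v = rot ? (x << rot) | (x >> (32 - rot)) : x;
             if ((v & ~(uint32_t) 0xff) == 0)
               return 1;
           }
         return 0;
       }

   So 0xff000000 and 0x000003fc are valid while 0x00000101 is not.
   const_ok_for_arm above gets the same answers without a loop and
   additionally accepts Thumb-2's replicated-byte patterns.  */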
2285 /* Return true if I is a valid constant for the operation CODE. */
2286 static int
2287 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2289 if (const_ok_for_arm (i))
2290 return 1;
2292 switch (code)
2294 case PLUS:
2295 case COMPARE:
2296 case EQ:
2297 case NE:
2298 case GT:
2299 case LE:
2300 case LT:
2301 case GE:
2302 case GEU:
2303 case LTU:
2304 case GTU:
2305 case LEU:
2306 case UNORDERED:
2307 case ORDERED:
2308 case UNEQ:
2309 case UNGE:
2310 case UNLT:
2311 case UNGT:
2312 case UNLE:
2313 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2315 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2316 case XOR:
2317 return 0;
2319 case IOR:
2320 if (TARGET_THUMB2)
2321 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2322 return 0;
2324 case AND:
2325 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2327 default:
2328 gcc_unreachable ();
2332 /* Emit a sequence of insns to handle a large constant.
2333 CODE is the code of the operation required, it can be any of SET, PLUS,
2334 IOR, AND, XOR, MINUS;
2335 MODE is the mode in which the operation is being performed;
2336 VAL is the integer to operate on;
2337 SOURCE is the other operand (a register, or a null-pointer for SET);
2338 SUBTARGETS means it is safe to create scratch registers if that will
2339 either produce a simpler sequence, or we will want to cse the values.
2340 Return value is the number of insns emitted. */
2342 /* ??? Tweak this for thumb2. */
2344 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2345 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2347 rtx cond;
2349 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2350 cond = COND_EXEC_TEST (PATTERN (insn));
2351 else
2352 cond = NULL_RTX;
2354 if (subtargets || code == SET
2355 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2356 && REGNO (target) != REGNO (source)))
2358 /* After arm_reorg has been called, we can't fix up expensive
2359 constants by pushing them into memory so we must synthesize
2360 them in-line, regardless of the cost. This is only likely to
2361 be more costly on chips that have load delay slots and we are
2362 compiling without running the scheduler (so no splitting
2363 occurred before the final instruction emission).
2365 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2367 if (!after_arm_reorg
2368 && !cond
2369 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2370 1, 0)
2371 > arm_constant_limit + (code != SET)))
2373 if (code == SET)
2375 /* Currently SET is the only monadic value for CODE, all
2376 the rest are dyadic. */
2377 if (TARGET_USE_MOVT)
2378 arm_emit_movpair (target, GEN_INT (val));
2379 else
2380 emit_set_insn (target, GEN_INT (val));
2382 return 1;
2384 else
2386 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2388 if (TARGET_USE_MOVT)
2389 arm_emit_movpair (temp, GEN_INT (val));
2390 else
2391 emit_set_insn (temp, GEN_INT (val));
2393 /* For MINUS, the value is subtracted from, since we never
2394 have subtraction of a constant. */
2395 if (code == MINUS)
2396 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2397 else
2398 emit_set_insn (target,
2399 gen_rtx_fmt_ee (code, mode, source, temp));
2400 return 2;
2405 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2409 /* Return the number of instructions required to synthesize the given
2410 constant, if we start emitting them from bit-position I. */
2411 static int
2412 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2414 HOST_WIDE_INT temp1;
2415 int step_size = TARGET_ARM ? 2 : 1;
2416 int num_insns = 0;
2418 gcc_assert (TARGET_ARM || i == 0);
2422 int end;
2424 if (i <= 0)
2425 i += 32;
2426 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2428 end = i - 8;
2429 if (end < 0)
2430 end += 32;
2431 temp1 = remainder & ((0x0ff << end)
2432 | ((i < end) ? (0xff >> (32 - end)) : 0));
2433 remainder &= ~temp1;
2434 num_insns++;
2435 i -= 8 - step_size;
2437 i -= step_size;
2438 } while (remainder);
2439 return num_insns;
2442 static int
2443 find_best_start (unsigned HOST_WIDE_INT remainder)
2445 int best_consecutive_zeros = 0;
2446 int i;
2447 int best_start = 0;
2449 /* If we aren't targeting ARM, the best place to start is always at
2450 the bottom. */
2451 if (! TARGET_ARM)
2452 return 0;
2454 for (i = 0; i < 32; i += 2)
2456 int consecutive_zeros = 0;
2458 if (!(remainder & (3 << i)))
2460 while ((i < 32) && !(remainder & (3 << i)))
2462 consecutive_zeros += 2;
2463 i += 2;
2465 if (consecutive_zeros > best_consecutive_zeros)
2467 best_consecutive_zeros = consecutive_zeros;
2468 best_start = i - consecutive_zeros;
2470 i -= 2;
2474 /* So long as it won't require any more insns to do so, it's
2475 desirable to emit a small constant (in bits 0...9) in the last
2476 insn. This way there is more chance that it can be combined with
2477 a later addressing insn to form a pre-indexed load or store
2478 operation. Consider:
2480 *((volatile int *)0xe0000100) = 1;
2481 *((volatile int *)0xe0000110) = 2;
2483 We want this to wind up as:
2485 mov rA, #0xe0000000
2486 mov rB, #1
2487 str rB, [rA, #0x100]
2488 mov rB, #2
2489 str rB, [rA, #0x110]
2491 rather than having to synthesize both large constants from scratch.
2493 Therefore, we calculate how many insns would be required to emit
2494 the constant starting from `best_start', and also starting from
2495 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2496 yield a shorter sequence, we may as well use zero. */
2497 if (best_start != 0
2498 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2499 && (count_insns_for_constant (remainder, 0) <=
2500 count_insns_for_constant (remainder, best_start)))
2501 best_start = 0;
2503 return best_start;
2506 /* Emit an instruction with the indicated PATTERN. If COND is
2507 non-NULL, conditionalize the execution of the instruction on COND
2508 being true. */
2510 static void
2511 emit_constant_insn (rtx cond, rtx pattern)
2513 if (cond)
2514 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2515 emit_insn (pattern);
2518 /* As above, but extra parameter GENERATE which, if clear, suppresses
2519 RTL generation. */
2520 /* ??? This needs more work for thumb2. */
2522 static int
2523 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2524 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2525 int generate)
2527 int can_invert = 0;
2528 int can_negate = 0;
2529 int final_invert = 0;
2530 int can_negate_initial = 0;
2531 int can_shift = 0;
2532 int i;
2533 int num_bits_set = 0;
2534 int set_sign_bit_copies = 0;
2535 int clear_sign_bit_copies = 0;
2536 int clear_zero_bit_copies = 0;
2537 int set_zero_bit_copies = 0;
2538 int insns = 0;
2539 unsigned HOST_WIDE_INT temp1, temp2;
2540 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2541 int step_size = TARGET_ARM ? 2 : 1;
2543 /* Find out which operations are safe for a given CODE. Also do a quick
2544 check for degenerate cases; these can occur when DImode operations
2545 are split. */
2546 switch (code)
2548 case SET:
2549 can_invert = 1;
2550 can_shift = 1;
2551 can_negate = 1;
2552 break;
2554 case PLUS:
2555 can_negate = 1;
2556 can_negate_initial = 1;
2557 break;
2559 case IOR:
2560 if (remainder == 0xffffffff)
2562 if (generate)
2563 emit_constant_insn (cond,
2564 gen_rtx_SET (VOIDmode, target,
2565 GEN_INT (ARM_SIGN_EXTEND (val))));
2566 return 1;
2569 if (remainder == 0)
2571 if (reload_completed && rtx_equal_p (target, source))
2572 return 0;
2574 if (generate)
2575 emit_constant_insn (cond,
2576 gen_rtx_SET (VOIDmode, target, source));
2577 return 1;
2580 if (TARGET_THUMB2)
2581 can_invert = 1;
2582 break;
2584 case AND:
2585 if (remainder == 0)
2587 if (generate)
2588 emit_constant_insn (cond,
2589 gen_rtx_SET (VOIDmode, target, const0_rtx));
2590 return 1;
2592 if (remainder == 0xffffffff)
2594 if (reload_completed && rtx_equal_p (target, source))
2595 return 0;
2596 if (generate)
2597 emit_constant_insn (cond,
2598 gen_rtx_SET (VOIDmode, target, source));
2599 return 1;
2601 can_invert = 1;
2602 break;
2604 case XOR:
2605 if (remainder == 0)
2607 if (reload_completed && rtx_equal_p (target, source))
2608 return 0;
2609 if (generate)
2610 emit_constant_insn (cond,
2611 gen_rtx_SET (VOIDmode, target, source));
2612 return 1;
2615 if (remainder == 0xffffffff)
2617 if (generate)
2618 emit_constant_insn (cond,
2619 gen_rtx_SET (VOIDmode, target,
2620 gen_rtx_NOT (mode, source)));
2621 return 1;
2623 break;
2625 case MINUS:
2626 /* We treat MINUS as (val - source), since (source - val) is always
2627 passed as (source + (-val)). */
2628 if (remainder == 0)
2630 if (generate)
2631 emit_constant_insn (cond,
2632 gen_rtx_SET (VOIDmode, target,
2633 gen_rtx_NEG (mode, source)));
2634 return 1;
2636 if (const_ok_for_arm (val))
2638 if (generate)
2639 emit_constant_insn (cond,
2640 gen_rtx_SET (VOIDmode, target,
2641 gen_rtx_MINUS (mode, GEN_INT (val),
2642 source)));
2643 return 1;
2645 can_negate = 1;
2647 break;
2649 default:
2650 gcc_unreachable ();
2653 /* If we can do it in one insn get out quickly. */
2654 if (const_ok_for_arm (val)
2655 || (can_negate_initial && const_ok_for_arm (-val))
2656 || (can_invert && const_ok_for_arm (~val)))
2658 if (generate)
2659 emit_constant_insn (cond,
2660 gen_rtx_SET (VOIDmode, target,
2661 (source
2662 ? gen_rtx_fmt_ee (code, mode, source,
2663 GEN_INT (val))
2664 : GEN_INT (val))));
2665 return 1;
2668 /* Calculate a few attributes that may be useful for specific
2669 optimizations. */
2670 /* Count number of leading zeros. */
2671 for (i = 31; i >= 0; i--)
2673 if ((remainder & (1 << i)) == 0)
2674 clear_sign_bit_copies++;
2675 else
2676 break;
2679 /* Count number of leading 1's. */
2680 for (i = 31; i >= 0; i--)
2682 if ((remainder & (1 << i)) != 0)
2683 set_sign_bit_copies++;
2684 else
2685 break;
2688 /* Count number of trailing zeros. */
2689 for (i = 0; i <= 31; i++)
2691 if ((remainder & (1 << i)) == 0)
2692 clear_zero_bit_copies++;
2693 else
2694 break;
2697 /* Count number of trailing 1's. */
2698 for (i = 0; i <= 31; i++)
2700 if ((remainder & (1 << i)) != 0)
2701 set_zero_bit_copies++;
2702 else
2703 break;
2706 switch (code)
2708 case SET:
2709 /* See if we can use movw. */
2710 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2712 if (generate)
2713 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2714 GEN_INT (val)));
2715 return 1;
2718 /* See if we can do this by sign_extending a constant that is known
2719 to be negative. This is a good way of doing it, since the shift
2720 may well merge into a subsequent insn. */
2721 if (set_sign_bit_copies > 1)
2723 if (const_ok_for_arm
2724 (temp1 = ARM_SIGN_EXTEND (remainder
2725 << (set_sign_bit_copies - 1))))
2727 if (generate)
2729 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2730 emit_constant_insn (cond,
2731 gen_rtx_SET (VOIDmode, new_src,
2732 GEN_INT (temp1)));
2733 emit_constant_insn (cond,
2734 gen_ashrsi3 (target, new_src,
2735 GEN_INT (set_sign_bit_copies - 1)));
2737 return 2;
2739 /* For an inverted constant, we will need to set the low bits,
2740 these will be shifted out of harm's way. */
2741 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2742 if (const_ok_for_arm (~temp1))
2744 if (generate)
2746 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2747 emit_constant_insn (cond,
2748 gen_rtx_SET (VOIDmode, new_src,
2749 GEN_INT (temp1)));
2750 emit_constant_insn (cond,
2751 gen_ashrsi3 (target, new_src,
2752 GEN_INT (set_sign_bit_copies - 1)));
2754 return 2;
2758 /* See if we can calculate the value as the difference between two
2759 valid immediates. */
2760 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2762 int topshift = clear_sign_bit_copies & ~1;
2764 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2765 & (0xff000000 >> topshift));
2767 /* If temp1 is zero, then that means the 9 most significant
2768 bits of remainder were 1 and we've caused it to overflow.
2769 When topshift is 0 we don't need to do anything since we
2770 can borrow from 'bit 32'. */
2771 if (temp1 == 0 && topshift != 0)
2772 temp1 = 0x80000000 >> (topshift - 1);
2774 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2776 if (const_ok_for_arm (temp2))
2778 if (generate)
2780 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2781 emit_constant_insn (cond,
2782 gen_rtx_SET (VOIDmode, new_src,
2783 GEN_INT (temp1)));
2784 emit_constant_insn (cond,
2785 gen_addsi3 (target, new_src,
2786 GEN_INT (-temp2)));
2789 return 2;
2793 /* See if we can generate this by setting the bottom (or the top)
2794 16 bits, and then shifting these into the other half of the
2795 word. We only look for the simplest cases, to do more would cost
2796 too much. Be careful, however, not to generate this when the
2797 alternative would take fewer insns. */
2798 if (val & 0xffff0000)
2800 temp1 = remainder & 0xffff0000;
2801 temp2 = remainder & 0x0000ffff;
2803 /* Overlaps outside this range are best done using other methods. */
2804 for (i = 9; i < 24; i++)
2806 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2807 && !const_ok_for_arm (temp2))
2809 rtx new_src = (subtargets
2810 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2811 : target);
2812 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2813 source, subtargets, generate);
2814 source = new_src;
2815 if (generate)
2816 emit_constant_insn
2817 (cond,
2818 gen_rtx_SET
2819 (VOIDmode, target,
2820 gen_rtx_IOR (mode,
2821 gen_rtx_ASHIFT (mode, source,
2822 GEN_INT (i)),
2823 source)));
2824 return insns + 1;
2828 /* Don't duplicate cases already considered. */
2829 for (i = 17; i < 24; i++)
2831 if (((temp1 | (temp1 >> i)) == remainder)
2832 && !const_ok_for_arm (temp1))
2834 rtx new_src = (subtargets
2835 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2836 : target);
2837 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2838 source, subtargets, generate);
2839 source = new_src;
2840 if (generate)
2841 emit_constant_insn
2842 (cond,
2843 gen_rtx_SET (VOIDmode, target,
2844 gen_rtx_IOR
2845 (mode,
2846 gen_rtx_LSHIFTRT (mode, source,
2847 GEN_INT (i)),
2848 source)));
2849 return insns + 1;
2853 break;
2855 case IOR:
2856 case XOR:
2857 /* If we have IOR or XOR, and the constant can be loaded in a
2858 single instruction, and we can find a temporary to put it in,
2859 then this can be done in two instructions instead of 3-4. */
2860 if (subtargets
2861 /* TARGET can't be NULL if SUBTARGETS is 0 */
2862 || (reload_completed && !reg_mentioned_p (target, source)))
2864 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2866 if (generate)
2868 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2870 emit_constant_insn (cond,
2871 gen_rtx_SET (VOIDmode, sub,
2872 GEN_INT (val)));
2873 emit_constant_insn (cond,
2874 gen_rtx_SET (VOIDmode, target,
2875 gen_rtx_fmt_ee (code, mode,
2876 source, sub)));
2878 return 2;
2882 if (code == XOR)
2883 break;
2885 /* Convert.
2886 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
2887 and the remainder 0s for e.g. 0xfff00000)
2888 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2890 This can be done in 2 instructions by using shifts with mov or mvn.
2891 e.g. for
2892 x = x | 0xfff00000;
2893 we generate.
2894 mvn r0, r0, asl #12
2895 mvn r0, r0, lsr #12 */
2896 if (set_sign_bit_copies > 8
2897 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2899 if (generate)
2901 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2902 rtx shift = GEN_INT (set_sign_bit_copies);
2904 emit_constant_insn
2905 (cond,
2906 gen_rtx_SET (VOIDmode, sub,
2907 gen_rtx_NOT (mode,
2908 gen_rtx_ASHIFT (mode,
2909 source,
2910 shift))));
2911 emit_constant_insn
2912 (cond,
2913 gen_rtx_SET (VOIDmode, target,
2914 gen_rtx_NOT (mode,
2915 gen_rtx_LSHIFTRT (mode, sub,
2916 shift))));
2918 return 2;
2921 /* Convert
2922 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2924 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2926 E.g. for r0 = r0 | 0xfff
2927 mvn r0, r0, lsr #12
2928 mvn r0, r0, asl #12
2931 if (set_zero_bit_copies > 8
2932 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2934 if (generate)
2936 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2937 rtx shift = GEN_INT (set_zero_bit_copies);
2939 emit_constant_insn
2940 (cond,
2941 gen_rtx_SET (VOIDmode, sub,
2942 gen_rtx_NOT (mode,
2943 gen_rtx_LSHIFTRT (mode,
2944 source,
2945 shift))));
2946 emit_constant_insn
2947 (cond,
2948 gen_rtx_SET (VOIDmode, target,
2949 gen_rtx_NOT (mode,
2950 gen_rtx_ASHIFT (mode, sub,
2951 shift))));
2953 return 2;
2956 /* This will never be reached for Thumb2 because orn is a valid
2957 instruction. This is for Thumb1 and the ARM 32 bit cases.
2959 x = y | constant (such that ~constant is a valid constant)
2960 Transform this to
2961 x = ~(~y & ~constant).
2963 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2965 if (generate)
2967 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2968 emit_constant_insn (cond,
2969 gen_rtx_SET (VOIDmode, sub,
2970 gen_rtx_NOT (mode, source)));
2971 source = sub;
2972 if (subtargets)
2973 sub = gen_reg_rtx (mode);
2974 emit_constant_insn (cond,
2975 gen_rtx_SET (VOIDmode, sub,
2976 gen_rtx_AND (mode, source,
2977 GEN_INT (temp1))));
2978 emit_constant_insn (cond,
2979 gen_rtx_SET (VOIDmode, target,
2980 gen_rtx_NOT (mode, sub)));
2982 return 3;
2984 break;
2986 case AND:
2987 /* See if two shifts will do 2 or more insn's worth of work. */
2988 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2990 HOST_WIDE_INT shift_mask = ((0xffffffff
2991 << (32 - clear_sign_bit_copies))
2992 & 0xffffffff);
2994 if ((remainder | shift_mask) != 0xffffffff)
2996 if (generate)
2998 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2999 insns = arm_gen_constant (AND, mode, cond,
3000 remainder | shift_mask,
3001 new_src, source, subtargets, 1);
3002 source = new_src;
3004 else
3006 rtx targ = subtargets ? NULL_RTX : target;
3007 insns = arm_gen_constant (AND, mode, cond,
3008 remainder | shift_mask,
3009 targ, source, subtargets, 0);
3013 if (generate)
3015 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3016 rtx shift = GEN_INT (clear_sign_bit_copies);
3018 emit_insn (gen_ashlsi3 (new_src, source, shift));
3019 emit_insn (gen_lshrsi3 (target, new_src, shift));
3022 return insns + 2;
3025 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3027 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3029 if ((remainder | shift_mask) != 0xffffffff)
3031 if (generate)
3033 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3035 insns = arm_gen_constant (AND, mode, cond,
3036 remainder | shift_mask,
3037 new_src, source, subtargets, 1);
3038 source = new_src;
3040 else
3042 rtx targ = subtargets ? NULL_RTX : target;
3044 insns = arm_gen_constant (AND, mode, cond,
3045 remainder | shift_mask,
3046 targ, source, subtargets, 0);
3050 if (generate)
3052 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3053 rtx shift = GEN_INT (clear_zero_bit_copies);
3055 emit_insn (gen_lshrsi3 (new_src, source, shift));
3056 emit_insn (gen_ashlsi3 (target, new_src, shift));
3059 return insns + 2;
3062 break;
3064 default:
3065 break;
3068 for (i = 0; i < 32; i++)
3069 if (remainder & (1 << i))
3070 num_bits_set++;
3072 if ((code == AND)
3073 || (code != IOR && can_invert && num_bits_set > 16))
3074 remainder ^= 0xffffffff;
3075 else if (code == PLUS && num_bits_set > 16)
3076 remainder = (-remainder) & 0xffffffff;
3078 /* For XOR, if more than half the bits are set and there's a sequence
3079 of more than 8 consecutive ones in the pattern then we can XOR by the
3080 inverted constant and then invert the final result; this may save an
3081 instruction and might also lead to the final mvn being merged with
3082 some other operation. */
3083 else if (code == XOR && num_bits_set > 16
3084 && (count_insns_for_constant (remainder ^ 0xffffffff,
3085 find_best_start
3086 (remainder ^ 0xffffffff))
3087 < count_insns_for_constant (remainder,
3088 find_best_start (remainder))))
3090 remainder ^= 0xffffffff;
3091 final_invert = 1;
3093 else
3095 can_invert = 0;
3096 can_negate = 0;
3099 /* Now try and find a way of doing the job in either two or three
3100 instructions.
3101 We start by looking for the largest block of zeros that are aligned on
3102 a 2-bit boundary, we then fill up the temps, wrapping around to the
3103 top of the word when we drop off the bottom.
3104 In the worst case this code should produce no more than four insns.
3105 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3106 best place to start. */
3108 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3109 the same. */
3111 /* Now start emitting the insns. */
3112 i = find_best_start (remainder);
3115 int end;
3117 if (i <= 0)
3118 i += 32;
3119 if (remainder & (3 << (i - 2)))
3121 end = i - 8;
3122 if (end < 0)
3123 end += 32;
3124 temp1 = remainder & ((0x0ff << end)
3125 | ((i < end) ? (0xff >> (32 - end)) : 0));
3126 remainder &= ~temp1;
3128 if (generate)
3130 rtx new_src, temp1_rtx;
3132 if (code == SET || code == MINUS)
3134 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3135 if (can_invert && code != MINUS)
3136 temp1 = ~temp1;
3138 else
3140 if ((final_invert || remainder) && subtargets)
3141 new_src = gen_reg_rtx (mode);
3142 else
3143 new_src = target;
3144 if (can_invert)
3145 temp1 = ~temp1;
3146 else if (can_negate)
3147 temp1 = -temp1;
3150 temp1 = trunc_int_for_mode (temp1, mode);
3151 temp1_rtx = GEN_INT (temp1);
3153 if (code == SET)
3155 else if (code == MINUS)
3156 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3157 else
3158 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3160 emit_constant_insn (cond,
3161 gen_rtx_SET (VOIDmode, new_src,
3162 temp1_rtx));
3163 source = new_src;
3166 if (code == SET)
3168 can_invert = 0;
3169 code = PLUS;
3171 else if (code == MINUS)
3172 code = PLUS;
3174 insns++;
3175 i -= 8 - step_size;
3177 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3178 shifts. */
3179 i -= step_size;
3181 while (remainder);
3184 if (final_invert)
3186 if (generate)
3187 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3188 gen_rtx_NOT (mode, source)));
3189 insns++;
3192 return insns;
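/* Worked example (assembly sketch, not literal compiler output):
   synthesizing 0x12345678 for code == SET in ARM mode takes four
   8-bit, even-rotated chunks, emitted one per iteration of the loop
   above (SET switches to PLUS after the first insn):

       mov  r0, #0x12000000
       add  r0, r0, #0x340000
       add  r0, r0, #0x5600
       add  r0, r0, #0x78

   On cores with movw/movt the same value needs only two instructions,
   which is why arm_split_constant prefers arm_emit_movpair when
   TARGET_USE_MOVT is set.  */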
3195 /* Canonicalize a comparison so that we are more likely to recognize it.
3196 This can be done for a few constant compares, where we can make the
3197 immediate value easier to load. */
3199 enum rtx_code
3200 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
3201 rtx * op1)
3203 unsigned HOST_WIDE_INT i = INTVAL (*op1);
3204 unsigned HOST_WIDE_INT maxval;
3205 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3207 switch (code)
3209 case EQ:
3210 case NE:
3211 return code;
3213 case GT:
3214 case LE:
3215 if (i != maxval
3216 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3218 *op1 = GEN_INT (i + 1);
3219 return code == GT ? GE : LT;
3221 break;
3223 case GE:
3224 case LT:
3225 if (i != ~maxval
3226 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3228 *op1 = GEN_INT (i - 1);
3229 return code == GE ? GT : LE;
3231 break;
3233 case GTU:
3234 case LEU:
3235 if (i != ~((unsigned HOST_WIDE_INT) 0)
3236 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3238 *op1 = GEN_INT (i + 1);
3239 return code == GTU ? GEU : LTU;
3241 break;
3243 case GEU:
3244 case LTU:
3245 if (i != 0
3246 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3248 *op1 = GEN_INT (i - 1);
3249 return code == GEU ? GTU : LEU;
3251 break;
3253 default:
3254 gcc_unreachable ();
3257 return code;
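/* Worked example (sketch): 0x3ff is not a valid ARM immediate but
   0x400 is, so a source comparison such as

       if (x > 0x3ff) ...

   is canonicalized here from GT 0x3ff to GE 0x400 and can then be
   compiled with a single "cmp rN, #1024".  */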
3261 /* Define how to find the value returned by a function. */
3263 static rtx
3264 arm_function_value(const_tree type, const_tree func,
3265 bool outgoing ATTRIBUTE_UNUSED)
3267 enum machine_mode mode;
3268 int unsignedp ATTRIBUTE_UNUSED;
3269 rtx r ATTRIBUTE_UNUSED;
3271 mode = TYPE_MODE (type);
3273 if (TARGET_AAPCS_BASED)
3274 return aapcs_allocate_return_reg (mode, type, func);
3276 /* Promote integer types. */
3277 if (INTEGRAL_TYPE_P (type))
3278 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3280 /* Promotes small structs returned in a register to full-word size
3281 for big-endian AAPCS. */
3282 if (arm_return_in_msb (type))
3284 HOST_WIDE_INT size = int_size_in_bytes (type);
3285 if (size % UNITS_PER_WORD != 0)
3287 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3288 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3292 return LIBCALL_VALUE (mode);
3295 static int
3296 libcall_eq (const void *p1, const void *p2)
3298 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3301 static hashval_t
3302 libcall_hash (const void *p1)
3304 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3307 static void
3308 add_libcall (htab_t htab, rtx libcall)
3310 *htab_find_slot (htab, libcall, INSERT) = libcall;
3313 static bool
3314 arm_libcall_uses_aapcs_base (const_rtx libcall)
3316 static bool init_done = false;
3317 static htab_t libcall_htab;
3319 if (!init_done)
3321 init_done = true;
3323 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3324 NULL);
3325 add_libcall (libcall_htab,
3326 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3327 add_libcall (libcall_htab,
3328 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3329 add_libcall (libcall_htab,
3330 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3331 add_libcall (libcall_htab,
3332 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3334 add_libcall (libcall_htab,
3335 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3336 add_libcall (libcall_htab,
3337 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3338 add_libcall (libcall_htab,
3339 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3340 add_libcall (libcall_htab,
3341 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3343 add_libcall (libcall_htab,
3344 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3345 add_libcall (libcall_htab,
3346 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3347 add_libcall (libcall_htab,
3348 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3349 add_libcall (libcall_htab,
3350 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3351 add_libcall (libcall_htab,
3352 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3353 add_libcall (libcall_htab,
3354 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3357 return libcall && htab_find (libcall_htab, libcall) != NULL;
3361 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3363 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3364 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3366 /* The following libcalls return their result in integer registers,
3367 even though they return a floating point value. */
3368 if (arm_libcall_uses_aapcs_base (libcall))
3369 return gen_rtx_REG (mode, ARG_REGISTER(1));
3373 return LIBCALL_VALUE (mode);
3376 /* Determine the amount of memory needed to store the possible return
3377 registers of an untyped call. */
3379 arm_apply_result_size (void)
3381 int size = 16;
3383 if (TARGET_32BIT)
3385 if (TARGET_HARD_FLOAT_ABI)
3387 if (TARGET_VFP)
3388 size += 32;
3389 if (TARGET_FPA)
3390 size += 12;
3391 if (TARGET_MAVERICK)
3392 size += 8;
3394 if (TARGET_IWMMXT_ABI)
3395 size += 8;
3398 return size;
3401 /* Decide whether TYPE should be returned in memory (true)
3402 or in a register (false). FNTYPE is the type of the function making
3403 the call. */
3404 static bool
3405 arm_return_in_memory (const_tree type, const_tree fntype)
3407 HOST_WIDE_INT size;
3409 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3411 if (TARGET_AAPCS_BASED)
3413 /* Simple, non-aggregate types (i.e. not including vectors and
3414 complex) are always returned in a register (or registers).
3415 We don't care about which register here, so we can short-cut
3416 some of the detail. */
3417 if (!AGGREGATE_TYPE_P (type)
3418 && TREE_CODE (type) != VECTOR_TYPE
3419 && TREE_CODE (type) != COMPLEX_TYPE)
3420 return false;
3422 /* Any return value that is no larger than one word can be
3423 returned in r0. */
3424 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3425 return false;
3427 /* Check any available co-processors to see if they accept the
3428 type as a register candidate (VFP, for example, can return
3429 some aggregates in consecutive registers). These aren't
3430 available if the call is variadic. */
3431 if (aapcs_select_return_coproc (type, fntype) >= 0)
3432 return false;
3434 /* Vector values should be returned using ARM registers, not
3435 memory (unless they're over 16 bytes, which will break since
3436 we only have four call-clobbered registers to play with). */
3437 if (TREE_CODE (type) == VECTOR_TYPE)
3438 return (size < 0 || size > (4 * UNITS_PER_WORD));
3440 /* The rest go in memory. */
3441 return true;
3444 if (TREE_CODE (type) == VECTOR_TYPE)
3445 return (size < 0 || size > (4 * UNITS_PER_WORD));
3447 if (!AGGREGATE_TYPE_P (type) &&
3448 (TREE_CODE (type) != VECTOR_TYPE))
3449 /* All simple types are returned in registers. */
3450 return false;
3452 if (arm_abi != ARM_ABI_APCS)
3454 /* ATPCS and later return aggregate types in memory only if they are
3455 larger than a word (or are variable size). */
3456 return (size < 0 || size > UNITS_PER_WORD);
3459 /* For the arm-wince targets we choose to be compatible with Microsoft's
3460 ARM and Thumb compilers, which always return aggregates in memory. */
3461 #ifndef ARM_WINCE
3462 /* All structures/unions bigger than one word are returned in memory.
3463 Also catch the case where int_size_in_bytes returns -1. In this case
3464 the aggregate is either huge or of variable size, and in either case
3465 we will want to return it via memory and not in a register. */
3466 if (size < 0 || size > UNITS_PER_WORD)
3467 return true;
3469 if (TREE_CODE (type) == RECORD_TYPE)
3471 tree field;
3473 /* For a struct the APCS says that we only return in a register
3474 if the type is 'integer like' and every addressable element
3475 has an offset of zero. For practical purposes this means
3476 that the structure can have at most one non bit-field element
3477 and that this element must be the first one in the structure. */
3479 /* Find the first field, ignoring non FIELD_DECL things which will
3480 have been created by C++. */
3481 for (field = TYPE_FIELDS (type);
3482 field && TREE_CODE (field) != FIELD_DECL;
3483 field = TREE_CHAIN (field))
3484 continue;
3486 if (field == NULL)
3487 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3489 /* Check that the first field is valid for returning in a register. */
3491 /* ... Floats are not allowed */
3492 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3493 return true;
3495 /* ... Aggregates that are not themselves valid for returning in
3496 a register are not allowed. */
3497 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3498 return true;
3500 /* Now check the remaining fields, if any. Only bitfields are allowed,
3501 since they are not addressable. */
3502 for (field = TREE_CHAIN (field);
3503 field;
3504 field = TREE_CHAIN (field))
3506 if (TREE_CODE (field) != FIELD_DECL)
3507 continue;
3509 if (!DECL_BIT_FIELD_TYPE (field))
3510 return true;
3513 return false;
3516 if (TREE_CODE (type) == UNION_TYPE)
3518 tree field;
3520 /* Unions can be returned in registers if every element is
3521 integral, or can be returned in an integer register. */
3522 for (field = TYPE_FIELDS (type);
3523 field;
3524 field = TREE_CHAIN (field))
3526 if (TREE_CODE (field) != FIELD_DECL)
3527 continue;
3529 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3530 return true;
3532 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3533 return true;
3536 return false;
3538 #endif /* not ARM_WINCE */
3540 /* Return all other types in memory. */
3541 return true;
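/* Illustrative examples (sketch of the AAPCS branch above):

       struct s1 { int a; };        -- 4 bytes: returned in r0
       struct s2 { int a, b; };     -- 8 bytes: returned in memory
       struct s3 { double d[2]; };  -- homogeneous FP aggregate: may be
                                       returned in VFP registers under
                                       the hard-float variant of AAPCS

   Non-AAPCS targets instead use the older APCS "integer like" rules,
   handled in the second half of the function.  */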
3544 /* Indicate whether or not words of a double are in big-endian order. */
3547 arm_float_words_big_endian (void)
3549 if (TARGET_MAVERICK)
3550 return 0;
3552 /* For FPA, float words are always big-endian. For VFP, floats words
3553 follow the memory system mode. */
3555 if (TARGET_FPA)
3557 return 1;
3560 if (TARGET_VFP)
3561 return (TARGET_BIG_END ? 1 : 0);
3563 return 1;
3566 const struct pcs_attribute_arg
3568 const char *arg;
3569 enum arm_pcs value;
3570 } pcs_attribute_args[] =
3572 {"aapcs", ARM_PCS_AAPCS},
3573 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3574 #if 0
3575 /* We could recognize these, but changes would be needed elsewhere
3576 * to implement them. */
3577 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3578 {"atpcs", ARM_PCS_ATPCS},
3579 {"apcs", ARM_PCS_APCS},
3580 #endif
3581 {NULL, ARM_PCS_UNKNOWN}
3584 static enum arm_pcs
3585 arm_pcs_from_attribute (tree attr)
3587 const struct pcs_attribute_arg *ptr;
3588 const char *arg;
3590 /* Get the value of the argument. */
3591 if (TREE_VALUE (attr) == NULL_TREE
3592 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3593 return ARM_PCS_UNKNOWN;
3595 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3597 /* Check it against the list of known arguments. */
3598 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3599 if (streq (arg, ptr->arg))
3600 return ptr->value;
3602 /* An unrecognized PCS variant. */
3603 return ARM_PCS_UNKNOWN;
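/* Usage sketch (hypothetical declaration): the strings in
   pcs_attribute_args above are the values accepted from the "pcs"
   function-type attribute, e.g.

       double __attribute__ ((pcs ("aapcs-vfp"))) dot (const double *, int);

   Any other string falls through to ARM_PCS_UNKNOWN here.  */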
3606 /* Get the PCS variant to use for this call. TYPE is the function's type
3607 specification, DECL is the specific declaration. DECL may be null if
3608 the call could be indirect or if this is a library call. */
3609 static enum arm_pcs
3610 arm_get_pcs_model (const_tree type, const_tree decl)
3612 bool user_convention = false;
3613 enum arm_pcs user_pcs = arm_pcs_default;
3614 tree attr;
3616 gcc_assert (type);
3618 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3619 if (attr)
3621 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3622 user_convention = true;
3625 if (TARGET_AAPCS_BASED)
3627 /* Detect varargs functions. These always use the base rules
3628 (no argument is ever a candidate for a co-processor
3629 register). */
3630 bool base_rules = (TYPE_ARG_TYPES (type) != 0
3631 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3632 != void_type_node));
3634 if (user_convention)
3636 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3637 sorry ("Non-AAPCS derived PCS variant");
3638 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3639 error ("Variadic functions must use the base AAPCS variant");
3642 if (base_rules)
3643 return ARM_PCS_AAPCS;
3644 else if (user_convention)
3645 return user_pcs;
3646 else if (decl && flag_unit_at_a_time)
3648 /* Local functions never leak outside this compilation unit,
3649 so we are free to use whatever conventions are
3650 appropriate. */
3651 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3652 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3653 if (i && i->local)
3654 return ARM_PCS_AAPCS_LOCAL;
3657 else if (user_convention && user_pcs != arm_pcs_default)
3658 sorry ("PCS variant");
3660 /* For everything else we use the target's default. */
3661 return arm_pcs_default;
3665 static void
3666 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3667 const_tree fntype ATTRIBUTE_UNUSED,
3668 rtx libcall ATTRIBUTE_UNUSED,
3669 const_tree fndecl ATTRIBUTE_UNUSED)
3671 /* Record the unallocated VFP registers. */
3672 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3673 pcum->aapcs_vfp_reg_alloc = 0;
3676 /* Walk down the type tree of TYPE counting consecutive base elements.
3677 If *MODEP is VOIDmode, then set it to the first valid floating point
3678 type. If a non-floating point type is found, or if a floating point
3679 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3680 otherwise return the count in the sub-tree. */
3681 static int
3682 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3684 enum machine_mode mode;
3685 HOST_WIDE_INT size;
3687 switch (TREE_CODE (type))
3689 case REAL_TYPE:
3690 mode = TYPE_MODE (type);
3691 if (mode != DFmode && mode != SFmode)
3692 return -1;
3694 if (*modep == VOIDmode)
3695 *modep = mode;
3697 if (*modep == mode)
3698 return 1;
3700 break;
3702 case COMPLEX_TYPE:
3703 mode = TYPE_MODE (TREE_TYPE (type));
3704 if (mode != DFmode && mode != SFmode)
3705 return -1;
3707 if (*modep == VOIDmode)
3708 *modep = mode;
3710 if (*modep == mode)
3711 return 2;
3713 break;
3715 case VECTOR_TYPE:
3716 /* Use V2SImode and V4SImode as representatives of all 64-bit
3717 and 128-bit vector types, whether or not those modes are
3718 supported with the present options. */
3719 size = int_size_in_bytes (type);
3720 switch (size)
3722 case 8:
3723 mode = V2SImode;
3724 break;
3725 case 16:
3726 mode = V4SImode;
3727 break;
3728 default:
3729 return -1;
3732 if (*modep == VOIDmode)
3733 *modep = mode;
3735 /* Vector modes are considered to be opaque: two vectors are
3736 equivalent for the purposes of being homogeneous aggregates
3737 if they are the same size. */
3738 if (*modep == mode)
3739 return 1;
3741 break;
3743 case ARRAY_TYPE:
3745 int count;
3746 tree index = TYPE_DOMAIN (type);
3748 /* Can't handle incomplete types. */
3749 if (!COMPLETE_TYPE_P(type))
3750 return -1;
3752 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3753 if (count == -1
3754 || !index
3755 || !TYPE_MAX_VALUE (index)
3756 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3757 || !TYPE_MIN_VALUE (index)
3758 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3759 || count < 0)
3760 return -1;
3762 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3763 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3765 /* There must be no padding. */
3766 if (!host_integerp (TYPE_SIZE (type), 1)
3767 || (tree_low_cst (TYPE_SIZE (type), 1)
3768 != count * GET_MODE_BITSIZE (*modep)))
3769 return -1;
3771 return count;
3774 case RECORD_TYPE:
3776 int count = 0;
3777 int sub_count;
3778 tree field;
3780 /* Can't handle incomplete types. */
3781 if (!COMPLETE_TYPE_P(type))
3782 return -1;
3784 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3786 if (TREE_CODE (field) != FIELD_DECL)
3787 continue;
3789 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3790 if (sub_count < 0)
3791 return -1;
3792 count += sub_count;
3795 /* There must be no padding. */
3796 if (!host_integerp (TYPE_SIZE (type), 1)
3797 || (tree_low_cst (TYPE_SIZE (type), 1)
3798 != count * GET_MODE_BITSIZE (*modep)))
3799 return -1;
3801 return count;
3804 case UNION_TYPE:
3805 case QUAL_UNION_TYPE:
3807 /* These aren't very interesting except in a degenerate case. */
3808 int count = 0;
3809 int sub_count;
3810 tree field;
3812 /* Can't handle incomplete types. */
3813 if (!COMPLETE_TYPE_P(type))
3814 return -1;
3816 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3818 if (TREE_CODE (field) != FIELD_DECL)
3819 continue;
3821 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3822 if (sub_count < 0)
3823 return -1;
3824 count = count > sub_count ? count : sub_count;
3827 /* There must be no padding. */
3828 if (!host_integerp (TYPE_SIZE (type), 1)
3829 || (tree_low_cst (TYPE_SIZE (type), 1)
3830 != count * GET_MODE_BITSIZE (*modep)))
3831 return -1;
3833 return count;
3836 default:
3837 break;
3840 return -1;
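/* Some illustrative inputs for the walk above (assuming VFP hardware is
   available):

     struct { float x, y, z; }		-> 3 elements of SFmode
     struct { double d[2]; }		-> 2 elements of DFmode
     _Complex double			-> 2 elements of DFmode
     struct { float f; double d; }	-> -1 (mixed base modes)
     struct { float f; int i; }		-> -1 (non-FP member)

   Only counts between 1 and 4 are later treated as homogeneous
   aggregates by the VFP co-processor rules.  */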
3843 /* Return true if PCS_VARIANT should use VFP registers. */
3844 static bool
3845 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3847 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3848 return true;
3850 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3851 return false;
3853 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3854 (TARGET_VFP_DOUBLE || !is_double));
3857 static bool
3858 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3859 enum machine_mode mode, const_tree type,
3860 enum machine_mode *base_mode, int *count)
3862 enum machine_mode new_mode = VOIDmode;
3864 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3865 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3866 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3868 *count = 1;
3869 new_mode = mode;
3871 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3873 *count = 2;
3874 new_mode = (mode == DCmode ? DFmode : SFmode);
3876 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3878 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3880 if (ag_count > 0 && ag_count <= 4)
3881 *count = ag_count;
3882 else
3883 return false;
3885 else
3886 return false;
3889 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3890 return false;
3892 *base_mode = new_mode;
3893 return true;
3896 static bool
3897 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3898 enum machine_mode mode, const_tree type)
3900 int count ATTRIBUTE_UNUSED;
3901 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3903 if (!use_vfp_abi (pcs_variant, false))
3904 return false;
3905 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3906 &ag_mode, &count);
3909 static bool
3910 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3911 const_tree type)
3913 if (!use_vfp_abi (pcum->pcs_variant, false))
3914 return false;
3916 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3917 &pcum->aapcs_vfp_rmode,
3918 &pcum->aapcs_vfp_rcount);
3921 static bool
3922 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3923 const_tree type ATTRIBUTE_UNUSED)
3925 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3926 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3927 int regno;
3929 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3930 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3932 pcum->aapcs_vfp_reg_alloc = mask << regno;
3933 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3935 int i;
3936 int rcount = pcum->aapcs_vfp_rcount;
3937 int rshift = shift;
3938 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3939 rtx par;
3940 if (!TARGET_NEON)
3942 /* Avoid using unsupported vector modes. */
3943 if (rmode == V2SImode)
3944 rmode = DImode;
3945 else if (rmode == V4SImode)
3947 rmode = DImode;
3948 rcount *= 2;
3949 rshift /= 2;
3952 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3953 for (i = 0; i < rcount; i++)
3955 rtx tmp = gen_rtx_REG (rmode,
3956 FIRST_VFP_REGNUM + regno + i * rshift);
3957 tmp = gen_rtx_EXPR_LIST
3958 (VOIDmode, tmp,
3959 GEN_INT (i * GET_MODE_SIZE (rmode)));
3960 XVECEXP (par, 0, i) = tmp;
3963 pcum->aapcs_reg = par;
3965 else
3966 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3967 return true;
3969 return false;
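/* A worked example of the allocation above: for a homogeneous aggregate
   of two doubles, aapcs_vfp_rmode is DFmode, so SHIFT is 2 and MASK is
   0xf; the loop then looks for four consecutive free single-precision
   registers starting at an even regno, i.e. the first free pair of
   d0-d7.  If s0-s3 are free the aggregate lands in d0/d1, and
   aapcs_vfp_regs_free loses those four bits once aapcs_vfp_advance runs.
   If no such window is free, the argument falls back to the stack.  */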
3972 static rtx
3973 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
3974 enum machine_mode mode,
3975 const_tree type ATTRIBUTE_UNUSED)
3977 if (!use_vfp_abi (pcs_variant, false))
3978 return NULL_RTX;
3980 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3982 int count;
3983 enum machine_mode ag_mode;
3984 int i;
3985 rtx par;
3986 int shift;
3988 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3989 &ag_mode, &count);
3991 if (!TARGET_NEON)
3993 if (ag_mode == V2SImode)
3994 ag_mode = DImode;
3995 else if (ag_mode == V4SImode)
3997 ag_mode = DImode;
3998 count *= 2;
4001 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4002 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4003 for (i = 0; i < count; i++)
4005 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4006 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4007 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4008 XVECEXP (par, 0, i) = tmp;
4011 return par;
4014 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4017 static void
4018 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4019 enum machine_mode mode ATTRIBUTE_UNUSED,
4020 const_tree type ATTRIBUTE_UNUSED)
4022 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4023 pcum->aapcs_vfp_reg_alloc = 0;
4024 return;
4027 #define AAPCS_CP(X) \
4029 aapcs_ ## X ## _cum_init, \
4030 aapcs_ ## X ## _is_call_candidate, \
4031 aapcs_ ## X ## _allocate, \
4032 aapcs_ ## X ## _is_return_candidate, \
4033 aapcs_ ## X ## _allocate_return_reg, \
4034 aapcs_ ## X ## _advance \
4037 /* Table of co-processors that can be used to pass arguments in
4038 registers. Ideally no argument should be a candidate for more than
4039 one co-processor table entry, but the table is processed in order
4040 and stops after the first match. If that entry then fails to put
4041 the argument into a co-processor register, the argument will go on
4042 the stack. */
4043 static struct
4045 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4046 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4048 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4049 BLKmode) is a candidate for this co-processor's registers; this
4050 function should ignore any position-dependent state in
4051 CUMULATIVE_ARGS and only use call-type dependent information. */
4052 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4054 /* Return true if the argument does get a co-processor register; it
4055 should set aapcs_reg to an RTX of the register allocated as is
4056 required for a return from FUNCTION_ARG. */
4057 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4059 /* Return true if a result of mode MODE (or type TYPE if MODE is
4060 BLKmode) can be returned in this co-processor's registers. */
4061 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4063 /* Allocate and return an RTX element to hold the return type of a
4064 call, this routine must not fail and will only be called if
4065 is_return_candidate returned true with the same parameters. */
4066 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4068 /* Finish processing this argument and prepare to start processing
4069 the next one. */
4070 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4071 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4073 AAPCS_CP(vfp)
4076 #undef AAPCS_CP
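/* With the macro above, the single entry AAPCS_CP(vfp) expands to

     { aapcs_vfp_cum_init,
       aapcs_vfp_is_call_candidate,
       aapcs_vfp_allocate,
       aapcs_vfp_is_return_candidate,
       aapcs_vfp_allocate_return_reg,
       aapcs_vfp_advance }

   so supporting another co-processor convention is roughly a matter of
   providing the six hooks, adding another AAPCS_CP line, and increasing
   ARM_NUM_COPROC_SLOTS.  */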
4078 static int
4079 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4080 tree type)
4082 int i;
4084 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4085 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4086 return i;
4088 return -1;
4091 static int
4092 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4094 /* We aren't passed a decl, so we can't check that a call is local.
4095 However, it isn't clear that that would be a win anyway, since it
4096 might limit some tail-calling opportunities. */
4097 enum arm_pcs pcs_variant;
4099 if (fntype)
4101 const_tree fndecl = NULL_TREE;
4103 if (TREE_CODE (fntype) == FUNCTION_DECL)
4105 fndecl = fntype;
4106 fntype = TREE_TYPE (fntype);
4109 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4111 else
4112 pcs_variant = arm_pcs_default;
4114 if (pcs_variant != ARM_PCS_AAPCS)
4116 int i;
4118 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4119 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4120 TYPE_MODE (type),
4121 type))
4122 return i;
4124 return -1;
4127 static rtx
4128 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4129 const_tree fntype)
4131 /* We aren't passed a decl, so we can't check that a call is local.
4132 However, it isn't clear that that would be a win anyway, since it
4133 might limit some tail-calling opportunities. */
4134 enum arm_pcs pcs_variant;
4135 int unsignedp ATTRIBUTE_UNUSED;
4137 if (fntype)
4139 const_tree fndecl = NULL_TREE;
4141 if (TREE_CODE (fntype) == FUNCTION_DECL)
4143 fndecl = fntype;
4144 fntype = TREE_TYPE (fntype);
4147 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4149 else
4150 pcs_variant = arm_pcs_default;
4152 /* Promote integer types. */
4153 if (type && INTEGRAL_TYPE_P (type))
4154 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4156 if (pcs_variant != ARM_PCS_AAPCS)
4158 int i;
4160 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4161 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4162 type))
4163 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4164 mode, type);
4167 /* Promotes small structs returned in a register to full-word size
4168 for big-endian AAPCS. */
4169 if (type && arm_return_in_msb (type))
4171 HOST_WIDE_INT size = int_size_in_bytes (type);
4172 if (size % UNITS_PER_WORD != 0)
4174 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4175 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4179 return gen_rtx_REG (mode, R0_REGNUM);
4183 aapcs_libcall_value (enum machine_mode mode)
4185 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4188 /* Lay out a function argument using the AAPCS rules. The rule
4189 numbers referred to here are those in the AAPCS. */
4190 static void
4191 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4192 tree type, int named)
4194 int nregs, nregs2;
4195 int ncrn;
4197 /* We only need to do this once per argument. */
4198 if (pcum->aapcs_arg_processed)
4199 return;
4201 pcum->aapcs_arg_processed = true;
4203 /* Special case: if named is false then we are handling an incoming
4204 anonymous argument which is on the stack. */
4205 if (!named)
4206 return;
4208 /* Is this a potential co-processor register candidate? */
4209 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4211 int slot = aapcs_select_call_coproc (pcum, mode, type);
4212 pcum->aapcs_cprc_slot = slot;
4214 /* We don't have to apply any of the rules from part B of the
4215 preparation phase, these are handled elsewhere in the
4216 compiler. */
4218 if (slot >= 0)
4220 /* A Co-processor register candidate goes either in its own
4221 class of registers or on the stack. */
4222 if (!pcum->aapcs_cprc_failed[slot])
4224 /* C1.cp - Try to allocate the argument to co-processor
4225 registers. */
4226 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4227 return;
4229 /* C2.cp - Put the argument on the stack and note that we
4230 can't assign any more candidates in this slot. We also
4231 need to note that we have allocated stack space, so that
4232 we won't later try to split a non-cprc candidate between
4233 core registers and the stack. */
4234 pcum->aapcs_cprc_failed[slot] = true;
4235 pcum->can_split = false;
4238 /* We didn't get a register, so this argument goes on the
4239 stack. */
4240 gcc_assert (pcum->can_split == false);
4241 return;
4245 /* C3 - For double-word aligned arguments, round the NCRN up to the
4246 next even number. */
4247 ncrn = pcum->aapcs_ncrn;
4248 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4249 ncrn++;
4251 nregs = ARM_NUM_REGS2(mode, type);
4253 /* Sigh, this test should really assert that nregs > 0, but a GCC
4254 extension allows empty structs and then gives them empty size; it
4255 then allows such a structure to be passed by value. For some of
4256 the code below we have to pretend that such an argument has
4257 non-zero size so that we 'locate' it correctly either in
4258 registers or on the stack. */
4259 gcc_assert (nregs >= 0);
4261 nregs2 = nregs ? nregs : 1;
4263 /* C4 - Argument fits entirely in core registers. */
4264 if (ncrn + nregs2 <= NUM_ARG_REGS)
4266 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4267 pcum->aapcs_next_ncrn = ncrn + nregs;
4268 return;
4271 /* C5 - Some core registers left and there are no arguments already
4272 on the stack: split this argument between the remaining core
4273 registers and the stack. */
4274 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4276 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4277 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4278 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4279 return;
4282 /* C6 - NCRN is set to 4. */
4283 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4285 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4286 return;
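/* A worked example of rules C3-C5 for the base variant (no co-processor
   candidates, e.g. when compiling with -mfloat-abi=soft):

     void f (int a, double b, double c);

   a occupies r0 (NCRN becomes 1); for b, C3 rounds NCRN up to 2 and C4
   places it in r2/r3; for c no core registers remain, so C6 sets NCRN to
   4 and C7/C8 leave c on the stack.  By contrast, for

     struct s { int x, y, z; };
     void g (int a, int b, struct s c);

   c starts at NCRN 2 and needs three words, so C5 splits it: x and y go
   in r2/r3, z goes on the stack, and aapcs_partial records the 8 bytes
   passed in registers.  */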
4289 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4290 for a call to a function whose data type is FNTYPE.
4291 For a library call, FNTYPE is NULL. */
4292 void
4293 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4294 rtx libname,
4295 tree fndecl ATTRIBUTE_UNUSED)
4297 /* Long call handling. */
4298 if (fntype)
4299 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4300 else
4301 pcum->pcs_variant = arm_pcs_default;
4303 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4305 if (arm_libcall_uses_aapcs_base (libname))
4306 pcum->pcs_variant = ARM_PCS_AAPCS;
4308 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4309 pcum->aapcs_reg = NULL_RTX;
4310 pcum->aapcs_partial = 0;
4311 pcum->aapcs_arg_processed = false;
4312 pcum->aapcs_cprc_slot = -1;
4313 pcum->can_split = true;
4315 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4317 int i;
4319 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4321 pcum->aapcs_cprc_failed[i] = false;
4322 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4325 return;
4328 /* Legacy ABIs */
4330 /* On the ARM, the offset starts at 0. */
4331 pcum->nregs = 0;
4332 pcum->iwmmxt_nregs = 0;
4333 pcum->can_split = true;
4335 /* Varargs vectors are treated the same as long long.
4336 named_count avoids having to change the way arm handles 'named' */
4337 pcum->named_count = 0;
4338 pcum->nargs = 0;
4340 if (TARGET_REALLY_IWMMXT && fntype)
4342 tree fn_arg;
4344 for (fn_arg = TYPE_ARG_TYPES (fntype);
4345 fn_arg;
4346 fn_arg = TREE_CHAIN (fn_arg))
4347 pcum->named_count += 1;
4349 if (! pcum->named_count)
4350 pcum->named_count = INT_MAX;
4355 /* Return true if mode/type need doubleword alignment. */
4356 bool
4357 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4359 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4360 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
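/* On AAPCS targets PARM_BOUNDARY is 32, so this returns true for double,
   for long long, and for any type whose alignment has been raised, e.g.

     struct __attribute__ ((aligned (8))) pair { int a, b; };

   while plain int, float and pointer arguments do not need the
   even-register rounding applied by rule C3 above.  */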
4364 /* Determine where to put an argument to a function.
4365 Value is zero to push the argument on the stack,
4366 or a hard register in which to store the argument.
4368 MODE is the argument's machine mode.
4369 TYPE is the data type of the argument (as a tree).
4370 This is null for libcalls where that information may
4371 not be available.
4372 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4373 the preceding args and about the function being called.
4374 NAMED is nonzero if this argument is a named parameter
4375 (otherwise it is an extra parameter matching an ellipsis). */
4378 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4379 tree type, int named)
4381 int nregs;
4383 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4384 a call insn (op3 of a call_value insn). */
4385 if (mode == VOIDmode)
4386 return const0_rtx;
4388 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4390 aapcs_layout_arg (pcum, mode, type, named);
4391 return pcum->aapcs_reg;
4394 /* Varargs vectors are treated the same as long long.
4395 named_count avoids having to change the way arm handles 'named' */
4396 if (TARGET_IWMMXT_ABI
4397 && arm_vector_mode_supported_p (mode)
4398 && pcum->named_count > pcum->nargs + 1)
4400 if (pcum->iwmmxt_nregs <= 9)
4401 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4402 else
4404 pcum->can_split = false;
4405 return NULL_RTX;
4409 /* Put doubleword aligned quantities in even register pairs. */
4410 if (pcum->nregs & 1
4411 && ARM_DOUBLEWORD_ALIGN
4412 && arm_needs_doubleword_align (mode, type))
4413 pcum->nregs++;
4415 if (mode == VOIDmode)
4416 /* Pick an arbitrary value for operand 2 of the call insn. */
4417 return const0_rtx;
4419 /* Only allow splitting an arg between regs and memory if all preceding
4420 args were allocated to regs. For args passed by reference we only count
4421 the reference pointer. */
4422 if (pcum->can_split)
4423 nregs = 1;
4424 else
4425 nregs = ARM_NUM_REGS2 (mode, type);
4427 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4428 return NULL_RTX;
4430 return gen_rtx_REG (mode, pcum->nregs);
4433 static int
4434 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4435 tree type, bool named)
4437 int nregs = pcum->nregs;
4439 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4441 aapcs_layout_arg (pcum, mode, type, named);
4442 return pcum->aapcs_partial;
4445 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4446 return 0;
4448 if (NUM_ARG_REGS > nregs
4449 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4450 && pcum->can_split)
4451 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4453 return 0;
4456 void
4457 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4458 tree type, bool named)
4460 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4462 aapcs_layout_arg (pcum, mode, type, named);
4464 if (pcum->aapcs_cprc_slot >= 0)
4466 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4467 type);
4468 pcum->aapcs_cprc_slot = -1;
4471 /* Generic stuff. */
4472 pcum->aapcs_arg_processed = false;
4473 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4474 pcum->aapcs_reg = NULL_RTX;
4475 pcum->aapcs_partial = 0;
4477 else
4479 pcum->nargs += 1;
4480 if (arm_vector_mode_supported_p (mode)
4481 && pcum->named_count > pcum->nargs
4482 && TARGET_IWMMXT_ABI)
4483 pcum->iwmmxt_nregs += 1;
4484 else
4485 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4489 /* Variable sized types are passed by reference. This is a GCC
4490 extension to the ARM ABI. */
4492 static bool
4493 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4494 enum machine_mode mode ATTRIBUTE_UNUSED,
4495 const_tree type, bool named ATTRIBUTE_UNUSED)
4497 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4500 /* Encode the current state of the #pragma [no_]long_calls. */
4501 typedef enum
4503 OFF, /* No #pragma [no_]long_calls is in effect. */
4504 LONG, /* #pragma long_calls is in effect. */
4505 SHORT /* #pragma no_long_calls is in effect. */
4506 } arm_pragma_enum;
4508 static arm_pragma_enum arm_pragma_long_calls = OFF;
4510 void
4511 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4513 arm_pragma_long_calls = LONG;
4516 void
4517 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4519 arm_pragma_long_calls = SHORT;
4522 void
4523 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4525 arm_pragma_long_calls = OFF;
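/* These handlers implement, for example:

     #pragma long_calls
     void far_away (void);	(declared with a "long_call" attribute)
     #pragma no_long_calls
     void nearby (void);	(declared with a "short_call" attribute)
     #pragma long_calls_off
     void ordinary (void);	(back to the command-line default)

   The attribute itself is attached by arm_set_default_type_attributes
   below.  */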
4528 /* Handle an attribute requiring a FUNCTION_DECL;
4529 arguments as in struct attribute_spec.handler. */
4530 static tree
4531 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4532 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4534 if (TREE_CODE (*node) != FUNCTION_DECL)
4536 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4537 name);
4538 *no_add_attrs = true;
4541 return NULL_TREE;
4544 /* Handle an "interrupt" or "isr" attribute;
4545 arguments as in struct attribute_spec.handler. */
4546 static tree
4547 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4548 bool *no_add_attrs)
4550 if (DECL_P (*node))
4552 if (TREE_CODE (*node) != FUNCTION_DECL)
4554 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4555 name);
4556 *no_add_attrs = true;
4558 /* FIXME: the argument if any is checked for type attributes;
4559 should it be checked for decl ones? */
4561 else
4563 if (TREE_CODE (*node) == FUNCTION_TYPE
4564 || TREE_CODE (*node) == METHOD_TYPE)
4566 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4568 warning (OPT_Wattributes, "%qE attribute ignored",
4569 name);
4570 *no_add_attrs = true;
4573 else if (TREE_CODE (*node) == POINTER_TYPE
4574 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4575 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4576 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4578 *node = build_variant_type_copy (*node);
4579 TREE_TYPE (*node) = build_type_attribute_variant
4580 (TREE_TYPE (*node),
4581 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4582 *no_add_attrs = true;
4584 else
4586 /* Possibly pass this attribute on from the type to a decl. */
4587 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4588 | (int) ATTR_FLAG_FUNCTION_NEXT
4589 | (int) ATTR_FLAG_ARRAY_NEXT))
4591 *no_add_attrs = true;
4592 return tree_cons (name, args, NULL_TREE);
4594 else
4596 warning (OPT_Wattributes, "%qE attribute ignored",
4597 name);
4602 return NULL_TREE;
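/* Typical uses that reach this handler:

     void h1 (void) __attribute__ ((interrupt ("IRQ")));
     void h2 (void) __attribute__ ((isr));

   The optional string argument selects the interrupt type understood by
   arm_isr_value ("IRQ", "FIQ", and so on); an unrecognized string causes
   the attribute to be ignored with a warning, as above.  */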
4605 /* Handle a "pcs" attribute; arguments as in struct
4606 attribute_spec.handler. */
4607 static tree
4608 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4609 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4611 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4613 warning (OPT_Wattributes, "%qE attribute ignored", name);
4614 *no_add_attrs = true;
4616 return NULL_TREE;
4619 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4620 /* Handle the "notshared" attribute. This attribute is another way of
4621 requesting hidden visibility. ARM's compiler supports
4622 "__declspec(notshared)"; we support the same thing via an
4623 attribute. */
4625 static tree
4626 arm_handle_notshared_attribute (tree *node,
4627 tree name ATTRIBUTE_UNUSED,
4628 tree args ATTRIBUTE_UNUSED,
4629 int flags ATTRIBUTE_UNUSED,
4630 bool *no_add_attrs)
4632 tree decl = TYPE_NAME (*node);
4634 if (decl)
4636 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4637 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4638 *no_add_attrs = false;
4640 return NULL_TREE;
4642 #endif
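/* For example, on configurations where this attribute is enabled,

     class __attribute__ ((notshared)) lock { virtual void acquire (); };

   applies the attribute to a class type; the handler above then gives
   the type's name hidden visibility, which is the effect RealView
   obtains from "__declspec(notshared)".  */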
4644 /* Return 0 if the attributes for two types are incompatible, 1 if they
4645 are compatible, and 2 if they are nearly compatible (which causes a
4646 warning to be generated). */
4647 static int
4648 arm_comp_type_attributes (const_tree type1, const_tree type2)
4650 int l1, l2, s1, s2;
4652 /* Check for mismatch of non-default calling convention. */
4653 if (TREE_CODE (type1) != FUNCTION_TYPE)
4654 return 1;
4656 /* Check for mismatched call attributes. */
4657 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4658 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4659 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4660 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4662 /* Only bother to check if an attribute is defined. */
4663 if (l1 | l2 | s1 | s2)
4665 /* If one type has an attribute, the other must have the same attribute. */
4666 if ((l1 != l2) || (s1 != s2))
4667 return 0;
4669 /* Disallow mixed attributes. */
4670 if ((l1 & s2) || (l2 & s1))
4671 return 0;
4674 /* Check for mismatched ISR attribute. */
4675 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4676 if (! l1)
4677 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4678 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4679 if (! l2)
4680 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4681 if (l1 != l2)
4682 return 0;
4684 return 1;
4687 /* Assigns default attributes to newly defined type. This is used to
4688 set short_call/long_call attributes for function types of
4689 functions defined inside corresponding #pragma scopes. */
4690 static void
4691 arm_set_default_type_attributes (tree type)
4693 /* Add __attribute__ ((long_call)) to all functions, when
4694 inside #pragma long_calls or __attribute__ ((short_call)),
4695 when inside #pragma no_long_calls. */
4696 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4698 tree type_attr_list, attr_name;
4699 type_attr_list = TYPE_ATTRIBUTES (type);
4701 if (arm_pragma_long_calls == LONG)
4702 attr_name = get_identifier ("long_call");
4703 else if (arm_pragma_long_calls == SHORT)
4704 attr_name = get_identifier ("short_call");
4705 else
4706 return;
4708 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4709 TYPE_ATTRIBUTES (type) = type_attr_list;
4713 /* Return true if DECL is known to be linked into section SECTION. */
4715 static bool
4716 arm_function_in_section_p (tree decl, section *section)
4718 /* We can only be certain about functions defined in the same
4719 compilation unit. */
4720 if (!TREE_STATIC (decl))
4721 return false;
4723 /* Make sure that SYMBOL always binds to the definition in this
4724 compilation unit. */
4725 if (!targetm.binds_local_p (decl))
4726 return false;
4728 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4729 if (!DECL_SECTION_NAME (decl))
4731 /* Make sure that we will not create a unique section for DECL. */
4732 if (flag_function_sections || DECL_ONE_ONLY (decl))
4733 return false;
4736 return function_section (decl) == section;
4739 /* Return nonzero if a 32-bit "long_call" should be generated for
4740 a call from the current function to DECL. We generate a long_call
4741 if the function:
4743 a. has an __attribute__ ((long_call))
4744 or b. is within the scope of a #pragma long_calls
4745 or c. the -mlong-calls command line switch has been specified
4747 However we do not generate a long call if the function:
4749 d. has an __attribute__ ((short_call))
4750 or e. is inside the scope of a #pragma no_long_calls
4751 or f. is defined in the same section as the current function. */
4753 bool
4754 arm_is_long_call_p (tree decl)
4756 tree attrs;
4758 if (!decl)
4759 return TARGET_LONG_CALLS;
4761 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4762 if (lookup_attribute ("short_call", attrs))
4763 return false;
4765 /* For "f", be conservative, and only cater for cases in which the
4766 whole of the current function is placed in the same section. */
4767 if (!flag_reorder_blocks_and_partition
4768 && TREE_CODE (decl) == FUNCTION_DECL
4769 && arm_function_in_section_p (decl, current_function_section ()))
4770 return false;
4772 if (lookup_attribute ("long_call", attrs))
4773 return true;
4775 return TARGET_LONG_CALLS;
4778 /* Return nonzero if it is ok to make a tail-call to DECL. */
4779 static bool
4780 arm_function_ok_for_sibcall (tree decl, tree exp)
4782 unsigned long func_type;
4784 if (cfun->machine->sibcall_blocked)
4785 return false;
4787 /* Never tailcall something for which we have no decl, or if we
4788 are in Thumb mode. */
4789 if (decl == NULL || TARGET_THUMB)
4790 return false;
4792 /* The PIC register is live on entry to VxWorks PLT entries, so we
4793 must make the call before restoring the PIC register. */
4794 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4795 return false;
4797 /* Cannot tail-call to long calls, since these are out of range of
4798 a branch instruction. */
4799 if (arm_is_long_call_p (decl))
4800 return false;
4802 /* If we are interworking and the function is not declared static
4803 then we can't tail-call it unless we know that it exists in this
4804 compilation unit (since it might be a Thumb routine). */
4805 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4806 return false;
4808 func_type = arm_current_func_type ();
4809 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4810 if (IS_INTERRUPT (func_type))
4811 return false;
4813 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4815 /* Check that the return value locations are the same. For
4816 example that we aren't returning a value from the sibling in
4817 a VFP register but then need to transfer it to a core
4818 register. */
4819 rtx a, b;
4821 a = arm_function_value (TREE_TYPE (exp), decl, false);
4822 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4823 cfun->decl, false);
4824 if (!rtx_equal_p (a, b))
4825 return false;
4828 /* Never tailcall if function may be called with a misaligned SP. */
4829 if (IS_STACKALIGN (func_type))
4830 return false;
4832 /* Everything else is ok. */
4833 return true;
4837 /* Addressing mode support functions. */
4839 /* Return nonzero if X is a legitimate immediate operand when compiling
4840 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4842 legitimate_pic_operand_p (rtx x)
4844 if (GET_CODE (x) == SYMBOL_REF
4845 || (GET_CODE (x) == CONST
4846 && GET_CODE (XEXP (x, 0)) == PLUS
4847 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4848 return 0;
4850 return 1;
4853 /* Record that the current function needs a PIC register. Initialize
4854 cfun->machine->pic_reg if we have not already done so. */
4856 static void
4857 require_pic_register (void)
4859 /* A lot of the logic here is made obscure by the fact that this
4860 routine gets called as part of the rtx cost estimation process.
4861 We don't want those calls to affect any assumptions about the real
4862 function; and further, we can't call entry_of_function() until we
4863 start the real expansion process. */
4864 if (!crtl->uses_pic_offset_table)
4866 gcc_assert (can_create_pseudo_p ());
4867 if (arm_pic_register != INVALID_REGNUM)
4869 if (!cfun->machine->pic_reg)
4870 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4872 /* Play games to avoid marking the function as needing pic
4873 if we are being called as part of the cost-estimation
4874 process. */
4875 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4876 crtl->uses_pic_offset_table = 1;
4878 else
4880 rtx seq;
4882 if (!cfun->machine->pic_reg)
4883 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4885 /* Play games to avoid marking the function as needing pic
4886 if we are being called as part of the cost-estimation
4887 process. */
4888 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4890 crtl->uses_pic_offset_table = 1;
4891 start_sequence ();
4893 arm_load_pic_register (0UL);
4895 seq = get_insns ();
4896 end_sequence ();
4897 /* We can be called during expansion of PHI nodes, where
4898 we can't yet emit instructions directly in the final
4899 insn stream. Queue the insns on the entry edge, they will
4900 be committed after everything else is expanded. */
4901 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4908 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4910 if (GET_CODE (orig) == SYMBOL_REF
4911 || GET_CODE (orig) == LABEL_REF)
4913 rtx pic_ref, address;
4914 rtx insn;
4915 int subregs = 0;
4917 /* If this function doesn't have a pic register, create one now. */
4918 require_pic_register ();
4920 if (reg == 0)
4922 gcc_assert (can_create_pseudo_p ());
4923 reg = gen_reg_rtx (Pmode);
4925 subregs = 1;
4928 if (subregs)
4929 address = gen_reg_rtx (Pmode);
4930 else
4931 address = reg;
4933 if (TARGET_32BIT)
4934 emit_insn (gen_pic_load_addr_32bit (address, orig));
4935 else /* TARGET_THUMB1 */
4936 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
4938 /* VxWorks does not impose a fixed gap between segments; the run-time
4939 gap can be different from the object-file gap. We therefore can't
4940 use GOTOFF unless we are absolutely sure that the symbol is in the
4941 same segment as the GOT. Unfortunately, the flexibility of linker
4942 scripts means that we can't be sure of that in general, so assume
4943 that GOTOFF is never valid on VxWorks. */
4944 if ((GET_CODE (orig) == LABEL_REF
4945 || (GET_CODE (orig) == SYMBOL_REF &&
4946 SYMBOL_REF_LOCAL_P (orig)))
4947 && NEED_GOT_RELOC
4948 && !TARGET_VXWORKS_RTP)
4949 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
4950 else
4952 pic_ref = gen_const_mem (Pmode,
4953 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
4954 address));
4957 insn = emit_move_insn (reg, pic_ref);
4959 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4960 by loop. */
4961 set_unique_reg_note (insn, REG_EQUAL, orig);
4963 return reg;
4965 else if (GET_CODE (orig) == CONST)
4967 rtx base, offset;
4969 if (GET_CODE (XEXP (orig, 0)) == PLUS
4970 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4971 return orig;
4973 /* Handle the case where we have: const (UNSPEC_TLS). */
4974 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4975 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4976 return orig;
4978 /* Handle the case where we have:
4979 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4980 CONST_INT. */
4981 if (GET_CODE (XEXP (orig, 0)) == PLUS
4982 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
4983 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
4985 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
4986 return orig;
4989 if (reg == 0)
4991 gcc_assert (can_create_pseudo_p ());
4992 reg = gen_reg_rtx (Pmode);
4995 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4997 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
4998 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
4999 base == reg ? 0 : reg);
5001 if (GET_CODE (offset) == CONST_INT)
5003 /* The base register doesn't really matter, we only want to
5004 test the index for the appropriate mode. */
5005 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5007 gcc_assert (can_create_pseudo_p ());
5008 offset = force_reg (Pmode, offset);
5011 if (GET_CODE (offset) == CONST_INT)
5012 return plus_constant (base, INTVAL (offset));
5015 if (GET_MODE_SIZE (mode) > 4
5016 && (GET_MODE_CLASS (mode) == MODE_INT
5017 || TARGET_SOFT_FLOAT))
5019 emit_insn (gen_addsi3 (reg, base, offset));
5020 return reg;
5023 return gen_rtx_PLUS (Pmode, base, offset);
5026 return orig;
5030 /* Find a spare register to use during the prolog of a function. */
5032 static int
5033 thumb_find_work_register (unsigned long pushed_regs_mask)
5035 int reg;
5037 /* Check the argument registers first as these are call-used. The
5038 register allocation order means that sometimes r3 might be used
5039 but earlier argument registers might not, so check them all. */
5040 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5041 if (!df_regs_ever_live_p (reg))
5042 return reg;
5044 /* Before going on to check the call-saved registers we can try a couple
5045 more ways of deducing that r3 is available. The first is when we are
5046 pushing anonymous arguments onto the stack and we have less than 4
5047 registers worth of fixed arguments(*). In this case r3 will be part of
5048 the variable argument list and so we can be sure that it will be
5049 pushed right at the start of the function. Hence it will be available
5050 for the rest of the prologue.
5051 (*): ie crtl->args.pretend_args_size is greater than 0. */
5052 if (cfun->machine->uses_anonymous_args
5053 && crtl->args.pretend_args_size > 0)
5054 return LAST_ARG_REGNUM;
5056 /* The other case is when we have fixed arguments but less than 4 registers
5057 worth. In this case r3 might be used in the body of the function, but
5058 it is not being used to convey an argument into the function. In theory
5059 we could just check crtl->args.size to see how many bytes are
5060 being passed in argument registers, but it seems that it is unreliable.
5061 Sometimes it will have the value 0 when in fact arguments are being
5062 passed. (See testcase execute/20021111-1.c for an example). So we also
5063 check the args_info.nregs field as well. The problem with this field is
5064 that it makes no allowances for arguments that are passed to the
5065 function but which are not used. Hence we could miss an opportunity
5066 when a function has an unused argument in r3. But it is better to be
5067 safe than to be sorry. */
5068 if (! cfun->machine->uses_anonymous_args
5069 && crtl->args.size >= 0
5070 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5071 && crtl->args.info.nregs < 4)
5072 return LAST_ARG_REGNUM;
5074 /* Otherwise look for a call-saved register that is going to be pushed. */
5075 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5076 if (pushed_regs_mask & (1 << reg))
5077 return reg;
5079 if (TARGET_THUMB2)
5081 /* Thumb-2 can use high regs. */
5082 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5083 if (pushed_regs_mask & (1 << reg))
5084 return reg;
5086 /* Something went wrong - thumb_compute_save_reg_mask()
5087 should have arranged for a suitable register to be pushed. */
5088 gcc_unreachable ();
5091 static GTY(()) int pic_labelno;
5093 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5094 low register. */
5096 void
5097 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5099 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5101 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5102 return;
5104 gcc_assert (flag_pic);
5106 pic_reg = cfun->machine->pic_reg;
5107 if (TARGET_VXWORKS_RTP)
5109 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5110 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5111 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5113 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5115 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5116 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5118 else
5120 /* We use an UNSPEC rather than a LABEL_REF because this label
5121 never appears in the code stream. */
5123 labelno = GEN_INT (pic_labelno++);
5124 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5125 l1 = gen_rtx_CONST (VOIDmode, l1);
5127 /* On the ARM the PC register contains 'dot + 8' at the time of the
5128 addition, on the Thumb it is 'dot + 4'. */
5129 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5130 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5131 UNSPEC_GOTSYM_OFF);
5132 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5134 if (TARGET_32BIT)
5136 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5137 if (TARGET_ARM)
5138 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5139 else
5140 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5142 else /* TARGET_THUMB1 */
5144 if (arm_pic_register != INVALID_REGNUM
5145 && REGNO (pic_reg) > LAST_LO_REGNUM)
5147 /* We will have pushed the pic register, so we should always be
5148 able to find a work register. */
5149 pic_tmp = gen_rtx_REG (SImode,
5150 thumb_find_work_register (saved_regs));
5151 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5152 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5154 else
5155 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5156 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5160 /* Need to emit this whether or not we obey regdecls,
5161 since setjmp/longjmp can cause life info to screw up. */
5162 emit_use (pic_reg);
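/* For the common (non-VxWorks) ARM case, the sequence emitted above
   assembles to roughly:

	ldr	rPIC, .Lcpool	@ .word _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)
   .LPIC0:
	add	rPIC, pc, rPIC

   relying on the PC reading as '. + 8' in ARM state ('. + 4' in Thumb),
   so that rPIC ends up holding the address of the GOT.  The label is the
   UNSPEC_PIC_LABEL built from pic_labelno; the pool constant is the
   UNSPEC_GOTSYM_OFF expression constructed just above.  */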
5166 /* Return nonzero if X is valid as an ARM state addressing register. */
5167 static int
5168 arm_address_register_rtx_p (rtx x, int strict_p)
5170 int regno;
5172 if (GET_CODE (x) != REG)
5173 return 0;
5175 regno = REGNO (x);
5177 if (strict_p)
5178 return ARM_REGNO_OK_FOR_BASE_P (regno);
5180 return (regno <= LAST_ARM_REGNUM
5181 || regno >= FIRST_PSEUDO_REGISTER
5182 || regno == FRAME_POINTER_REGNUM
5183 || regno == ARG_POINTER_REGNUM);
5186 /* Return TRUE if this rtx is the difference of a symbol and a label,
5187 and will reduce to a PC-relative relocation in the object file.
5188 Expressions like this can be left alone when generating PIC, rather
5189 than forced through the GOT. */
5190 static int
5191 pcrel_constant_p (rtx x)
5193 if (GET_CODE (x) == MINUS)
5194 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5196 return FALSE;
5199 /* Return nonzero if X is a valid ARM state address operand. */
5201 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5202 int strict_p)
5204 bool use_ldrd;
5205 enum rtx_code code = GET_CODE (x);
5207 if (arm_address_register_rtx_p (x, strict_p))
5208 return 1;
5210 use_ldrd = (TARGET_LDRD
5211 && (mode == DImode
5212 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5214 if (code == POST_INC || code == PRE_DEC
5215 || ((code == PRE_INC || code == POST_DEC)
5216 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5217 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5219 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5220 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5221 && GET_CODE (XEXP (x, 1)) == PLUS
5222 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5224 rtx addend = XEXP (XEXP (x, 1), 1);
5226 /* Don't allow ldrd post increment by register because it's hard
5227 to fixup invalid register choices. */
5228 if (use_ldrd
5229 && GET_CODE (x) == POST_MODIFY
5230 && GET_CODE (addend) == REG)
5231 return 0;
5233 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5234 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5237 /* After reload constants split into minipools will have addresses
5238 from a LABEL_REF. */
5239 else if (reload_completed
5240 && (code == LABEL_REF
5241 || (code == CONST
5242 && GET_CODE (XEXP (x, 0)) == PLUS
5243 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5244 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5245 return 1;
5247 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5248 return 0;
5250 else if (code == PLUS)
5252 rtx xop0 = XEXP (x, 0);
5253 rtx xop1 = XEXP (x, 1);
5255 return ((arm_address_register_rtx_p (xop0, strict_p)
5256 && GET_CODE(xop1) == CONST_INT
5257 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5258 || (arm_address_register_rtx_p (xop1, strict_p)
5259 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5262 #if 0
5263 /* Reload currently can't handle MINUS, so disable this for now */
5264 else if (GET_CODE (x) == MINUS)
5266 rtx xop0 = XEXP (x, 0);
5267 rtx xop1 = XEXP (x, 1);
5269 return (arm_address_register_rtx_p (xop0, strict_p)
5270 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5272 #endif
5274 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5275 && code == SYMBOL_REF
5276 && CONSTANT_POOL_ADDRESS_P (x)
5277 && ! (flag_pic
5278 && symbol_mentioned_p (get_pool_constant (x))
5279 && ! pcrel_constant_p (get_pool_constant (x))))
5280 return 1;
5282 return 0;
5285 /* Return nonzero if X is a valid Thumb-2 address operand. */
5286 static int
5287 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5289 bool use_ldrd;
5290 enum rtx_code code = GET_CODE (x);
5292 if (arm_address_register_rtx_p (x, strict_p))
5293 return 1;
5295 use_ldrd = (TARGET_LDRD
5296 && (mode == DImode
5297 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5299 if (code == POST_INC || code == PRE_DEC
5300 || ((code == PRE_INC || code == POST_DEC)
5301 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5302 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5304 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5305 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5306 && GET_CODE (XEXP (x, 1)) == PLUS
5307 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5309 /* Thumb-2 only has autoincrement by constant. */
5310 rtx addend = XEXP (XEXP (x, 1), 1);
5311 HOST_WIDE_INT offset;
5313 if (GET_CODE (addend) != CONST_INT)
5314 return 0;
5316 offset = INTVAL(addend);
5317 if (GET_MODE_SIZE (mode) <= 4)
5318 return (offset > -256 && offset < 256);
5320 return (use_ldrd && offset > -1024 && offset < 1024
5321 && (offset & 3) == 0);
5324 /* After reload constants split into minipools will have addresses
5325 from a LABEL_REF. */
5326 else if (reload_completed
5327 && (code == LABEL_REF
5328 || (code == CONST
5329 && GET_CODE (XEXP (x, 0)) == PLUS
5330 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5331 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5332 return 1;
5334 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5335 return 0;
5337 else if (code == PLUS)
5339 rtx xop0 = XEXP (x, 0);
5340 rtx xop1 = XEXP (x, 1);
5342 return ((arm_address_register_rtx_p (xop0, strict_p)
5343 && thumb2_legitimate_index_p (mode, xop1, strict_p))
5344 || (arm_address_register_rtx_p (xop1, strict_p)
5345 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5348 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5349 && code == SYMBOL_REF
5350 && CONSTANT_POOL_ADDRESS_P (x)
5351 && ! (flag_pic
5352 && symbol_mentioned_p (get_pool_constant (x))
5353 && ! pcrel_constant_p (get_pool_constant (x))))
5354 return 1;
5356 return 0;
5359 /* Return nonzero if INDEX is valid for an address index operand in
5360 ARM state. */
5361 static int
5362 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5363 int strict_p)
5365 HOST_WIDE_INT range;
5366 enum rtx_code code = GET_CODE (index);
5368 /* Standard coprocessor addressing modes. */
5369 if (TARGET_HARD_FLOAT
5370 && (TARGET_FPA || TARGET_MAVERICK)
5371 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5372 || (TARGET_MAVERICK && mode == DImode)))
5373 return (code == CONST_INT && INTVAL (index) < 1024
5374 && INTVAL (index) > -1024
5375 && (INTVAL (index) & 3) == 0);
5377 if (TARGET_NEON
5378 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5379 return (code == CONST_INT
5380 && INTVAL (index) < 1016
5381 && INTVAL (index) > -1024
5382 && (INTVAL (index) & 3) == 0);
5384 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5385 return (code == CONST_INT
5386 && INTVAL (index) < 1024
5387 && INTVAL (index) > -1024
5388 && (INTVAL (index) & 3) == 0);
5390 if (arm_address_register_rtx_p (index, strict_p)
5391 && (GET_MODE_SIZE (mode) <= 4))
5392 return 1;
5394 if (mode == DImode || mode == DFmode)
5396 if (code == CONST_INT)
5398 HOST_WIDE_INT val = INTVAL (index);
5400 if (TARGET_LDRD)
5401 return val > -256 && val < 256;
5402 else
5403 return val > -4096 && val < 4092;
5406 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5409 if (GET_MODE_SIZE (mode) <= 4
5410 && ! (arm_arch4
5411 && (mode == HImode
5412 || mode == HFmode
5413 || (mode == QImode && outer == SIGN_EXTEND))))
5415 if (code == MULT)
5417 rtx xiop0 = XEXP (index, 0);
5418 rtx xiop1 = XEXP (index, 1);
5420 return ((arm_address_register_rtx_p (xiop0, strict_p)
5421 && power_of_two_operand (xiop1, SImode))
5422 || (arm_address_register_rtx_p (xiop1, strict_p)
5423 && power_of_two_operand (xiop0, SImode)));
5425 else if (code == LSHIFTRT || code == ASHIFTRT
5426 || code == ASHIFT || code == ROTATERT)
5428 rtx op = XEXP (index, 1);
5430 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5431 && GET_CODE (op) == CONST_INT
5432 && INTVAL (op) > 0
5433 && INTVAL (op) <= 31);
5437 /* For ARM v4 we may be doing a sign-extend operation during the
5438 load. */
5439 if (arm_arch4)
5441 if (mode == HImode
5442 || mode == HFmode
5443 || (outer == SIGN_EXTEND && mode == QImode))
5444 range = 256;
5445 else
5446 range = 4096;
5448 else
5449 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5451 return (code == CONST_INT
5452 && INTVAL (index) < range
5453 && INTVAL (index) > -range);
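/* Some concrete addresses the checks above accept in ARM state
   (schematic syntax, assuming r0 and r1 are valid base registers):

	ldr	r2, [r0, r1]		register index, SImode
	ldr	r2, [r0, r1, lsl #2]	index scaled by a power of two
	ldr	r2, [r0, #4092]		SImode, 12-bit immediate
	ldrh	r2, [r0, #254]		HImode on ARMv4, 8-bit immediate
	ldrd	r2, r3, [r0, #248]	DImode when LDRD is available

   An offset such as #4096 is out of range and has to be legitimized into
   a separate add first.  */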
5456 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5457 index operand. i.e. 1, 2, 4 or 8. */
5458 static bool
5459 thumb2_index_mul_operand (rtx op)
5461 HOST_WIDE_INT val;
5463 if (GET_CODE(op) != CONST_INT)
5464 return false;
5466 val = INTVAL(op);
5467 return (val == 1 || val == 2 || val == 4 || val == 8);
5470 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5471 static int
5472 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5474 enum rtx_code code = GET_CODE (index);
5476 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5477 /* Standard coprocessor addressing modes. */
5478 if (TARGET_HARD_FLOAT
5479 && (TARGET_FPA || TARGET_MAVERICK)
5480 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5481 || (TARGET_MAVERICK && mode == DImode)))
5482 return (code == CONST_INT && INTVAL (index) < 1024
5483 && INTVAL (index) > -1024
5484 && (INTVAL (index) & 3) == 0);
5486 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5488 /* For DImode assume values will usually live in core regs
5489 and only allow LDRD addressing modes. */
5490 if (!TARGET_LDRD || mode != DImode)
5491 return (code == CONST_INT
5492 && INTVAL (index) < 1024
5493 && INTVAL (index) > -1024
5494 && (INTVAL (index) & 3) == 0);
5497 if (TARGET_NEON
5498 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5499 return (code == CONST_INT
5500 && INTVAL (index) < 1016
5501 && INTVAL (index) > -1024
5502 && (INTVAL (index) & 3) == 0);
5504 if (arm_address_register_rtx_p (index, strict_p)
5505 && (GET_MODE_SIZE (mode) <= 4))
5506 return 1;
5508 if (mode == DImode || mode == DFmode)
5510 if (code == CONST_INT)
5512 HOST_WIDE_INT val = INTVAL (index);
5513 /* ??? Can we assume ldrd for thumb2? */
5514 /* Thumb-2 ldrd only has reg+const addressing modes. */
5515 /* ldrd supports offsets of +-1020.
5516 However the ldr fallback does not. */
5517 return val > -256 && val < 256 && (val & 3) == 0;
5519 else
5520 return 0;
5523 if (code == MULT)
5525 rtx xiop0 = XEXP (index, 0);
5526 rtx xiop1 = XEXP (index, 1);
5528 return ((arm_address_register_rtx_p (xiop0, strict_p)
5529 && thumb2_index_mul_operand (xiop1))
5530 || (arm_address_register_rtx_p (xiop1, strict_p)
5531 && thumb2_index_mul_operand (xiop0)));
5533 else if (code == ASHIFT)
5535 rtx op = XEXP (index, 1);
5537 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5538 && GET_CODE (op) == CONST_INT
5539 && INTVAL (op) > 0
5540 && INTVAL (op) <= 3);
5543 return (code == CONST_INT
5544 && INTVAL (index) < 4096
5545 && INTVAL (index) > -256);
5548 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5549 static int
5550 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5552 int regno;
5554 if (GET_CODE (x) != REG)
5555 return 0;
5557 regno = REGNO (x);
5559 if (strict_p)
5560 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5562 return (regno <= LAST_LO_REGNUM
5563 || regno > LAST_VIRTUAL_REGISTER
5564 || regno == FRAME_POINTER_REGNUM
5565 || (GET_MODE_SIZE (mode) >= 4
5566 && (regno == STACK_POINTER_REGNUM
5567 || regno >= FIRST_PSEUDO_REGISTER
5568 || x == hard_frame_pointer_rtx
5569 || x == arg_pointer_rtx)));
5572 /* Return nonzero if x is a legitimate index register. This is the case
5573 for any base register that can access a QImode object. */
5574 inline static int
5575 thumb1_index_register_rtx_p (rtx x, int strict_p)
5577 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5580 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5582 The AP may be eliminated to either the SP or the FP, so we use the
5583 least common denominator, e.g. SImode, and offsets from 0 to 64.
5585 ??? Verify whether the above is the right approach.
5587 ??? Also, the FP may be eliminated to the SP, so perhaps that
5588 needs special handling also.
5590 ??? Look at how the mips16 port solves this problem. It probably uses
5591 better ways to solve some of these problems.
5593 Although it is not incorrect, we don't accept QImode and HImode
5594 addresses based on the frame pointer or arg pointer until the
5595 reload pass starts. This is so that eliminating such addresses
5596 into stack based ones won't produce impossible code. */
5597 static int
5598 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5600 /* ??? Not clear if this is right. Experiment. */
5601 if (GET_MODE_SIZE (mode) < 4
5602 && !(reload_in_progress || reload_completed)
5603 && (reg_mentioned_p (frame_pointer_rtx, x)
5604 || reg_mentioned_p (arg_pointer_rtx, x)
5605 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5606 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5607 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5608 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5609 return 0;
5611 /* Accept any base register. SP only in SImode or larger. */
5612 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5613 return 1;
5615 /* This is PC relative data before arm_reorg runs. */
5616 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5617 && GET_CODE (x) == SYMBOL_REF
5618 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5619 return 1;
5621 /* This is PC relative data after arm_reorg runs. */
5622 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5623 && reload_completed
5624 && (GET_CODE (x) == LABEL_REF
5625 || (GET_CODE (x) == CONST
5626 && GET_CODE (XEXP (x, 0)) == PLUS
5627 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5628 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5629 return 1;
5631 /* Post-inc indexing only supported for SImode and larger. */
5632 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5633 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5634 return 1;
5636 else if (GET_CODE (x) == PLUS)
5638 /* REG+REG address can be any two index registers. */
5639 /* We disallow FRAME+REG addressing since we know that FRAME
5640 will be replaced with STACK, and SP relative addressing only
5641 permits SP+OFFSET. */
5642 if (GET_MODE_SIZE (mode) <= 4
5643 && XEXP (x, 0) != frame_pointer_rtx
5644 && XEXP (x, 1) != frame_pointer_rtx
5645 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5646 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
5647 return 1;
5649 /* REG+const has 5-7 bit offset for non-SP registers. */
5650 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5651 || XEXP (x, 0) == arg_pointer_rtx)
5652 && GET_CODE (XEXP (x, 1)) == CONST_INT
5653 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5654 return 1;
5656 /* REG+const has 10-bit offset for SP, but only SImode and
5657    larger are supported.  */
5658 /* ??? Should probably check for DI/DFmode overflow here
5659 just like GO_IF_LEGITIMATE_OFFSET does. */
5660 else if (GET_CODE (XEXP (x, 0)) == REG
5661 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5662 && GET_MODE_SIZE (mode) >= 4
5663 && GET_CODE (XEXP (x, 1)) == CONST_INT
5664 && INTVAL (XEXP (x, 1)) >= 0
5665 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5666 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5667 return 1;
5669 else if (GET_CODE (XEXP (x, 0)) == REG
5670 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5671 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5672 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5673 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5674 && GET_MODE_SIZE (mode) >= 4
5675 && GET_CODE (XEXP (x, 1)) == CONST_INT
5676 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5677 return 1;
5680 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5681 && GET_MODE_SIZE (mode) == 4
5682 && GET_CODE (x) == SYMBOL_REF
5683 && CONSTANT_POOL_ADDRESS_P (x)
5684 && ! (flag_pic
5685 && symbol_mentioned_p (get_pool_constant (x))
5686 && ! pcrel_constant_p (get_pool_constant (x))))
5687 return 1;
5689 return 0;
5692 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5693 instruction of mode MODE. */
5695 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5697 switch (GET_MODE_SIZE (mode))
5699 case 1:
5700 return val >= 0 && val < 32;
5702 case 2:
5703 return val >= 0 && val < 64 && (val & 1) == 0;
5705 default:
5706 return (val >= 0
5707 && (val + GET_MODE_SIZE (mode)) <= 128
5708 && (val & 3) == 0);
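These ranges are the Thumb-1 immediate-offset load/store encodings: a 5-bit offset scaled by the access size.  A standalone sketch of the same checks (the example_* names are invented for illustration and are not part of arm.c):

#include <assert.h>

/* Mirrors thumb_legitimate_offset_p for the mode sizes 1, 2 and 4 used by
   QImode, HImode and SImode.  Illustrative only.  */
static int
example_thumb1_offset_ok (int size, long val)
{
  switch (size)
    {
    case 1:
      return val >= 0 && val < 32;                             /* ldrb/strb */
    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;           /* ldrh/strh */
    default:
      return val >= 0 && val + size <= 128 && (val & 3) == 0;  /* ldr/str */
    }
}

int
main (void)
{
  assert (example_thumb1_offset_ok (1, 31));     /* last reachable byte */
  assert (!example_thumb1_offset_ok (2, 63));    /* halfword offsets must be even */
  assert (example_thumb1_offset_ok (4, 124));    /* 31 words in */
  assert (!example_thumb1_offset_ok (4, 128));   /* one word too far */
  return 0;
}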
5712 bool
5713 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5715 if (TARGET_ARM)
5716 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5717 else if (TARGET_THUMB2)
5718 return thumb2_legitimate_address_p (mode, x, strict_p);
5719 else /* if (TARGET_THUMB1) */
5720 return thumb1_legitimate_address_p (mode, x, strict_p);
5723 /* Build the SYMBOL_REF for __tls_get_addr. */
5725 static GTY(()) rtx tls_get_addr_libfunc;
5727 static rtx
5728 get_tls_get_addr (void)
5730 if (!tls_get_addr_libfunc)
5731 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5732 return tls_get_addr_libfunc;
5735 static rtx
5736 arm_load_tp (rtx target)
5738 if (!target)
5739 target = gen_reg_rtx (SImode);
5741 if (TARGET_HARD_TP)
5743 /* Can return in any reg. */
5744 emit_insn (gen_load_tp_hard (target));
5746 else
5748    /* Always returned in r0.  Immediately copy the result into a pseudo;
5749 otherwise other uses of r0 (e.g. setting up function arguments) may
5750 clobber the value. */
5752 rtx tmp;
5754 emit_insn (gen_load_tp_soft ());
5756 tmp = gen_rtx_REG (SImode, 0);
5757 emit_move_insn (target, tmp);
5759 return target;
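At the source level the two strategies look like this; the helper name and option spellings below follow the usual ARM EABI conventions and are shown only for orientation, not taken from this file:

/* Standalone sketch.  With a hardware thread register (-mtp=cp15) the
   compiler reads TP directly; with -mtp=soft it emits a call to the EABI
   helper, whose result arrives in r0 and is copied into a fresh pseudo,
   which is what arm_load_tp arranges above.  */
extern void *__aeabi_read_tp (void);

void *
example_thread_pointer (void)
{
  return __aeabi_read_tp ();
}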
5762 static rtx
5763 load_tls_operand (rtx x, rtx reg)
5765 rtx tmp;
5767 if (reg == NULL_RTX)
5768 reg = gen_reg_rtx (SImode);
5770 tmp = gen_rtx_CONST (SImode, x);
5772 emit_move_insn (reg, tmp);
5774 return reg;
5777 static rtx
5778 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5780 rtx insns, label, labelno, sum;
5782 start_sequence ();
5784 labelno = GEN_INT (pic_labelno++);
5785 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5786 label = gen_rtx_CONST (VOIDmode, label);
5788 sum = gen_rtx_UNSPEC (Pmode,
5789 gen_rtvec (4, x, GEN_INT (reloc), label,
5790 GEN_INT (TARGET_ARM ? 8 : 4)),
5791 UNSPEC_TLS);
5792 reg = load_tls_operand (sum, reg);
5794 if (TARGET_ARM)
5795 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5796 else if (TARGET_THUMB2)
5797 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5798 else /* TARGET_THUMB1 */
5799 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5801 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5802 Pmode, 1, reg, Pmode);
5804 insns = get_insns ();
5805 end_sequence ();
5807 return insns;
5811 legitimize_tls_address (rtx x, rtx reg)
5813 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5814 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5816 switch (model)
5818 case TLS_MODEL_GLOBAL_DYNAMIC:
5819 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5820 dest = gen_reg_rtx (Pmode);
5821 emit_libcall_block (insns, dest, ret, x);
5822 return dest;
5824 case TLS_MODEL_LOCAL_DYNAMIC:
5825 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5827 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5828 share the LDM result with other LD model accesses. */
5829 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5830 UNSPEC_TLS);
5831 dest = gen_reg_rtx (Pmode);
5832 emit_libcall_block (insns, dest, ret, eqv);
5834 /* Load the addend. */
5835 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5836 UNSPEC_TLS);
5837 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5838 return gen_rtx_PLUS (Pmode, dest, addend);
5840 case TLS_MODEL_INITIAL_EXEC:
5841 labelno = GEN_INT (pic_labelno++);
5842 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5843 label = gen_rtx_CONST (VOIDmode, label);
5844 sum = gen_rtx_UNSPEC (Pmode,
5845 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5846 GEN_INT (TARGET_ARM ? 8 : 4)),
5847 UNSPEC_TLS);
5848 reg = load_tls_operand (sum, reg);
5850 if (TARGET_ARM)
5851 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5852 else if (TARGET_THUMB2)
5853 emit_insn (gen_tls_load_dot_plus_four (reg, reg, labelno));
5854 else
5856 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5857 emit_move_insn (reg, gen_const_mem (SImode, reg));
5860 tp = arm_load_tp (NULL_RTX);
5862 return gen_rtx_PLUS (Pmode, tp, reg);
5864 case TLS_MODEL_LOCAL_EXEC:
5865 tp = arm_load_tp (NULL_RTX);
5867 reg = gen_rtx_UNSPEC (Pmode,
5868 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5869 UNSPEC_TLS);
5870 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5872 return gen_rtx_PLUS (Pmode, tp, reg);
5874 default:
5875 abort ();
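For orientation, a source-level example of the access models handled above (the variable and function names are invented for this sketch):

/* Standalone sketch.  Compiled with -fPIC for a shared library, an access
   like this normally takes the global-dynamic path above (a call to
   __tls_get_addr); with -ftls-model=initial-exec or local-exec the cheaper
   TP-relative sequences are used instead.  */
__thread int example_tls_counter;

int
example_bump_tls (void)
{
  return ++example_tls_counter;
}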
5879 /* Try machine-dependent ways of modifying an illegitimate address
5880 to be legitimate. If we find one, return the new, valid address. */
5882 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5884 if (!TARGET_ARM)
5886 /* TODO: legitimize_address for Thumb2. */
5887 if (TARGET_THUMB2)
5888 return x;
5889 return thumb_legitimize_address (x, orig_x, mode);
5892 if (arm_tls_symbol_p (x))
5893 return legitimize_tls_address (x, NULL_RTX);
5895 if (GET_CODE (x) == PLUS)
5897 rtx xop0 = XEXP (x, 0);
5898 rtx xop1 = XEXP (x, 1);
5900 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5901 xop0 = force_reg (SImode, xop0);
5903 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5904 xop1 = force_reg (SImode, xop1);
5906 if (ARM_BASE_REGISTER_RTX_P (xop0)
5907 && GET_CODE (xop1) == CONST_INT)
5909 HOST_WIDE_INT n, low_n;
5910 rtx base_reg, val;
5911 n = INTVAL (xop1);
5913 /* VFP addressing modes actually allow greater offsets, but for
5914 now we just stick with the lowest common denominator. */
5915 if (mode == DImode
5916 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
5918 low_n = n & 0x0f;
5919 n &= ~0x0f;
5920 if (low_n > 4)
5922 n += 16;
5923 low_n -= 16;
5926 else
5928 low_n = ((mode) == TImode ? 0
5929 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
5930 n -= low_n;
5933 base_reg = gen_reg_rtx (SImode);
5934 val = force_operand (plus_constant (xop0, n), NULL_RTX);
5935 emit_move_insn (base_reg, val);
5936 x = plus_constant (base_reg, low_n);
5938 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5939 x = gen_rtx_PLUS (SImode, xop0, xop1);
5942 /* XXX We don't allow MINUS any more -- see comment in
5943 arm_legitimate_address_outer_p (). */
5944 else if (GET_CODE (x) == MINUS)
5946 rtx xop0 = XEXP (x, 0);
5947 rtx xop1 = XEXP (x, 1);
5949 if (CONSTANT_P (xop0))
5950 xop0 = force_reg (SImode, xop0);
5952 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
5953 xop1 = force_reg (SImode, xop1);
5955 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5956 x = gen_rtx_MINUS (SImode, xop0, xop1);
5959 /* Make sure to take full advantage of the pre-indexed addressing mode
5960 with absolute addresses which often allows for the base register to
5961 be factorized for multiple adjacent memory references, and it might
5962    even allow for the mini pool to be avoided entirely.  */
5963 else if (GET_CODE (x) == CONST_INT && optimize > 0)
5965 unsigned int bits;
5966 HOST_WIDE_INT mask, base, index;
5967 rtx base_reg;
5969    /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
5970       use an 8-bit index.  So let's use a 12-bit index for SImode only and
5971 hope that arm_gen_constant will enable ldrb to use more bits. */
5972 bits = (mode == SImode) ? 12 : 8;
5973 mask = (1 << bits) - 1;
5974 base = INTVAL (x) & ~mask;
5975 index = INTVAL (x) & mask;
5976 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
5978 /* It'll most probably be more efficient to generate the base
5979 with more bits set and use a negative index instead. */
5980 base |= mask;
5981 index -= mask;
5983 base_reg = force_reg (SImode, GEN_INT (base));
5984 x = plus_constant (base_reg, index);
5987 if (flag_pic)
5989 /* We need to find and carefully transform any SYMBOL and LABEL
5990 references; so go back to the original address expression. */
5991 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
5993 if (new_x != orig_x)
5994 x = new_x;
5997 return x;
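The CONST_INT branch above splits an absolute address into a base that is cheap to generate plus a small index.  A standalone sketch of that arithmetic (names invented for illustration):

#include <stdio.h>

/* Mirrors the constant-address split above: SImode gets a 12-bit index,
   other modes 8 bits; when the base would need many set bits, switch to a
   base with the low bits set and a negative index.  Illustrative only.  */
static void
example_split_address (unsigned long addr, int is_simode)
{
  int bits = is_simode ? 12 : 8;
  unsigned long mask = (1UL << bits) - 1;
  unsigned long base = addr & ~mask;
  long index = (long) (addr & mask);
  unsigned long b = base & 0xffffffffUL;
  int set_bits = 0;

  while (b)
    {
      set_bits += (int) (b & 1);
      b >>= 1;
    }
  if (set_bits > (32 - bits) / 2)
    {
      base |= mask;                /* e.g. 0xffffffff, loadable with mvn #0 */
      index -= (long) mask;
    }
  printf ("0x%08lx -> base 0x%08lx + index %ld\n", addr, base, index);
}

int
main (void)
{
  example_split_address (0x12345678UL, 1);   /* base 0x12345000, index 0x678 */
  example_split_address (0xfffff004UL, 1);   /* base 0xffffffff, index -4091 */
  return 0;
}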
6001 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6002 to be legitimate. If we find one, return the new, valid address. */
6004 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6006 if (arm_tls_symbol_p (x))
6007 return legitimize_tls_address (x, NULL_RTX);
6009 if (GET_CODE (x) == PLUS
6010 && GET_CODE (XEXP (x, 1)) == CONST_INT
6011 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6012 || INTVAL (XEXP (x, 1)) < 0))
6014 rtx xop0 = XEXP (x, 0);
6015 rtx xop1 = XEXP (x, 1);
6016 HOST_WIDE_INT offset = INTVAL (xop1);
6018 /* Try and fold the offset into a biasing of the base register and
6019 then offsetting that. Don't do this when optimizing for space
6020 since it can cause too many CSEs. */
6021 if (optimize_size && offset >= 0
6022 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6024 HOST_WIDE_INT delta;
6026 if (offset >= 256)
6027 delta = offset - (256 - GET_MODE_SIZE (mode));
6028 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6029 delta = 31 * GET_MODE_SIZE (mode);
6030 else
6031 delta = offset & (~31 * GET_MODE_SIZE (mode));
6033 xop0 = force_operand (plus_constant (xop0, offset - delta),
6034 NULL_RTX);
6035 x = plus_constant (xop0, delta);
6037 else if (offset < 0 && offset > -256)
6038 /* Small negative offsets are best done with a subtract before the
6039    dereference; forcing these into a register normally takes two
6040 instructions. */
6041 x = force_operand (x, NULL_RTX);
6042 else
6044 /* For the remaining cases, force the constant into a register. */
6045 xop1 = force_reg (SImode, xop1);
6046 x = gen_rtx_PLUS (SImode, xop0, xop1);
6049 else if (GET_CODE (x) == PLUS
6050 && s_register_operand (XEXP (x, 1), SImode)
6051 && !s_register_operand (XEXP (x, 0), SImode))
6053 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6055 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6058 if (flag_pic)
6060 /* We need to find and carefully transform any SYMBOL and LABEL
6061 references; so go back to the original address expression. */
6062 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6064 if (new_x != orig_x)
6065 x = new_x;
6068 return x;
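The folding above only fires for offsets the instruction cannot encode but that are still close (roughly 128 to 379 bytes for a word access when optimizing for size).  A standalone sketch of the delta computation for that case (names invented for illustration):

#include <stdio.h>

/* Mirrors the offset folding above for a word access (mode size 4): the
   out-of-range offset is split into a bias added to the base register plus
   a residual the memory access keeps.  Illustrative only.  */
static void
example_thumb_fold_offset (long offset)
{
  const int size = 4;
  long delta;

  if (offset >= 256)
    delta = offset - (256 - size);
  else if (offset < 32 * size + 8)
    delta = 31 * size;
  else
    delta = offset & (~31 * size);

  printf ("offset %ld -> add %ld to the base, access at +%ld\n",
          offset, offset - delta, delta);
}

int
main (void)
{
  example_thumb_fold_offset (300);   /* bias 252, residual 48 */
  example_thumb_fold_offset (132);   /* bias 8, residual 124 */
  return 0;
}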
6072 thumb_legitimize_reload_address (rtx *x_p,
6073 enum machine_mode mode,
6074 int opnum, int type,
6075 int ind_levels ATTRIBUTE_UNUSED)
6077 rtx x = *x_p;
6079 if (GET_CODE (x) == PLUS
6080 && GET_MODE_SIZE (mode) < 4
6081 && REG_P (XEXP (x, 0))
6082 && XEXP (x, 0) == stack_pointer_rtx
6083 && GET_CODE (XEXP (x, 1)) == CONST_INT
6084 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6086 rtx orig_x = x;
6088 x = copy_rtx (x);
6089 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6090 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6091 return x;
6094 /* If both registers are hi-regs, then it's better to reload the
6095 entire expression rather than each register individually. That
6096 only requires one reload register rather than two. */
6097 if (GET_CODE (x) == PLUS
6098 && REG_P (XEXP (x, 0))
6099 && REG_P (XEXP (x, 1))
6100 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6101 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6103 rtx orig_x = x;
6105 x = copy_rtx (x);
6106 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6107 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6108 return x;
6111 return NULL;
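A source-level case that can hit the first test above (exact frame layout varies, so this is only illustrative): a byte slot well past the 0..31 range of a Thumb-1 immediate-offset byte access means reloading SP alone would not help, so the whole SP+offset sum is pushed as a single reload.

/* Standalone sketch; names invented for illustration.  */
char
example_deep_stack_byte (char c)
{
  volatile char buf[64];

  buf[40] = c;          /* lives at sp + (roughly) 40, beyond the 5-bit range */
  return buf[40];
}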
6114 /* Test for various thread-local symbols. */
6116 /* Return TRUE if X is a thread-local symbol. */
6118 static bool
6119 arm_tls_symbol_p (rtx x)
6121 if (! TARGET_HAVE_TLS)
6122 return false;
6124 if (GET_CODE (x) != SYMBOL_REF)
6125 return false;
6127 return SYMBOL_REF_TLS_MODEL (x) != 0;
6130 /* Helper for arm_tls_referenced_p. */
6132 static int
6133 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6135 if (GET_CODE (*x) == SYMBOL_REF)
6136 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6138 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6139 TLS offsets, not real symbol references. */
6140 if (GET_CODE (*x) == UNSPEC
6141 && XINT (*x, 1) == UNSPEC_TLS)
6142 return -1;
6144 return 0;
6147 /* Return TRUE if X contains any TLS symbol references. */
6149 bool
6150 arm_tls_referenced_p (rtx x)
6152 if (! TARGET_HAVE_TLS)
6153 return false;
6155 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6158 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6160 bool
6161 arm_cannot_force_const_mem (rtx x)
6163 rtx base, offset;
6165 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6167 split_const (x, &base, &offset);
6168 if (GET_CODE (base) == SYMBOL_REF
6169 && !offset_within_block_p (base, INTVAL (offset)))
6170 return true;
6172 return arm_tls_referenced_p (x);
6175 #define REG_OR_SUBREG_REG(X) \
6176 (GET_CODE (X) == REG \
6177 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6179 #define REG_OR_SUBREG_RTX(X) \
6180 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6182 #ifndef COSTS_N_INSNS
6183 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
6184 #endif
6185 static inline int
6186 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6188 enum machine_mode mode = GET_MODE (x);
6190 switch (code)
6192 case ASHIFT:
6193 case ASHIFTRT:
6194 case LSHIFTRT:
6195 case ROTATERT:
6196 case PLUS:
6197 case MINUS:
6198 case COMPARE:
6199 case NEG:
6200 case NOT:
6201 return COSTS_N_INSNS (1);
6203 case MULT:
6204 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6206 int cycles = 0;
6207 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6209 while (i)
6211 i >>= 2;
6212 cycles++;
6214 return COSTS_N_INSNS (2) + cycles;
6216 return COSTS_N_INSNS (1) + 16;
6218 case SET:
6219 return (COSTS_N_INSNS (1)
6220 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6221    + (GET_CODE (SET_DEST (x)) == MEM)));
6223 case CONST_INT:
6224 if (outer == SET)
6226 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6227 return 0;
6228 if (thumb_shiftable_const (INTVAL (x)))
6229 return COSTS_N_INSNS (2);
6230 return COSTS_N_INSNS (3);
6232 else if ((outer == PLUS || outer == COMPARE)
6233 && INTVAL (x) < 256 && INTVAL (x) > -256)
6234 return 0;
6235 else if ((outer == IOR || outer == XOR || outer == AND)
6236 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6237 return COSTS_N_INSNS (1);
6238 else if (outer == ASHIFT || outer == ASHIFTRT
6239 || outer == LSHIFTRT)
6240 return 0;
6241 return COSTS_N_INSNS (2);
6243 case CONST:
6244 case CONST_DOUBLE:
6245 case LABEL_REF:
6246 case SYMBOL_REF:
6247 return COSTS_N_INSNS (3);
6249 case UDIV:
6250 case UMOD:
6251 case DIV:
6252 case MOD:
6253 return 100;
6255 case TRUNCATE:
6256 return 99;
6258 case AND:
6259 case XOR:
6260 case IOR:
6261 /* XXX guess. */
6262 return 8;
6264 case MEM:
6265 /* XXX another guess. */
6266 /* Memory costs quite a lot for the first word, but subsequent words
6267 load at the equivalent of a single insn each. */
6268 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6269 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6270 ? 4 : 0));
6272 case IF_THEN_ELSE:
6273 /* XXX a guess. */
6274 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6275 return 14;
6276 return 2;
6278 case ZERO_EXTEND:
6279 /* XXX still guessing. */
6280 switch (GET_MODE (XEXP (x, 0)))
6282 case QImode:
6283 return (1 + (mode == DImode ? 4 : 0)
6284 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6286 case HImode:
6287 return (4 + (mode == DImode ? 4 : 0)
6288 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6290 case SImode:
6291 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6293 default:
6294 return 99;
6297 default:
6298 return 99;
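The MULT case above models an early-terminating multiplier at roughly one step per two significant bits of the constant operand.  A standalone sketch of that loop (names invented for illustration):

#include <assert.h>

/* Same loop as the MULT case of thumb1_rtx_costs: one step per two bits of
   the constant multiplier, added on top of COSTS_N_INSNS (2).  */
static int
example_thumb1_mul_cycles (unsigned long i)
{
  int cycles = 0;

  while (i)
    {
      i >>= 2;
      cycles++;
    }
  return cycles;
}

int
main (void)
{
  assert (example_thumb1_mul_cycles (1) == 1);
  assert (example_thumb1_mul_cycles (255) == 4);            /* 8 bits -> 4 steps */
  assert (example_thumb1_mul_cycles (0x12345678UL) == 15);  /* 29 bits -> 15 steps */
  return 0;
}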
6302 static inline bool
6303 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6305 enum machine_mode mode = GET_MODE (x);
6306 enum rtx_code subcode;
6307 rtx operand;
6308 enum rtx_code code = GET_CODE (x);
6309 int extra_cost;
6310 *total = 0;
6312 switch (code)
6314 case MEM:
6315 /* Memory costs quite a lot for the first word, but subsequent words
6316 load at the equivalent of a single insn each. */
6317 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6318 return true;
6320 case DIV:
6321 case MOD:
6322 case UDIV:
6323 case UMOD:
6324 if (TARGET_HARD_FLOAT && mode == SFmode)
6325 *total = COSTS_N_INSNS (2);
6326 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6327 *total = COSTS_N_INSNS (4);
6328 else
6329 *total = COSTS_N_INSNS (20);
6330 return false;
6332 case ROTATE:
6333 if (GET_CODE (XEXP (x, 1)) == REG)
6334 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6335 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6336 *total = rtx_cost (XEXP (x, 1), code, speed);
6338 /* Fall through */
6339 case ROTATERT:
6340 if (mode != SImode)
6342 *total += COSTS_N_INSNS (4);
6343 return true;
6346 /* Fall through */
6347 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6348 *total += rtx_cost (XEXP (x, 0), code, speed);
6349 if (mode == DImode)
6351 *total += COSTS_N_INSNS (3);
6352 return true;
6355 *total += COSTS_N_INSNS (1);
6356    /* Increase the cost of complex shifts because they aren't any faster
6357       and they reduce dual-issue opportunities.  */
6358 if (arm_tune_cortex_a9
6359 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6360 ++*total;
6362 return true;
6364 case MINUS:
6365 if (TARGET_THUMB2)
6367 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6369 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6370 *total = COSTS_N_INSNS (1);
6371 else
6372 *total = COSTS_N_INSNS (20);
6374 else
6375 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6376 /* Thumb2 does not have RSB, so all arguments must be
6377 registers (subtracting a constant is canonicalized as
6378 addition of the negated constant). */
6379 return false;
6382 if (mode == DImode)
6384 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6385 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6386 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6388 *total += rtx_cost (XEXP (x, 1), code, speed);
6389 return true;
6392 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6393 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6395 *total += rtx_cost (XEXP (x, 0), code, speed);
6396 return true;
6399 return false;
6402 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6404 if (TARGET_HARD_FLOAT
6405 && (mode == SFmode
6406 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6408 *total = COSTS_N_INSNS (1);
6409 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6410 && arm_const_double_rtx (XEXP (x, 0)))
6412 *total += rtx_cost (XEXP (x, 1), code, speed);
6413 return true;
6416 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6417 && arm_const_double_rtx (XEXP (x, 1)))
6419 *total += rtx_cost (XEXP (x, 0), code, speed);
6420 return true;
6423 return false;
6425 *total = COSTS_N_INSNS (20);
6426 return false;
6429 *total = COSTS_N_INSNS (1);
6430 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6431 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6433 *total += rtx_cost (XEXP (x, 1), code, speed);
6434 return true;
6437 subcode = GET_CODE (XEXP (x, 1));
6438 if (subcode == ASHIFT || subcode == ASHIFTRT
6439 || subcode == LSHIFTRT
6440 || subcode == ROTATE || subcode == ROTATERT)
6442 *total += rtx_cost (XEXP (x, 0), code, speed);
6443 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6444 return true;
6447 /* A shift as a part of RSB costs no more than RSB itself. */
6448 if (GET_CODE (XEXP (x, 0)) == MULT
6449 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6451 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6452 *total += rtx_cost (XEXP (x, 1), code, speed);
6453 return true;
6456 if (subcode == MULT
6457 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6459 *total += rtx_cost (XEXP (x, 0), code, speed);
6460 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6461 return true;
6464 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6465 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6467 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6468 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6469 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6470 *total += COSTS_N_INSNS (1);
6472 return true;
6475 /* Fall through */
6477 case PLUS:
6478 if (code == PLUS && arm_arch6 && mode == SImode
6479 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6480 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6482 *total = COSTS_N_INSNS (1);
6483 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6484 speed);
6485 *total += rtx_cost (XEXP (x, 1), code, speed);
6486 return true;
6489 /* MLA: All arguments must be registers. We filter out
6490    multiplication by a power of two, so that we fall through to
6491 the code below. */
6492 if (GET_CODE (XEXP (x, 0)) == MULT
6493 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6495 /* The cost comes from the cost of the multiply. */
6496 return false;
6499 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6501 if (TARGET_HARD_FLOAT
6502 && (mode == SFmode
6503 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6505 *total = COSTS_N_INSNS (1);
6506 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6507 && arm_const_double_rtx (XEXP (x, 1)))
6509 *total += rtx_cost (XEXP (x, 0), code, speed);
6510 return true;
6513 return false;
6516 *total = COSTS_N_INSNS (20);
6517 return false;
6520 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6521 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6523 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6524 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6525 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6526 *total += COSTS_N_INSNS (1);
6527 return true;
6530 /* Fall through */
6532 case AND: case XOR: case IOR:
6533 extra_cost = 0;
6535    /* Normally the frame registers will be split into reg+const during
6536 reload, so it is a bad idea to combine them with other instructions,
6537 since then they might not be moved outside of loops. As a compromise
6538 we allow integration with ops that have a constant as their second
6539 operand. */
6540 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6541 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6542 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6543 || (REG_OR_SUBREG_REG (XEXP (x, 0))
6544 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
6545 *total = 4;
6547 if (mode == DImode)
6549 *total += COSTS_N_INSNS (2);
6550 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6551 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6553 *total += rtx_cost (XEXP (x, 0), code, speed);
6554 return true;
6557 return false;
6560 *total += COSTS_N_INSNS (1);
6561 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6562 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6564 *total += rtx_cost (XEXP (x, 0), code, speed);
6565 return true;
6567 subcode = GET_CODE (XEXP (x, 0));
6568 if (subcode == ASHIFT || subcode == ASHIFTRT
6569 || subcode == LSHIFTRT
6570 || subcode == ROTATE || subcode == ROTATERT)
6572 *total += rtx_cost (XEXP (x, 1), code, speed);
6573 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6574 return true;
6577 if (subcode == MULT
6578 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6580 *total += rtx_cost (XEXP (x, 1), code, speed);
6581 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6582 return true;
6585 if (subcode == UMIN || subcode == UMAX
6586 || subcode == SMIN || subcode == SMAX)
6588 *total = COSTS_N_INSNS (3);
6589 return true;
6592 return false;
6594 case MULT:
6595 /* This should have been handled by the CPU specific routines. */
6596 gcc_unreachable ();
6598 case TRUNCATE:
6599 if (arm_arch3m && mode == SImode
6600 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6601 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6602 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6603 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6604 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6605 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6607 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6608 return true;
6610 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6611 return false;
6613 case NEG:
6614 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6616 if (TARGET_HARD_FLOAT
6617 && (mode == SFmode
6618 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6620 *total = COSTS_N_INSNS (1);
6621 return false;
6623 *total = COSTS_N_INSNS (2);
6624 return false;
6627 /* Fall through */
6628 case NOT:
6629 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6630 if (mode == SImode && code == NOT)
6632 subcode = GET_CODE (XEXP (x, 0));
6633 if (subcode == ASHIFT || subcode == ASHIFTRT
6634 || subcode == LSHIFTRT
6635 || subcode == ROTATE || subcode == ROTATERT
6636 || (subcode == MULT
6637 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6639 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6640 /* Register shifts cost an extra cycle. */
6641 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6642 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6643 subcode, speed);
6644 return true;
6648 return false;
6650 case IF_THEN_ELSE:
6651 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6653 *total = COSTS_N_INSNS (4);
6654 return true;
6657 operand = XEXP (x, 0);
6659 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6660 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6661 && GET_CODE (XEXP (operand, 0)) == REG
6662 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6663 *total += COSTS_N_INSNS (1);
6664 *total += (rtx_cost (XEXP (x, 1), code, speed)
6665 + rtx_cost (XEXP (x, 2), code, speed));
6666 return true;
6668 case NE:
6669 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6671 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6672 return true;
6674 goto scc_insn;
6676 case GE:
6677 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6678 && mode == SImode && XEXP (x, 1) == const0_rtx)
6680 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6681 return true;
6683 goto scc_insn;
6685 case LT:
6686 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6687 && mode == SImode && XEXP (x, 1) == const0_rtx)
6689 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6690 return true;
6692 goto scc_insn;
6694 case EQ:
6695 case GT:
6696 case LE:
6697 case GEU:
6698 case LTU:
6699 case GTU:
6700 case LEU:
6701 case UNORDERED:
6702 case ORDERED:
6703 case UNEQ:
6704 case UNGE:
6705 case UNLT:
6706 case UNGT:
6707 case UNLE:
6708 scc_insn:
6709    /* SCC insns.  If the comparison has already been performed,
6710       they cost 2 instructions.  Otherwise they need
6711 an additional comparison before them. */
6712 *total = COSTS_N_INSNS (2);
6713 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6715 return true;
6718 /* Fall through */
6719 case COMPARE:
6720 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6722 *total = 0;
6723 return true;
6726 *total += COSTS_N_INSNS (1);
6727 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6728 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6730 *total += rtx_cost (XEXP (x, 0), code, speed);
6731 return true;
6734 subcode = GET_CODE (XEXP (x, 0));
6735 if (subcode == ASHIFT || subcode == ASHIFTRT
6736 || subcode == LSHIFTRT
6737 || subcode == ROTATE || subcode == ROTATERT)
6739 *total += rtx_cost (XEXP (x, 1), code, speed);
6740 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6741 return true;
6744 if (subcode == MULT
6745 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6747 *total += rtx_cost (XEXP (x, 1), code, speed);
6748 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6749 return true;
6752 return false;
6754 case UMIN:
6755 case UMAX:
6756 case SMIN:
6757 case SMAX:
6758 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6759 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6760 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6761 *total += rtx_cost (XEXP (x, 1), code, speed);
6762 return true;
6764 case ABS:
6765 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6767 if (TARGET_HARD_FLOAT
6768 && (mode == SFmode
6769 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6771 *total = COSTS_N_INSNS (1);
6772 return false;
6774 *total = COSTS_N_INSNS (20);
6775 return false;
6777 *total = COSTS_N_INSNS (1);
6778 if (mode == DImode)
6779 *total += COSTS_N_INSNS (3);
6780 return false;
6782 case SIGN_EXTEND:
6783 if (GET_MODE_CLASS (mode) == MODE_INT)
6785 *total = 0;
6786 if (mode == DImode)
6787 *total += COSTS_N_INSNS (1);
6789 if (GET_MODE (XEXP (x, 0)) != SImode)
6791 if (arm_arch6)
6793 if (GET_CODE (XEXP (x, 0)) != MEM)
6794 *total += COSTS_N_INSNS (1);
6796 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6797 *total += COSTS_N_INSNS (2);
6800 return false;
6803 /* Fall through */
6804 case ZERO_EXTEND:
6805 *total = 0;
6806 if (GET_MODE_CLASS (mode) == MODE_INT)
6808 if (mode == DImode)
6809 *total += COSTS_N_INSNS (1);
6811 if (GET_MODE (XEXP (x, 0)) != SImode)
6813 if (arm_arch6)
6815 if (GET_CODE (XEXP (x, 0)) != MEM)
6816 *total += COSTS_N_INSNS (1);
6818 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6819 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
6820 1 : 2);
6823 return false;
6826 switch (GET_MODE (XEXP (x, 0)))
6828 case V8QImode:
6829 case V4HImode:
6830 case V2SImode:
6831 case V4QImode:
6832 case V2HImode:
6833 *total = COSTS_N_INSNS (1);
6834 return false;
6836 default:
6837 gcc_unreachable ();
6839 gcc_unreachable ();
6841 case ZERO_EXTRACT:
6842 case SIGN_EXTRACT:
6843 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6844 return true;
6846 case CONST_INT:
6847 if (const_ok_for_arm (INTVAL (x))
6848 || const_ok_for_arm (~INTVAL (x)))
6849 *total = COSTS_N_INSNS (1);
6850 else
6851 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6852 INTVAL (x), NULL_RTX,
6853 NULL_RTX, 0, 0));
6854 return true;
6856 case CONST:
6857 case LABEL_REF:
6858 case SYMBOL_REF:
6859 *total = COSTS_N_INSNS (3);
6860 return true;
6862 case HIGH:
6863 *total = COSTS_N_INSNS (1);
6864 return true;
6866 case LO_SUM:
6867 *total = COSTS_N_INSNS (1);
6868 *total += rtx_cost (XEXP (x, 0), code, speed);
6869 return true;
6871 case CONST_DOUBLE:
6872 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
6873 && (mode == SFmode || !TARGET_VFP_SINGLE))
6874 *total = COSTS_N_INSNS (1);
6875 else
6876 *total = COSTS_N_INSNS (4);
6877 return true;
6879 default:
6880 *total = COSTS_N_INSNS (4);
6881 return false;
6885 /* RTX costs when optimizing for size. */
6886 static bool
6887 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6888 int *total)
6890 enum machine_mode mode = GET_MODE (x);
6891 if (TARGET_THUMB1)
6893 /* XXX TBD. For now, use the standard costs. */
6894 *total = thumb1_rtx_costs (x, code, outer_code);
6895 return true;
6898 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
6899 switch (code)
6901 case MEM:
6902    /* A memory access costs 1 insn if the mode is small or the address is
6903       a single register; otherwise it costs one insn per word.  */
6904 if (REG_P (XEXP (x, 0)))
6905 *total = COSTS_N_INSNS (1);
6906 else
6907 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6908 return true;
6910 case DIV:
6911 case MOD:
6912 case UDIV:
6913 case UMOD:
6914 /* Needs a libcall, so it costs about this. */
6915 *total = COSTS_N_INSNS (2);
6916 return false;
6918 case ROTATE:
6919 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
6921 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
6922 return true;
6924 /* Fall through */
6925 case ROTATERT:
6926 case ASHIFT:
6927 case LSHIFTRT:
6928 case ASHIFTRT:
6929 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
6931 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
6932 return true;
6934 else if (mode == SImode)
6936 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
6937 /* Slightly disparage register shifts, but not by much. */
6938 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6939 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
6940 return true;
6943 /* Needs a libcall. */
6944 *total = COSTS_N_INSNS (2);
6945 return false;
6947 case MINUS:
6948 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
6949 && (mode == SFmode || !TARGET_VFP_SINGLE))
6951 *total = COSTS_N_INSNS (1);
6952 return false;
6955 if (mode == SImode)
6957 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
6958 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
6960 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
6961 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
6962 || subcode1 == ROTATE || subcode1 == ROTATERT
6963 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
6964 || subcode1 == ASHIFTRT)
6966 /* It's just the cost of the two operands. */
6967 *total = 0;
6968 return false;
6971 *total = COSTS_N_INSNS (1);
6972 return false;
6975 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6976 return false;
6978 case PLUS:
6979 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
6980 && (mode == SFmode || !TARGET_VFP_SINGLE))
6982 *total = COSTS_N_INSNS (1);
6983 return false;
6986 /* A shift as a part of ADD costs nothing. */
6987 if (GET_CODE (XEXP (x, 0)) == MULT
6988 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6990 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
6991 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
6992 *total += rtx_cost (XEXP (x, 1), code, false);
6993 return true;
6996 /* Fall through */
6997 case AND: case XOR: case IOR:
6998 if (mode == SImode)
7000 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7002 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7003 || subcode == LSHIFTRT || subcode == ASHIFTRT
7004 || (code == AND && subcode == NOT))
7006 /* It's just the cost of the two operands. */
7007 *total = 0;
7008 return false;
7012 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7013 return false;
7015 case MULT:
7016 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7017 return false;
7019 case NEG:
7020 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7021 && (mode == SFmode || !TARGET_VFP_SINGLE))
7023 *total = COSTS_N_INSNS (1);
7024 return false;
7027 /* Fall through */
7028 case NOT:
7029 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7031 return false;
7033 case IF_THEN_ELSE:
7034 *total = 0;
7035 return false;
7037 case COMPARE:
7038 if (cc_register (XEXP (x, 0), VOIDmode))
7039 * total = 0;
7040 else
7041 *total = COSTS_N_INSNS (1);
7042 return false;
7044 case ABS:
7045 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7046 && (mode == SFmode || !TARGET_VFP_SINGLE))
7047 *total = COSTS_N_INSNS (1);
7048 else
7049 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7050 return false;
7052 case SIGN_EXTEND:
7053 *total = 0;
7054 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
7056 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7057 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7059 if (mode == DImode)
7060 *total += COSTS_N_INSNS (1);
7061 return false;
7063 case ZERO_EXTEND:
7064 *total = 0;
7065 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7067 switch (GET_MODE (XEXP (x, 0)))
7069 case QImode:
7070 *total += COSTS_N_INSNS (1);
7071 break;
7073 case HImode:
7074 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7076 case SImode:
7077 break;
7079 default:
7080 *total += COSTS_N_INSNS (2);
7084 if (mode == DImode)
7085 *total += COSTS_N_INSNS (1);
7087 return false;
7089 case CONST_INT:
7090 if (const_ok_for_arm (INTVAL (x)))
7091 /* A multiplication by a constant requires another instruction
7092 to load the constant to a register. */
7093 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7094 ? 1 : 0);
7095 else if (const_ok_for_arm (~INTVAL (x)))
7096 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7097 else if (const_ok_for_arm (-INTVAL (x)))
7099 if (outer_code == COMPARE || outer_code == PLUS
7100 || outer_code == MINUS)
7101 *total = 0;
7102 else
7103 *total = COSTS_N_INSNS (1);
7105 else
7106 *total = COSTS_N_INSNS (2);
7107 return true;
7109 case CONST:
7110 case LABEL_REF:
7111 case SYMBOL_REF:
7112 *total = COSTS_N_INSNS (2);
7113 return true;
7115 case CONST_DOUBLE:
7116 *total = COSTS_N_INSNS (4);
7117 return true;
7119 case HIGH:
7120 case LO_SUM:
7121 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7122 cost of these slightly. */
7123 *total = COSTS_N_INSNS (1) + 1;
7124 return true;
7126 default:
7127 if (mode != VOIDmode)
7128 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7129 else
7130    *total = COSTS_N_INSNS (4); /* Who knows?  */
7131 return false;
7135 /* RTX costs.  When optimizing for size, use the size costs; otherwise
        use the tuning-specific costs of the selected core.  */
7136 static bool
7137 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7138 bool speed)
7140 if (!speed)
7141 return arm_size_rtx_costs (x, (enum rtx_code) code,
7142 (enum rtx_code) outer_code, total);
7143 else
7144 return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
7145 (enum rtx_code) outer_code,
7146 total, speed);
7149 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7150 supported on any "slowmul" cores, so it can be ignored. */
7152 static bool
7153 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7154 int *total, bool speed)
7156 enum machine_mode mode = GET_MODE (x);
7158 if (TARGET_THUMB)
7160 *total = thumb1_rtx_costs (x, code, outer_code);
7161 return true;
7164 switch (code)
7166 case MULT:
7167 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7168 || mode == DImode)
7170 *total = COSTS_N_INSNS (20);
7171 return false;
7174 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7176 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7177 & (unsigned HOST_WIDE_INT) 0xffffffff);
7178 int cost, const_ok = const_ok_for_arm (i);
7179 int j, booth_unit_size;
7181 /* Tune as appropriate. */
7182 cost = const_ok ? 4 : 8;
7183 booth_unit_size = 2;
7184 for (j = 0; i && j < 32; j += booth_unit_size)
7186 i >>= booth_unit_size;
7187 cost++;
7190 *total = COSTS_N_INSNS (cost);
7191 *total += rtx_cost (XEXP (x, 0), code, speed);
7192 return true;
7195 *total = COSTS_N_INSNS (20);
7196 return false;
7198 default:
7199    return arm_rtx_costs_1 (x, outer_code, total, speed);
7204 /* RTX cost for cores with a fast multiply unit (M variants). */
7206 static bool
7207 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7208 int *total, bool speed)
7210 enum machine_mode mode = GET_MODE (x);
7212 if (TARGET_THUMB1)
7214 *total = thumb1_rtx_costs (x, code, outer_code);
7215 return true;
7218 /* ??? should thumb2 use different costs? */
7219 switch (code)
7221 case MULT:
7222 /* There is no point basing this on the tuning, since it is always the
7223 fast variant if it exists at all. */
7224 if (mode == DImode
7225 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7226 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7227 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7229 *total = COSTS_N_INSNS(2);
7230 return false;
7234 if (mode == DImode)
7236 *total = COSTS_N_INSNS (5);
7237 return false;
7240 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7242 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7243 & (unsigned HOST_WIDE_INT) 0xffffffff);
7244 int cost, const_ok = const_ok_for_arm (i);
7245 int j, booth_unit_size;
7247 /* Tune as appropriate. */
7248 cost = const_ok ? 4 : 8;
7249 booth_unit_size = 8;
7250 for (j = 0; i && j < 32; j += booth_unit_size)
7252 i >>= booth_unit_size;
7253 cost++;
7256 *total = COSTS_N_INSNS(cost);
7257 return false;
7260 if (mode == SImode)
7262 *total = COSTS_N_INSNS (4);
7263 return false;
7266 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7268 if (TARGET_HARD_FLOAT
7269 && (mode == SFmode
7270 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7272 *total = COSTS_N_INSNS (1);
7273 return false;
7277 /* Requires a lib call */
7278 *total = COSTS_N_INSNS (20);
7279 return false;
7281 default:
7282 return arm_rtx_costs_1 (x, outer_code, total, speed);
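Both this function and arm_slowmul_rtx_costs above walk the 32-bit constant multiplier in booth_unit_size chunks (8 bits per step here, 2 bits for the slow-multiply cores), starting from a base cost of 4 or 8 depending on whether the constant is ARM-encodable.  A standalone sketch of that walk (names invented for illustration):

#include <stdio.h>

/* Same walk as the MULT cases above; BOOTH_BITS is 2 for "slowmul" cores
   and 8 for "fastmul" ones, CONST_OK stands in for const_ok_for_arm.  The
   result is wrapped in COSTS_N_INSNS () by the callers.  */
static int
example_booth_cost (unsigned long i, int booth_bits, int const_ok)
{
  int cost = const_ok ? 4 : 8;
  int j;

  i &= 0xffffffffUL;
  for (j = 0; i && j < 32; j += booth_bits)
    {
      i >>= booth_bits;
      cost++;
    }
  return cost;
}

int
main (void)
{
  /* Multiplying by 0x12345678, which is not ARM-encodable: */
  printf ("slowmul cost %d\n", example_booth_cost (0x12345678UL, 2, 0));  /* 8 + 15 */
  printf ("fastmul cost %d\n", example_booth_cost (0x12345678UL, 8, 0));  /* 8 + 4  */
  return 0;
}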
7287 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7288 so it can be ignored. */
7290 static bool
7291 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
7293 enum machine_mode mode = GET_MODE (x);
7295 if (TARGET_THUMB)
7297 *total = thumb1_rtx_costs (x, code, outer_code);
7298 return true;
7301 switch (code)
7303 case COMPARE:
7304 if (GET_CODE (XEXP (x, 0)) != MULT)
7305 return arm_rtx_costs_1 (x, outer_code, total, speed);
7307 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7308 will stall until the multiplication is complete. */
7309 *total = COSTS_N_INSNS (3);
7310 return false;
7312 case MULT:
7313 /* There is no point basing this on the tuning, since it is always the
7314 fast variant if it exists at all. */
7315 if (mode == DImode
7316 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7317 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7318 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7320 *total = COSTS_N_INSNS (2);
7321 return false;
7325 if (mode == DImode)
7327 *total = COSTS_N_INSNS (5);
7328 return false;
7331 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7333 /* If operand 1 is a constant we can more accurately
7334 calculate the cost of the multiply. The multiplier can
7335 retire 15 bits on the first cycle and a further 12 on the
7336 second. We do, of course, have to load the constant into
7337 a register first. */
7338 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7339 /* There's a general overhead of one cycle. */
7340 int cost = 1;
7341 unsigned HOST_WIDE_INT masked_const;
7343 if (i & 0x80000000)
7344 i = ~i;
7346 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7348 masked_const = i & 0xffff8000;
7349 if (masked_const != 0)
7351 cost++;
7352 masked_const = i & 0xf8000000;
7353 if (masked_const != 0)
7354 cost++;
7356 *total = COSTS_N_INSNS (cost);
7357 return false;
7360 if (mode == SImode)
7362 *total = COSTS_N_INSNS (3);
7363 return false;
7366 /* Requires a lib call */
7367 *total = COSTS_N_INSNS (20);
7368 return false;
7370 default:
7371 return arm_rtx_costs_1 (x, outer_code, total, speed);
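The CONST_INT sub-case above charges one extra cycle when the (possibly complemented) constant has significant bits above bit 14, and another when it has bits above bit 26, matching a multiplier that retires 15 bits on the first cycle and 12 more on the second.  A standalone sketch (names invented for illustration):

#include <assert.h>

/* Same arithmetic as the XScale CONST_INT multiply cost above.  */
static int
example_xscale_mul_cycles (unsigned long i)
{
  int cost = 1;                   /* general overhead */

  if (i & 0x80000000UL)
    i = ~i;                       /* use the complement of negative-looking values */
  i &= 0xffffffffUL;

  if (i & 0xffff8000UL)
    cost++;                       /* significant bits beyond the first 15 */
  if (i & 0xf8000000UL)
    cost++;                       /* significant bits beyond the first 27 */
  return cost;
}

int
main (void)
{
  assert (example_xscale_mul_cycles (100) == 1);
  assert (example_xscale_mul_cycles (0x00012345UL) == 2);
  assert (example_xscale_mul_cycles (0x12345678UL) == 3);
  return 0;
}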
7376 /* RTX costs for 9e (and later) cores. */
7378 static bool
7379 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7380 int *total, bool speed)
7382 enum machine_mode mode = GET_MODE (x);
7384 if (TARGET_THUMB1)
7386 switch (code)
7388 case MULT:
7389 *total = COSTS_N_INSNS (3);
7390 return true;
7392 default:
7393 *total = thumb1_rtx_costs (x, code, outer_code);
7394 return true;
7398 switch (code)
7400 case MULT:
7401 /* There is no point basing this on the tuning, since it is always the
7402 fast variant if it exists at all. */
7403 if (mode == DImode
7404 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7405 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7406 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7408 *total = COSTS_N_INSNS (2);
7409 return false;
7413 if (mode == DImode)
7415 *total = COSTS_N_INSNS (5);
7416 return false;
7419 if (mode == SImode)
7421 *total = COSTS_N_INSNS (2);
7422 return false;
7425 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7427 if (TARGET_HARD_FLOAT
7428 && (mode == SFmode
7429 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7431 *total = COSTS_N_INSNS (1);
7432 return false;
7436 *total = COSTS_N_INSNS (20);
7437 return false;
7439 default:
7440 return arm_rtx_costs_1 (x, outer_code, total, speed);
7443 /* All address computations that can be done are free, but rtx cost returns
7444 the same for practically all of them. So we weight the different types
7445 of address here in the order (most pref first):
7446 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7447 static inline int
7448 arm_arm_address_cost (rtx x)
7450 enum rtx_code c = GET_CODE (x);
7452 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7453 return 0;
7454 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7455 return 10;
7457 if (c == PLUS)
7459 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7460 return 2;
7462 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7463 return 3;
7465 return 4;
7468 return 6;
7471 static inline int
7472 arm_thumb_address_cost (rtx x)
7474 enum rtx_code c = GET_CODE (x);
7476 if (c == REG)
7477 return 1;
7478 if (c == PLUS
7479 && GET_CODE (XEXP (x, 0)) == REG
7480 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7481 return 1;
7483 return 2;
7486 static int
7487 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7489 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7492 static int
7493 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7495 rtx i_pat, d_pat;
7497 /* Some true dependencies can have a higher cost depending
7498 on precisely how certain input operands are used. */
7499 if (arm_tune_xscale
7500 && REG_NOTE_KIND (link) == 0
7501 && recog_memoized (insn) >= 0
7502 && recog_memoized (dep) >= 0)
7504 int shift_opnum = get_attr_shift (insn);
7505 enum attr_type attr_type = get_attr_type (dep);
7507 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7508 operand for INSN. If we have a shifted input operand and the
7509 instruction we depend on is another ALU instruction, then we may
7510 have to account for an additional stall. */
7511 if (shift_opnum != 0
7512 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7514 rtx shifted_operand;
7515 int opno;
7517 /* Get the shifted operand. */
7518 extract_insn (insn);
7519 shifted_operand = recog_data.operand[shift_opnum];
7521 /* Iterate over all the operands in DEP. If we write an operand
7522    that overlaps with SHIFTED_OPERAND, then we have to increase the
7523 cost of this dependency. */
7524 extract_insn (dep);
7525 preprocess_constraints ();
7526 for (opno = 0; opno < recog_data.n_operands; opno++)
7528 /* We can ignore strict inputs. */
7529 if (recog_data.operand_type[opno] == OP_IN)
7530 continue;
7532 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7533 shifted_operand))
7534 return 2;
7539 /* XXX This is not strictly true for the FPA. */
7540 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7541 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7542 return 0;
7544 /* Call insns don't incur a stall, even if they follow a load. */
7545 if (REG_NOTE_KIND (link) == 0
7546 && GET_CODE (insn) == CALL_INSN)
7547 return 1;
7549 if ((i_pat = single_set (insn)) != NULL
7550 && GET_CODE (SET_SRC (i_pat)) == MEM
7551 && (d_pat = single_set (dep)) != NULL
7552 && GET_CODE (SET_DEST (d_pat)) == MEM)
7554 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7555    /* This is a load after a store; there is no conflict if the load reads
7556       from a cached area.  Assume that loads from the stack and from the
7557 constant pool are cached, and that others will miss. This is a
7558 hack. */
7560 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7561 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7562 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7563 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7564 return 1;
7567 return cost;
7570 static int fp_consts_inited = 0;
7572 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7573 static const char * const strings_fp[8] =
7575 "0", "1", "2", "3",
7576 "4", "5", "0.5", "10"
7579 static REAL_VALUE_TYPE values_fp[8];
7581 static void
7582 init_fp_table (void)
7584 int i;
7585 REAL_VALUE_TYPE r;
7587 if (TARGET_VFP)
7588 fp_consts_inited = 1;
7589 else
7590 fp_consts_inited = 8;
7592 for (i = 0; i < fp_consts_inited; i++)
7594 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7595 values_fp[i] = r;
7599 /* Return TRUE if rtx X is a valid immediate FP constant. */
7601 arm_const_double_rtx (rtx x)
7603 REAL_VALUE_TYPE r;
7604 int i;
7606 if (!fp_consts_inited)
7607 init_fp_table ();
7609 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7610 if (REAL_VALUE_MINUS_ZERO (r))
7611 return 0;
7613 for (i = 0; i < fp_consts_inited; i++)
7614 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7615 return 1;
7617 return 0;
7620 /* Return TRUE if rtx X is a valid immediate FPA constant. */
7622 neg_const_double_rtx_ok_for_fpa (rtx x)
7624 REAL_VALUE_TYPE r;
7625 int i;
7627 if (!fp_consts_inited)
7628 init_fp_table ();
7630 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7631 r = REAL_VALUE_NEGATE (r);
7632 if (REAL_VALUE_MINUS_ZERO (r))
7633 return 0;
7635 for (i = 0; i < 8; i++)
7636 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7637 return 1;
7639 return 0;
7643 /* VFPv3 has a fairly wide range of representable immediates, formed from
7644 "quarter-precision" floating-point values. These can be evaluated using this
7645 formula (with ^ for exponentiation):
7647 -1^s * n * 2^-r
7649 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7650 16 <= n <= 31 and 0 <= r <= 7.
7652 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7654 - A (most-significant) is the sign bit.
7655 - BCD are the exponent (encoded as r XOR 3).
7656 - EFGH are the mantissa (encoded as n - 16).
7659 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7660 fconst[sd] instruction, or -1 if X isn't suitable. */
7661 static int
7662 vfp3_const_double_index (rtx x)
7664 REAL_VALUE_TYPE r, m;
7665 int sign, exponent;
7666 unsigned HOST_WIDE_INT mantissa, mant_hi;
7667 unsigned HOST_WIDE_INT mask;
7668 HOST_WIDE_INT m1, m2;
7669 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7671 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7672 return -1;
7674 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7676 /* We can't represent these things, so detect them first. */
7677 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7678 return -1;
7680 /* Extract sign, exponent and mantissa. */
7681 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7682 r = REAL_VALUE_ABS (r);
7683 exponent = REAL_EXP (&r);
7684 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7685 highest (sign) bit, with a fixed binary point at bit point_pos.
7686 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7687 bits for the mantissa, this may fail (low bits would be lost). */
7688 real_ldexp (&m, &r, point_pos - exponent);
7689 REAL_VALUE_TO_INT (&m1, &m2, m);
7690 mantissa = m1;
7691 mant_hi = m2;
7693 /* If there are bits set in the low part of the mantissa, we can't
7694 represent this value. */
7695 if (mantissa != 0)
7696 return -1;
7698 /* Now make it so that mantissa contains the most-significant bits, and move
7699 the point_pos to indicate that the least-significant bits have been
7700 discarded. */
7701 point_pos -= HOST_BITS_PER_WIDE_INT;
7702 mantissa = mant_hi;
7704 /* We can permit four significant bits of mantissa only, plus a high bit
7705 which is always 1. */
7706 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7707 if ((mantissa & mask) != 0)
7708 return -1;
7710 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7711 mantissa >>= point_pos - 5;
7713 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7714 floating-point immediate zero with Neon using an integer-zero load, but
7715 that case is handled elsewhere.) */
7716 if (mantissa == 0)
7717 return -1;
7719 gcc_assert (mantissa >= 16 && mantissa <= 31);
7721 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7722 normalized significands are in the range [1, 2). (Our mantissa is shifted
7723 left 4 places at this point relative to normalized IEEE754 values). GCC
7724 internally uses [0.5, 1) (see real.c), so the exponent returned from
7725 REAL_EXP must be altered. */
7726 exponent = 5 - exponent;
7728 if (exponent < 0 || exponent > 7)
7729 return -1;
7731 /* Sign, mantissa and exponent are now in the correct form to plug into the
7732 formula described in the comment above. */
7733 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
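A worked check of the encoding just computed, using the (s, n, r) decomposition from the comment before this function: value = -1^s * n * 2^-r with 16 <= n <= 31 and 0 <= r <= 7, packed as sign, r XOR 3, n - 16.  Standalone sketch, names invented for illustration:

#include <assert.h>

/* Build the 8-bit fconst[sd] index directly from (s, n, r).  */
static int
example_vfp3_imm8 (int sign, int n, int r)
{
  return (sign << 7) | ((r ^ 3) << 4) | (n - 16);
}

int
main (void)
{
  assert (example_vfp3_imm8 (0, 16, 4) == 0x70);   /* 16 * 2^-4  = 1.0  */
  assert (example_vfp3_imm8 (0, 16, 5) == 0x60);   /* 16 * 2^-5  = 0.5  */
  assert (example_vfp3_imm8 (0, 31, 0) == 0x3f);   /* 31 * 2^0   = 31.0 */
  assert (example_vfp3_imm8 (1, 24, 3) == 0x88);   /* -24 * 2^-3 = -3.0 */
  return 0;
}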
7736 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
7738 vfp3_const_double_rtx (rtx x)
7740 if (!TARGET_VFP3)
7741 return 0;
7743 return vfp3_const_double_index (x) != -1;
7746 /* Recognize immediates which can be used in various Neon instructions. Legal
7747 immediates are described by the following table (for VMVN variants, the
7748    bitwise inverse of the constant shown is recognized; in either case, VMOV
7749 is output and the correct instruction to use for a given constant is chosen
7750 by the assembler). The constant shown is replicated across all elements of
7751 the destination vector.
7753 insn elems variant constant (binary)
7754 ---- ----- ------- -----------------
7755 vmov i32 0 00000000 00000000 00000000 abcdefgh
7756 vmov i32 1 00000000 00000000 abcdefgh 00000000
7757 vmov i32 2 00000000 abcdefgh 00000000 00000000
7758 vmov i32 3 abcdefgh 00000000 00000000 00000000
7759 vmov i16 4 00000000 abcdefgh
7760 vmov i16 5 abcdefgh 00000000
7761 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7762 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7763 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7764 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7765 vmvn i16 10 00000000 abcdefgh
7766 vmvn i16 11 abcdefgh 00000000
7767 vmov i32 12 00000000 00000000 abcdefgh 11111111
7768 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7769 vmov i32 14 00000000 abcdefgh 11111111 11111111
7770 vmvn i32 15 00000000 abcdefgh 11111111 11111111
7771 vmov i8 16 abcdefgh
7772 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7773 eeeeeeee ffffffff gggggggg hhhhhhhh
7774 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
7776 For case 18, B = !b. Representable values are exactly those accepted by
7777 vfp3_const_double_index, but are output as floating-point numbers rather
7778 than indices.
7780 Variants 0-5 (inclusive) may also be used as immediates for the second
7781 operand of VORR/VBIC instructions.
7783 The INVERSE argument causes the bitwise inverse of the given operand to be
7784 recognized instead (used for recognizing legal immediates for the VAND/VORN
7785 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7786 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7787 output, rather than the real insns vbic/vorr).
7789 INVERSE makes no difference to the recognition of float vectors.
7791 The return value is the variant of immediate as shown in the above table, or
7792 -1 if the given value doesn't match any of the listed patterns.
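To make the table concrete, here is a partial standalone sketch that classifies a few of the simplest splatted 32-bit elements (variants 0, 3 and 16); the recognizer that follows handles every row bytewise.  Names invented for illustration:

#include <assert.h>

/* Partial mirror of the table above for one replicated 32-bit element:
   variant 0 has only the low byte set, variant 3 only the high byte, and
   variant 16 (the i8 case) needs all four bytes equal.  */
static int
example_classify_splat_i32 (unsigned long elem)
{
  elem &= 0xffffffffUL;
  if ((elem & ~0x000000ffUL) == 0)
    return 0;                       /* vmov.i32, low byte only */
  if ((elem & ~0xff000000UL) == 0)
    return 3;                       /* vmov.i32, high byte only */
  if (((elem >> 24) & 0xff) == (elem & 0xff)
      && ((elem >> 16) & 0xff) == (elem & 0xff)
      && ((elem >> 8) & 0xff) == (elem & 0xff))
    return 16;                      /* vmov.i8, all bytes equal */
  return -1;                        /* other variants not modelled here */
}

int
main (void)
{
  assert (example_classify_splat_i32 (0x0000005aUL) == 0);
  assert (example_classify_splat_i32 (0x5a000000UL) == 3);
  assert (example_classify_splat_i32 (0x5a5a5a5aUL) == 16);
  assert (example_classify_splat_i32 (0x00005a00UL) == -1);  /* variant 1 in the real table */
  return 0;
}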
7794 static int
7795 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7796 rtx *modconst, int *elementwidth)
7798 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
7799 matches = 1; \
7800 for (i = 0; i < idx; i += (STRIDE)) \
7801 if (!(TEST)) \
7802 matches = 0; \
7803 if (matches) \
7805 immtype = (CLASS); \
7806 elsize = (ELSIZE); \
7807 break; \
7810 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7811 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7812 unsigned char bytes[16];
7813 int immtype = -1, matches;
7814 unsigned int invmask = inverse ? 0xff : 0;
7816 /* Vectors of float constants. */
7817 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7819 rtx el0 = CONST_VECTOR_ELT (op, 0);
7820 REAL_VALUE_TYPE r0;
7822 if (!vfp3_const_double_rtx (el0))
7823 return -1;
7825 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
7827 for (i = 1; i < n_elts; i++)
7829 rtx elt = CONST_VECTOR_ELT (op, i);
7830 REAL_VALUE_TYPE re;
7832 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
7834 if (!REAL_VALUES_EQUAL (r0, re))
7835 return -1;
7838 if (modconst)
7839 *modconst = CONST_VECTOR_ELT (op, 0);
7841 if (elementwidth)
7842 *elementwidth = 0;
7844 return 18;
7847 /* Splat vector constant out into a byte vector. */
7848 for (i = 0; i < n_elts; i++)
7850 rtx el = CONST_VECTOR_ELT (op, i);
7851 unsigned HOST_WIDE_INT elpart;
7852 unsigned int part, parts;
7854 if (GET_CODE (el) == CONST_INT)
7856 elpart = INTVAL (el);
7857 parts = 1;
7859 else if (GET_CODE (el) == CONST_DOUBLE)
7861 elpart = CONST_DOUBLE_LOW (el);
7862 parts = 2;
7864 else
7865 gcc_unreachable ();
7867 for (part = 0; part < parts; part++)
7869 unsigned int byte;
7870 for (byte = 0; byte < innersize; byte++)
7872 bytes[idx++] = (elpart & 0xff) ^ invmask;
7873 elpart >>= BITS_PER_UNIT;
7875 if (GET_CODE (el) == CONST_DOUBLE)
7876 elpart = CONST_DOUBLE_HIGH (el);
7880 /* Sanity check. */
7881 gcc_assert (idx == GET_MODE_SIZE (mode));
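/* As an example of the splat (assuming no inversion): a V2SImode constant
   with both elements equal to 0x11223344 yields the byte vector
   { 0x44, 0x33, 0x22, 0x11, 0x44, 0x33, 0x22, 0x11 }, least significant
   byte of each element first.  */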
7885 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7886 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7888 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7889 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7891 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7892 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
7894 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7895 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
7897 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
7899 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
7901 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7902 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7904 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7905 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7907 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7908 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
7910 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7911 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
7913 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
7915 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
7917 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7918 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7920 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7921 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7923 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7924 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
7926 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7927 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
7929 CHECK (1, 8, 16, bytes[i] == bytes[0]);
7931 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7932 && bytes[i] == bytes[(i + 8) % idx]);
7934 while (0);
7936 if (immtype == -1)
7937 return -1;
7939 if (elementwidth)
7940 *elementwidth = elsize;
7942 if (modconst)
7944 unsigned HOST_WIDE_INT imm = 0;
7946 /* Un-invert bytes of recognized vector, if necessary. */
7947 if (invmask != 0)
7948 for (i = 0; i < idx; i++)
7949 bytes[i] ^= invmask;
7951 if (immtype == 17)
7953 /* FIXME: Broken on 32-bit H_W_I hosts. */
7954 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7956 for (i = 0; i < 8; i++)
7957 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7958 << (i * BITS_PER_UNIT);
7960 *modconst = GEN_INT (imm);
7962 else
7964 unsigned HOST_WIDE_INT imm = 0;
7966 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7967 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7969 *modconst = GEN_INT (imm);
7973 return immtype;
7974 #undef CHECK
7977 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
7978 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
7979 float elements), and a modified constant (whatever should be output for a
7980 VMOV) in *MODCONST. */
7983 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
7984 rtx *modconst, int *elementwidth)
7986 rtx tmpconst;
7987 int tmpwidth;
7988 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
7990 if (retval == -1)
7991 return 0;
7993 if (modconst)
7994 *modconst = tmpconst;
7996 if (elementwidth)
7997 *elementwidth = tmpwidth;
7999 return 1;
8002 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8003 the immediate is valid, write a constant suitable for using as an operand
8004 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8005 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8008 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8009 rtx *modconst, int *elementwidth)
8011 rtx tmpconst;
8012 int tmpwidth;
8013 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8015 if (retval < 0 || retval > 5)
8016 return 0;
8018 if (modconst)
8019 *modconst = tmpconst;
8021 if (elementwidth)
8022 *elementwidth = tmpwidth;
8024 return 1;
8027 /* Return a string suitable for output of Neon immediate logic operation
8028 MNEM. */
8030 char *
8031 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8032 int inverse, int quad)
8034 int width, is_valid;
8035 static char templ[40];
8037 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8039 gcc_assert (is_valid != 0);
8041 if (quad)
8042 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8043 else
8044 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8046 return templ;
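/* For example, with MNEM "vorr", a 32-bit element width and QUAD set, the
   template produced above is "vorr.i32\t%q0, %2"; the operand letters are
   filled in later by the usual output machinery.  */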
8049 /* Output a sequence of pairwise operations to implement a reduction.
8050 NOTE: We do "too much work" here, because pairwise operations work on two
8051 registers-worth of operands in one go. Unfortunately we don't think we can
8052 exploit those extra calculations to do the full operation in fewer steps.
8053 Although all vector elements of the result but the first are ignored, we
8054 actually calculate the same result in each of the elements. An alternative
8055 such as initially loading a vector with zero to use as each of the second
8056 operands would use up an additional register and take an extra instruction,
8057 for no particular gain. */
8059 void
8060 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8061 rtx (*reduc) (rtx, rtx, rtx))
8063 enum machine_mode inner = GET_MODE_INNER (mode);
8064 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8065 rtx tmpsum = op1;
8067 for (i = parts / 2; i >= 1; i /= 2)
8069 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8070 emit_insn (reduc (dest, tmpsum, tmpsum));
8071 tmpsum = dest;
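/* For instance, reducing a V4SImode vector takes two passes through the loop
   above: the first pairwise operation writes a fresh register, the second
   writes OP0, and every element of OP0 ends up holding the full reduction.  */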
8075 /* If VALS is a vector constant that can be loaded into a register
8076 using VDUP, generate instructions to do so and return an RTX to
8077 assign to the register. Otherwise return NULL_RTX. */
8079 static rtx
8080 neon_vdup_constant (rtx vals)
8082 enum machine_mode mode = GET_MODE (vals);
8083 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8084 int n_elts = GET_MODE_NUNITS (mode);
8085 bool all_same = true;
8086 rtx x;
8087 int i;
8089 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8090 return NULL_RTX;
8092 for (i = 0; i < n_elts; ++i)
8094 x = XVECEXP (vals, 0, i);
8095 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8096 all_same = false;
8099 if (!all_same)
8100 /* The elements are not all the same. We could handle repeating
8101 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8102 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8103 vdup.i16). */
8104 return NULL_RTX;
8106 /* We can load this constant by using VDUP and a constant in a
8107 single ARM register. This will be cheaper than a vector
8108 load. */
8110 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8111 return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8112 UNSPEC_VDUP_N);
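/* For example, a V8QImode constant with every element equal to 7 is handled
   here: the 7 is moved into a core register and a single VDUP replicates it
   across the vector, rather than loading the constant from memory.  */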
8115 /* Generate code to load VALS, which is a PARALLEL containing only
8116 constants (for vec_init) or CONST_VECTOR, efficiently into a
8117 register. Returns an RTX to copy into the register, or NULL_RTX
8118 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8121 neon_make_constant (rtx vals)
8123 enum machine_mode mode = GET_MODE (vals);
8124 rtx target;
8125 rtx const_vec = NULL_RTX;
8126 int n_elts = GET_MODE_NUNITS (mode);
8127 int n_const = 0;
8128 int i;
8130 if (GET_CODE (vals) == CONST_VECTOR)
8131 const_vec = vals;
8132 else if (GET_CODE (vals) == PARALLEL)
8134 /* A CONST_VECTOR must contain only CONST_INTs and
8135 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8136 Only store valid constants in a CONST_VECTOR. */
8137 for (i = 0; i < n_elts; ++i)
8139 rtx x = XVECEXP (vals, 0, i);
8140 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8141 n_const++;
8143 if (n_const == n_elts)
8144 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8146 else
8147 gcc_unreachable ();
8149 if (const_vec != NULL
8150 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8151 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8152 return const_vec;
8153 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8154 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8155 pipeline cycle; creating the constant takes one or two ARM
8156 pipeline cycles. */
8157 return target;
8158 else if (const_vec != NULL_RTX)
8159 /* Load from constant pool. On Cortex-A8 this takes two cycles
8160 (for either double or quad vectors). We can not take advantage
8161 of single-cycle VLD1 because we need a PC-relative addressing
8162 mode. */
8163 return const_vec;
8164 else
8165 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8166 We can not construct an initializer. */
8167 return NULL_RTX;
8170 /* Initialize vector TARGET to VALS. */
8172 void
8173 neon_expand_vector_init (rtx target, rtx vals)
8175 enum machine_mode mode = GET_MODE (target);
8176 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8177 int n_elts = GET_MODE_NUNITS (mode);
8178 int n_var = 0, one_var = -1;
8179 bool all_same = true;
8180 rtx x, mem;
8181 int i;
8183 for (i = 0; i < n_elts; ++i)
8185 x = XVECEXP (vals, 0, i);
8186 if (!CONSTANT_P (x))
8187 ++n_var, one_var = i;
8189 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8190 all_same = false;
8193 if (n_var == 0)
8195 rtx constant = neon_make_constant (vals);
8196 if (constant != NULL_RTX)
8198 emit_move_insn (target, constant);
8199 return;
8203 /* Splat a single non-constant element if we can. */
8204 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8206 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8207 emit_insn (gen_rtx_SET (VOIDmode, target,
8208 gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8209 UNSPEC_VDUP_N)));
8210 return;
8213 /* One field is non-constant. Load constant then overwrite varying
8214 field. This is more efficient than using the stack. */
8215 if (n_var == 1)
8217 rtx copy = copy_rtx (vals);
8218 rtvec ops;
8220 /* Load constant part of vector, substitute neighboring value for
8221 varying element. */
8222 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8223 neon_expand_vector_init (target, copy);
8225 /* Insert variable. */
8226 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8227 ops = gen_rtvec (3, x, target, GEN_INT (one_var));
8228 emit_insn (gen_rtx_SET (VOIDmode, target,
8229 gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
8230 return;
8233 /* Construct the vector in memory one field at a time
8234 and load the whole vector. */
8235 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8236 for (i = 0; i < n_elts; i++)
8237 emit_move_insn (adjust_address_nv (mem, inner_mode,
8238 i * GET_MODE_SIZE (inner_mode)),
8239 XVECEXP (vals, 0, i));
8240 emit_move_insn (target, mem);
8243 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8244 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8245 reported source locations are bogus. */
8247 static void
8248 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8249 const char *err)
8251 HOST_WIDE_INT lane;
8253 gcc_assert (GET_CODE (operand) == CONST_INT);
8255 lane = INTVAL (operand);
8257 if (lane < low || lane >= high)
8258 error (err);
8261 /* Bounds-check lanes. */
8263 void
8264 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8266 bounds_check (operand, low, high, "lane out of range");
8269 /* Bounds-check constants. */
8271 void
8272 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8274 bounds_check (operand, low, high, "constant out of range");
8277 HOST_WIDE_INT
8278 neon_element_bits (enum machine_mode mode)
8280 if (mode == DImode)
8281 return GET_MODE_BITSIZE (mode);
8282 else
8283 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
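/* E.g. this returns 64 for DImode and 8 for V8QImode (the width of one
   QImode element).  */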
8287 /* Predicates for `match_operand' and `match_operator'. */
8289 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8291 cirrus_memory_offset (rtx op)
8293 /* Reject eliminable registers. */
8294 if (! (reload_in_progress || reload_completed)
8295 && ( reg_mentioned_p (frame_pointer_rtx, op)
8296 || reg_mentioned_p (arg_pointer_rtx, op)
8297 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8298 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8299 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8300 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8301 return 0;
8303 if (GET_CODE (op) == MEM)
8305 rtx ind;
8307 ind = XEXP (op, 0);
8309 /* Match: (mem (reg)). */
8310 if (GET_CODE (ind) == REG)
8311 return 1;
8313 /* Match:
8314 (mem (plus (reg)
8315 (const))). */
8316 if (GET_CODE (ind) == PLUS
8317 && GET_CODE (XEXP (ind, 0)) == REG
8318 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8319 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8320 return 1;
8323 return 0;
8326 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8327 WB is true if full writeback address modes are allowed and is false
8328 if limited writeback address modes (POST_INC and PRE_DEC) are
8329 allowed. */
8332 arm_coproc_mem_operand (rtx op, bool wb)
8334 rtx ind;
8336 /* Reject eliminable registers. */
8337 if (! (reload_in_progress || reload_completed)
8338 && ( reg_mentioned_p (frame_pointer_rtx, op)
8339 || reg_mentioned_p (arg_pointer_rtx, op)
8340 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8341 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8342 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8343 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8344 return FALSE;
8346 /* Constants are converted into offsets from labels. */
8347 if (GET_CODE (op) != MEM)
8348 return FALSE;
8350 ind = XEXP (op, 0);
8352 if (reload_completed
8353 && (GET_CODE (ind) == LABEL_REF
8354 || (GET_CODE (ind) == CONST
8355 && GET_CODE (XEXP (ind, 0)) == PLUS
8356 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8357 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8358 return TRUE;
8360 /* Match: (mem (reg)). */
8361 if (GET_CODE (ind) == REG)
8362 return arm_address_register_rtx_p (ind, 0);
8364 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8365 acceptable in any case (subject to verification by
8366 arm_address_register_rtx_p). We need WB to be true to accept
8367 PRE_INC and POST_DEC. */
8368 if (GET_CODE (ind) == POST_INC
8369 || GET_CODE (ind) == PRE_DEC
8370 || (wb
8371 && (GET_CODE (ind) == PRE_INC
8372 || GET_CODE (ind) == POST_DEC)))
8373 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8375 if (wb
8376 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8377 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8378 && GET_CODE (XEXP (ind, 1)) == PLUS
8379 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8380 ind = XEXP (ind, 1);
8382 /* Match:
8383 (plus (reg)
8384 (const)). */
8385 if (GET_CODE (ind) == PLUS
8386 && GET_CODE (XEXP (ind, 0)) == REG
8387 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8388 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8389 && INTVAL (XEXP (ind, 1)) > -1024
8390 && INTVAL (XEXP (ind, 1)) < 1024
8391 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8392 return TRUE;
8394 return FALSE;
8397 /* Return TRUE if OP is a memory operand which we can load or store a vector
8398 to/from. TYPE is one of the following values:
8399 0 - Vector load/store (vldr)
8400 1 - Core registers (ldm)
8401 2 - Element/structure loads (vld1)
8404 neon_vector_mem_operand (rtx op, int type)
8406 rtx ind;
8408 /* Reject eliminable registers. */
8409 if (! (reload_in_progress || reload_completed)
8410 && ( reg_mentioned_p (frame_pointer_rtx, op)
8411 || reg_mentioned_p (arg_pointer_rtx, op)
8412 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8413 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8414 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8415 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8416 return FALSE;
8418 /* Constants are converted into offsets from labels. */
8419 if (GET_CODE (op) != MEM)
8420 return FALSE;
8422 ind = XEXP (op, 0);
8424 if (reload_completed
8425 && (GET_CODE (ind) == LABEL_REF
8426 || (GET_CODE (ind) == CONST
8427 && GET_CODE (XEXP (ind, 0)) == PLUS
8428 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8429 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8430 return TRUE;
8432 /* Match: (mem (reg)). */
8433 if (GET_CODE (ind) == REG)
8434 return arm_address_register_rtx_p (ind, 0);
8436 /* Allow post-increment with Neon registers. */
8437 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8438 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8440 /* FIXME: vld1 allows register post-modify. */
8442 /* Match:
8443 (plus (reg)
8444 (const)). */
8445 if (type == 0
8446 && GET_CODE (ind) == PLUS
8447 && GET_CODE (XEXP (ind, 0)) == REG
8448 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8449 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8450 && INTVAL (XEXP (ind, 1)) > -1024
8451 && INTVAL (XEXP (ind, 1)) < 1016
8452 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8453 return TRUE;
8455 return FALSE;
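/* So for TYPE 0 the (reg + const) form accepts word-aligned offsets from
   -1020 up to +1012 inclusive; for example [rN, #1012] is accepted while
   [rN, #1016] is not.  */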
8458 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8459 type. */
8461 neon_struct_mem_operand (rtx op)
8463 rtx ind;
8465 /* Reject eliminable registers. */
8466 if (! (reload_in_progress || reload_completed)
8467 && ( reg_mentioned_p (frame_pointer_rtx, op)
8468 || reg_mentioned_p (arg_pointer_rtx, op)
8469 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8470 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8471 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8472 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8473 return FALSE;
8475 /* Constants are converted into offsets from labels. */
8476 if (GET_CODE (op) != MEM)
8477 return FALSE;
8479 ind = XEXP (op, 0);
8481 if (reload_completed
8482 && (GET_CODE (ind) == LABEL_REF
8483 || (GET_CODE (ind) == CONST
8484 && GET_CODE (XEXP (ind, 0)) == PLUS
8485 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8486 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8487 return TRUE;
8489 /* Match: (mem (reg)). */
8490 if (GET_CODE (ind) == REG)
8491 return arm_address_register_rtx_p (ind, 0);
8493 return FALSE;
8496 /* Return true if X is a register that will be eliminated later on. */
8498 arm_eliminable_register (rtx x)
8500 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8501 || REGNO (x) == ARG_POINTER_REGNUM
8502 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8503 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8506 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
8507 coprocessor registers. Otherwise return NO_REGS. */
8509 enum reg_class
8510 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8512 if (mode == HFmode)
8514 if (!TARGET_NEON_FP16)
8515 return GENERAL_REGS;
8516 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8517 return NO_REGS;
8518 return GENERAL_REGS;
8521 if (TARGET_NEON
8522 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8523 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8524 && neon_vector_mem_operand (x, 0))
8525 return NO_REGS;
8527 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8528 return NO_REGS;
8530 return GENERAL_REGS;
8533 /* Values which must be returned in the most-significant end of the return
8534 register. */
8536 static bool
8537 arm_return_in_msb (const_tree valtype)
8539 return (TARGET_AAPCS_BASED
8540 && BYTES_BIG_ENDIAN
8541 && (AGGREGATE_TYPE_P (valtype)
8542 || TREE_CODE (valtype) == COMPLEX_TYPE));
8545 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8546 Used by the Cirrus Maverick code, which has to work around
8547 a hardware bug triggered by such instructions. */
8548 static bool
8549 arm_memory_load_p (rtx insn)
8551 rtx body, lhs, rhs;
8553 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8554 return false;
8556 body = PATTERN (insn);
8558 if (GET_CODE (body) != SET)
8559 return false;
8561 lhs = XEXP (body, 0);
8562 rhs = XEXP (body, 1);
8564 lhs = REG_OR_SUBREG_RTX (lhs);
8566 /* If the destination is not a general purpose
8567 register we do not have to worry. */
8568 if (GET_CODE (lhs) != REG
8569 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8570 return false;
8572 /* As well as loads from memory we also have to react
8573 to loads of invalid constants which will be turned
8574 into loads from the minipool. */
8575 return (GET_CODE (rhs) == MEM
8576 || GET_CODE (rhs) == SYMBOL_REF
8577 || note_invalid_constants (insn, -1, false));
8580 /* Return TRUE if INSN is a Cirrus instruction. */
8581 static bool
8582 arm_cirrus_insn_p (rtx insn)
8584 enum attr_cirrus attr;
8586 /* get_attr cannot accept USE or CLOBBER. */
8587 if (!insn
8588 || GET_CODE (insn) != INSN
8589 || GET_CODE (PATTERN (insn)) == USE
8590 || GET_CODE (PATTERN (insn)) == CLOBBER)
8591 return 0;
8593 attr = get_attr_cirrus (insn);
8595 return attr != CIRRUS_NOT;
8598 /* Cirrus reorg for invalid instruction combinations. */
8599 static void
8600 cirrus_reorg (rtx first)
8602 enum attr_cirrus attr;
8603 rtx body = PATTERN (first);
8604 rtx t;
8605 int nops;
8607 /* Any branch must be followed by two non-Cirrus instructions. */
8608 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8610 nops = 0;
8611 t = next_nonnote_insn (first);
8613 if (arm_cirrus_insn_p (t))
8614 ++ nops;
8616 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8617 ++ nops;
8619 while (nops --)
8620 emit_insn_after (gen_nop (), first);
8622 return;
8625 /* (float (blah)) is in parallel with a clobber. */
8626 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8627 body = XVECEXP (body, 0, 0);
8629 if (GET_CODE (body) == SET)
8631 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8633 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8634 be followed by a non-Cirrus insn. */
8635 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8637 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8638 emit_insn_after (gen_nop (), first);
8640 return;
8642 else if (arm_memory_load_p (first))
8644 unsigned int arm_regno;
8646 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8647 ldr/cfmv64hr combination where the Rd field is the same
8648 in both instructions must be split with a non-Cirrus
8649 insn. Example:
8651 ldr r0, blah
8653 cfmvsr mvf0, r0. */
8655 /* Get Arm register number for ldr insn. */
8656 if (GET_CODE (lhs) == REG)
8657 arm_regno = REGNO (lhs);
8658 else
8660 gcc_assert (GET_CODE (rhs) == REG);
8661 arm_regno = REGNO (rhs);
8664 /* Next insn. */
8665 first = next_nonnote_insn (first);
8667 if (! arm_cirrus_insn_p (first))
8668 return;
8670 body = PATTERN (first);
8672 /* (float (blah)) is in parallel with a clobber. */
8673 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8674 body = XVECEXP (body, 0, 0);
8676 if (GET_CODE (body) == FLOAT)
8677 body = XEXP (body, 0);
8679 if (get_attr_cirrus (first) == CIRRUS_MOVE
8680 && GET_CODE (XEXP (body, 1)) == REG
8681 && arm_regno == REGNO (XEXP (body, 1)))
8682 emit_insn_after (gen_nop (), first);
8684 return;
8688 /* get_attr cannot accept USE or CLOBBER. */
8689 if (!first
8690 || GET_CODE (first) != INSN
8691 || GET_CODE (PATTERN (first)) == USE
8692 || GET_CODE (PATTERN (first)) == CLOBBER)
8693 return;
8695 attr = get_attr_cirrus (first);
8697 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8698 must be followed by a non-coprocessor instruction. */
8699 if (attr == CIRRUS_COMPARE)
8701 nops = 0;
8703 t = next_nonnote_insn (first);
8705 if (arm_cirrus_insn_p (t))
8706 ++ nops;
8708 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8709 ++ nops;
8711 while (nops --)
8712 emit_insn_after (gen_nop (), first);
8714 return;
8718 /* Return TRUE if X references a SYMBOL_REF. */
8720 symbol_mentioned_p (rtx x)
8722 const char * fmt;
8723 int i;
8725 if (GET_CODE (x) == SYMBOL_REF)
8726 return 1;
8728 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8729 are constant offsets, not symbols. */
8730 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8731 return 0;
8733 fmt = GET_RTX_FORMAT (GET_CODE (x));
8735 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8737 if (fmt[i] == 'E')
8739 int j;
8741 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8742 if (symbol_mentioned_p (XVECEXP (x, i, j)))
8743 return 1;
8745 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8746 return 1;
8749 return 0;
8752 /* Return TRUE if X references a LABEL_REF. */
8754 label_mentioned_p (rtx x)
8756 const char * fmt;
8757 int i;
8759 if (GET_CODE (x) == LABEL_REF)
8760 return 1;
8762 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8763 instruction, but they are constant offsets, not symbols. */
8764 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8765 return 0;
8767 fmt = GET_RTX_FORMAT (GET_CODE (x));
8768 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8770 if (fmt[i] == 'E')
8772 int j;
8774 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8775 if (label_mentioned_p (XVECEXP (x, i, j)))
8776 return 1;
8778 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
8779 return 1;
8782 return 0;
8786 tls_mentioned_p (rtx x)
8788 switch (GET_CODE (x))
8790 case CONST:
8791 return tls_mentioned_p (XEXP (x, 0));
8793 case UNSPEC:
8794 if (XINT (x, 1) == UNSPEC_TLS)
8795 return 1;
8797 default:
8798 return 0;
8802 /* Must not copy a SET whose source operand is PC-relative. */
8804 static bool
8805 arm_cannot_copy_insn_p (rtx insn)
8807 rtx pat = PATTERN (insn);
8809 if (GET_CODE (pat) == SET)
8811 rtx rhs = SET_SRC (pat);
8813 if (GET_CODE (rhs) == UNSPEC
8814 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
8815 return TRUE;
8817 if (GET_CODE (rhs) == MEM
8818 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
8819 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
8820 return TRUE;
8823 return FALSE;
8826 enum rtx_code
8827 minmax_code (rtx x)
8829 enum rtx_code code = GET_CODE (x);
8831 switch (code)
8833 case SMAX:
8834 return GE;
8835 case SMIN:
8836 return LE;
8837 case UMIN:
8838 return LEU;
8839 case UMAX:
8840 return GEU;
8841 default:
8842 gcc_unreachable ();
8846 /* Return 1 if memory locations are adjacent. */
8848 adjacent_mem_locations (rtx a, rtx b)
8850 /* We don't guarantee to preserve the order of these memory refs. */
8851 if (volatile_refs_p (a) || volatile_refs_p (b))
8852 return 0;
8854 if ((GET_CODE (XEXP (a, 0)) == REG
8855 || (GET_CODE (XEXP (a, 0)) == PLUS
8856 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
8857 && (GET_CODE (XEXP (b, 0)) == REG
8858 || (GET_CODE (XEXP (b, 0)) == PLUS
8859 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
8861 HOST_WIDE_INT val0 = 0, val1 = 0;
8862 rtx reg0, reg1;
8863 int val_diff;
8865 if (GET_CODE (XEXP (a, 0)) == PLUS)
8867 reg0 = XEXP (XEXP (a, 0), 0);
8868 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
8870 else
8871 reg0 = XEXP (a, 0);
8873 if (GET_CODE (XEXP (b, 0)) == PLUS)
8875 reg1 = XEXP (XEXP (b, 0), 0);
8876 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
8878 else
8879 reg1 = XEXP (b, 0);
8881 /* Don't accept any offset that will require multiple
8882 instructions to handle, since this would cause the
8883 arith_adjacentmem pattern to output an overlong sequence. */
8884 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
8885 return 0;
8887 /* Don't allow an eliminable register: register elimination can make
8888 the offset too large. */
8889 if (arm_eliminable_register (reg0))
8890 return 0;
8892 val_diff = val1 - val0;
8894 if (arm_ld_sched)
8896 /* If the target has load delay slots, then there's no benefit
8897 to using an ldm instruction unless the offset is zero and
8898 we are optimizing for size. */
8899 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
8900 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
8901 && (val_diff == 4 || val_diff == -4));
8904 return ((REGNO (reg0) == REGNO (reg1))
8905 && (val_diff == 4 || val_diff == -4));
8908 return 0;
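/* For example, the references [r3] and [r3, #4] are adjacent in the sense
   above (same base register, offsets differing by 4); on arm_ld_sched
   targets they are only reported as such when optimizing for size, as
   explained above.  */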
8912 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
8913 HOST_WIDE_INT *load_offset)
8915 int unsorted_regs[4];
8916 HOST_WIDE_INT unsorted_offsets[4];
8917 int order[4];
8918 int base_reg = -1;
8919 int i;
8921 /* Can only handle 2, 3, or 4 insns at present,
8922 though could be easily extended if required. */
8923 gcc_assert (nops >= 2 && nops <= 4);
8925 memset (order, 0, 4 * sizeof (int));
8927 /* Loop over the operands and check that the memory references are
8928 suitable (i.e. immediate offsets from the same base register). At
8929 the same time, extract the target register, and the memory
8930 offsets. */
8931 for (i = 0; i < nops; i++)
8933 rtx reg;
8934 rtx offset;
8936 /* Convert a subreg of a mem into the mem itself. */
8937 if (GET_CODE (operands[nops + i]) == SUBREG)
8938 operands[nops + i] = alter_subreg (operands + (nops + i));
8940 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
8942 /* Don't reorder volatile memory references; it doesn't seem worth
8943 looking for the case where the order is ok anyway. */
8944 if (MEM_VOLATILE_P (operands[nops + i]))
8945 return 0;
8947 offset = const0_rtx;
8949 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
8950 || (GET_CODE (reg) == SUBREG
8951 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8952 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
8953 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
8954 == REG)
8955 || (GET_CODE (reg) == SUBREG
8956 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8957 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
8958 == CONST_INT)))
8960 if (i == 0)
8962 base_reg = REGNO (reg);
8963 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
8964 ? REGNO (operands[i])
8965 : REGNO (SUBREG_REG (operands[i])));
8966 order[0] = 0;
8968 else
8970 if (base_reg != (int) REGNO (reg))
8971 /* Not addressed from the same base register. */
8972 return 0;
8974 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
8975 ? REGNO (operands[i])
8976 : REGNO (SUBREG_REG (operands[i])));
8977 if (unsorted_regs[i] < unsorted_regs[order[0]])
8978 order[0] = i;
8981 /* If it isn't an integer register, or if it overwrites the
8982 base register but isn't the last insn in the list, then
8983 we can't do this. */
8984 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
8985 || (i != nops - 1 && unsorted_regs[i] == base_reg))
8986 return 0;
8988 unsorted_offsets[i] = INTVAL (offset);
8990 else
8991 /* Not a suitable memory address. */
8992 return 0;
8995 /* All the useful information has now been extracted from the
8996 operands into unsorted_regs and unsorted_offsets; additionally,
8997 order[0] has been set to the lowest numbered register in the
8998 list. Sort the registers into order, and check that the memory
8999 offsets are ascending and adjacent. */
9001 for (i = 1; i < nops; i++)
9003 int j;
9005 order[i] = order[i - 1];
9006 for (j = 0; j < nops; j++)
9007 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9008 && (order[i] == order[i - 1]
9009 || unsorted_regs[j] < unsorted_regs[order[i]]))
9010 order[i] = j;
9012 /* Have we found a suitable register? If not, one must be used more
9013 than once. */
9014 if (order[i] == order[i - 1])
9015 return 0;
9017 /* Is the memory address adjacent and ascending? */
9018 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9019 return 0;
9022 if (base)
9024 *base = base_reg;
9026 for (i = 0; i < nops; i++)
9027 regs[i] = unsorted_regs[order[i]];
9029 *load_offset = unsorted_offsets[order[0]];
9032 if (unsorted_offsets[order[0]] == 0)
9033 return 1; /* ldmia */
9035 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9036 return 2; /* ldmib */
9038 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9039 return 3; /* ldmda */
9041 if (unsorted_offsets[order[nops - 1]] == -4)
9042 return 4; /* ldmdb */
9044 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9045 if the offset isn't small enough. The reason 2 ldrs are faster
9046 is because these ARMs are able to do more than one cache access
9047 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9048 whilst the ARM8 has a double bandwidth cache. This means that
9049 these cores can do both an instruction fetch and a data fetch in
9050 a single cycle, so the trick of calculating the address into a
9051 scratch register (one of the result regs) and then doing a load
9052 multiple actually becomes slower (and no smaller in code size).
9053 That is the transformation
9055 ldr rd1, [rbase + offset]
9056 ldr rd2, [rbase + offset + 4]
9058 to
9060 add rd1, rbase, offset
9061 ldmia rd1, {rd1, rd2}
9063 produces worse code -- '3 cycles + any stalls on rd2' instead of
9064 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9065 access per cycle, the first sequence could never complete in less
9066 than 6 cycles, whereas the ldm sequence would only take 5 and
9067 would make better use of sequential accesses if not hitting the
9068 cache.
9070 We cheat here and test 'arm_ld_sched' which we currently know to
9071 only be true for the ARM8, ARM9 and StrongARM. If this ever
9072 changes, then the test below needs to be reworked. */
9073 if (nops == 2 && arm_ld_sched)
9074 return 0;
9076 /* Can't do it without setting up the offset, only do this if it takes
9077 no more than one insn. */
9078 return (const_ok_for_arm (unsorted_offsets[order[0]])
9079 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
9082 const char *
9083 emit_ldm_seq (rtx *operands, int nops)
9085 int regs[4];
9086 int base_reg;
9087 HOST_WIDE_INT offset;
9088 char buf[100];
9089 int i;
9091 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9093 case 1:
9094 strcpy (buf, "ldm%(ia%)\t");
9095 break;
9097 case 2:
9098 strcpy (buf, "ldm%(ib%)\t");
9099 break;
9101 case 3:
9102 strcpy (buf, "ldm%(da%)\t");
9103 break;
9105 case 4:
9106 strcpy (buf, "ldm%(db%)\t");
9107 break;
9109 case 5:
9110 if (offset >= 0)
9111 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9112 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9113 (long) offset);
9114 else
9115 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9116 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9117 (long) -offset);
9118 output_asm_insn (buf, operands);
9119 base_reg = regs[0];
9120 strcpy (buf, "ldm%(ia%)\t");
9121 break;
9123 default:
9124 gcc_unreachable ();
9127 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9128 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9130 for (i = 1; i < nops; i++)
9131 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9132 reg_names[regs[i]]);
9134 strcat (buf, "}\t%@ phole ldm");
9136 output_asm_insn (buf, operands);
9137 return "";
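/* As an illustration, for two loads from [r4] and [r4, #4] into r0 and r1
   the code above selects case 1 and emits something like
   "ldmia\tr4, {r0, r1}\t@ phole ldm" once the %(...%) and %@ escapes have
   been expanded for the current assembler dialect.  */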
9141 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9142 HOST_WIDE_INT * load_offset)
9144 int unsorted_regs[4];
9145 HOST_WIDE_INT unsorted_offsets[4];
9146 int order[4];
9147 int base_reg = -1;
9148 int i;
9150 /* Can only handle 2, 3, or 4 insns at present, though could be easily
9151 extended if required. */
9152 gcc_assert (nops >= 2 && nops <= 4);
9154 memset (order, 0, 4 * sizeof (int));
9156 /* Loop over the operands and check that the memory references are
9157 suitable (i.e. immediate offsets from the same base register). At
9158 the same time, extract the target register, and the memory
9159 offsets. */
9160 for (i = 0; i < nops; i++)
9162 rtx reg;
9163 rtx offset;
9165 /* Convert a subreg of a mem into the mem itself. */
9166 if (GET_CODE (operands[nops + i]) == SUBREG)
9167 operands[nops + i] = alter_subreg (operands + (nops + i));
9169 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9171 /* Don't reorder volatile memory references; it doesn't seem worth
9172 looking for the case where the order is ok anyway. */
9173 if (MEM_VOLATILE_P (operands[nops + i]))
9174 return 0;
9176 offset = const0_rtx;
9178 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9179 || (GET_CODE (reg) == SUBREG
9180 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9181 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9182 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9183 == REG)
9184 || (GET_CODE (reg) == SUBREG
9185 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9186 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9187 == CONST_INT)))
9189 if (i == 0)
9191 base_reg = REGNO (reg);
9192 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
9193 ? REGNO (operands[i])
9194 : REGNO (SUBREG_REG (operands[i])));
9195 order[0] = 0;
9197 else
9199 if (base_reg != (int) REGNO (reg))
9200 /* Not addressed from the same base register. */
9201 return 0;
9203 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9204 ? REGNO (operands[i])
9205 : REGNO (SUBREG_REG (operands[i])));
9206 if (unsorted_regs[i] < unsorted_regs[order[0]])
9207 order[0] = i;
9210 /* If it isn't an integer register, then we can't do this. */
9211 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9212 return 0;
9214 unsorted_offsets[i] = INTVAL (offset);
9216 else
9217 /* Not a suitable memory address. */
9218 return 0;
9221 /* All the useful information has now been extracted from the
9222 operands into unsorted_regs and unsorted_offsets; additionally,
9223 order[0] has been set to the lowest numbered register in the
9224 list. Sort the registers into order, and check that the memory
9225 offsets are ascending and adjacent. */
9227 for (i = 1; i < nops; i++)
9229 int j;
9231 order[i] = order[i - 1];
9232 for (j = 0; j < nops; j++)
9233 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9234 && (order[i] == order[i - 1]
9235 || unsorted_regs[j] < unsorted_regs[order[i]]))
9236 order[i] = j;
9238 /* Have we found a suitable register? If not, one must be used more
9239 than once. */
9240 if (order[i] == order[i - 1])
9241 return 0;
9243 /* Is the memory address adjacent and ascending? */
9244 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9245 return 0;
9248 if (base)
9250 *base = base_reg;
9252 for (i = 0; i < nops; i++)
9253 regs[i] = unsorted_regs[order[i]];
9255 *load_offset = unsorted_offsets[order[0]];
9258 if (unsorted_offsets[order[0]] == 0)
9259 return 1; /* stmia */
9261 if (unsorted_offsets[order[0]] == 4)
9262 return 2; /* stmib */
9264 if (unsorted_offsets[order[nops - 1]] == 0)
9265 return 3; /* stmda */
9267 if (unsorted_offsets[order[nops - 1]] == -4)
9268 return 4; /* stmdb */
9270 return 0;
9273 const char *
9274 emit_stm_seq (rtx *operands, int nops)
9276 int regs[4];
9277 int base_reg;
9278 HOST_WIDE_INT offset;
9279 char buf[100];
9280 int i;
9282 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9284 case 1:
9285 strcpy (buf, "stm%(ia%)\t");
9286 break;
9288 case 2:
9289 strcpy (buf, "stm%(ib%)\t");
9290 break;
9292 case 3:
9293 strcpy (buf, "stm%(da%)\t");
9294 break;
9296 case 4:
9297 strcpy (buf, "stm%(db%)\t");
9298 break;
9300 default:
9301 gcc_unreachable ();
9304 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9305 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9307 for (i = 1; i < nops; i++)
9308 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9309 reg_names[regs[i]]);
9311 strcat (buf, "}\t%@ phole stm");
9313 output_asm_insn (buf, operands);
9314 return "";
9317 /* Routines for use in generating RTL. */
9320 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9321 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9323 HOST_WIDE_INT offset = *offsetp;
9324 int i = 0, j;
9325 rtx result;
9326 int sign = up ? 1 : -1;
9327 rtx mem, addr;
9329 /* XScale has load-store double instructions, but they have stricter
9330 alignment requirements than load-store multiple, so we cannot
9331 use them.
9333 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9334 the pipeline until completion.
9336 NREGS CYCLES
9337 1 3
9338 2 4
9339 3 5
9340 4 6
9342 An ldr instruction takes 1-3 cycles, but does not block the
9343 pipeline.
9345 NREGS CYCLES
9346 1 1-3
9347 2 2-6
9348 3 3-9
9349 4 4-12
9351 Best case ldr will always win. However, the more ldr instructions
9352 we issue, the less likely we are to be able to schedule them well.
9353 Using ldr instructions also increases code size.
9355 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9356 for counts of 3 or 4 regs. */
9357 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9359 rtx seq;
9361 start_sequence ();
9363 for (i = 0; i < count; i++)
9365 addr = plus_constant (from, i * 4 * sign);
9366 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9367 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9368 offset += 4 * sign;
9371 if (write_back)
9373 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9374 *offsetp = offset;
9377 seq = get_insns ();
9378 end_sequence ();
9380 return seq;
9383 result = gen_rtx_PARALLEL (VOIDmode,
9384 rtvec_alloc (count + (write_back ? 1 : 0)));
9385 if (write_back)
9387 XVECEXP (result, 0, 0)
9388 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9389 i = 1;
9390 count++;
9393 for (j = 0; i < count; i++, j++)
9395 addr = plus_constant (from, j * 4 * sign);
9396 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9397 XVECEXP (result, 0, i)
9398 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
9399 offset += 4 * sign;
9402 if (write_back)
9403 *offsetp = offset;
9405 return result;
9409 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9410 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9412 HOST_WIDE_INT offset = *offsetp;
9413 int i = 0, j;
9414 rtx result;
9415 int sign = up ? 1 : -1;
9416 rtx mem, addr;
9418 /* See arm_gen_load_multiple for discussion of
9419 the pros/cons of ldm/stm usage for XScale. */
9420 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9422 rtx seq;
9424 start_sequence ();
9426 for (i = 0; i < count; i++)
9428 addr = plus_constant (to, i * 4 * sign);
9429 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9430 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9431 offset += 4 * sign;
9434 if (write_back)
9436 emit_move_insn (to, plus_constant (to, count * 4 * sign));
9437 *offsetp = offset;
9440 seq = get_insns ();
9441 end_sequence ();
9443 return seq;
9446 result = gen_rtx_PARALLEL (VOIDmode,
9447 rtvec_alloc (count + (write_back ? 1 : 0)));
9448 if (write_back)
9450 XVECEXP (result, 0, 0)
9451 = gen_rtx_SET (VOIDmode, to,
9452 plus_constant (to, count * 4 * sign));
9453 i = 1;
9454 count++;
9457 for (j = 0; i < count; i++, j++)
9459 addr = plus_constant (to, j * 4 * sign);
9460 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9461 XVECEXP (result, 0, i)
9462 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9463 offset += 4 * sign;
9466 if (write_back)
9467 *offsetp = offset;
9469 return result;
9473 arm_gen_movmemqi (rtx *operands)
9475 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9476 HOST_WIDE_INT srcoffset, dstoffset;
9477 int i;
9478 rtx src, dst, srcbase, dstbase;
9479 rtx part_bytes_reg = NULL;
9480 rtx mem;
9482 if (GET_CODE (operands[2]) != CONST_INT
9483 || GET_CODE (operands[3]) != CONST_INT
9484 || INTVAL (operands[2]) > 64
9485 || INTVAL (operands[3]) & 3)
9486 return 0;
9488 dstbase = operands[0];
9489 srcbase = operands[1];
9491 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9492 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9494 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9495 out_words_to_go = INTVAL (operands[2]) / 4;
9496 last_bytes = INTVAL (operands[2]) & 3;
9497 dstoffset = srcoffset = 0;
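/* As a worked example (assuming ARM_NUM_INTS rounds up to whole words):
   copying 14 bytes gives in_words_to_go = 4, out_words_to_go = 3 and
   last_bytes = 2, so three whole words are stored and the final two bytes
   are handled by the byte/halfword code further down.  */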
9499 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9500 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9502 for (i = 0; in_words_to_go >= 2; i+=4)
9504 if (in_words_to_go > 4)
9505 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9506 srcbase, &srcoffset));
9507 else
9508 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9509 FALSE, srcbase, &srcoffset));
9511 if (out_words_to_go)
9513 if (out_words_to_go > 4)
9514 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9515 dstbase, &dstoffset));
9516 else if (out_words_to_go != 1)
9517 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9518 dst, TRUE,
9519 (last_bytes == 0
9520 ? FALSE : TRUE),
9521 dstbase, &dstoffset));
9522 else
9524 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9525 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9526 if (last_bytes != 0)
9528 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9529 dstoffset += 4;
9534 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9535 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9538 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9539 if (out_words_to_go)
9541 rtx sreg;
9543 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9544 sreg = copy_to_reg (mem);
9546 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9547 emit_move_insn (mem, sreg);
9548 in_words_to_go--;
9550 gcc_assert (!in_words_to_go); /* Sanity check */
9553 if (in_words_to_go)
9555 gcc_assert (in_words_to_go > 0);
9557 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9558 part_bytes_reg = copy_to_mode_reg (SImode, mem);
9561 gcc_assert (!last_bytes || part_bytes_reg);
9563 if (BYTES_BIG_ENDIAN && last_bytes)
9565 rtx tmp = gen_reg_rtx (SImode);
9567 /* The bytes we want are in the top end of the word. */
9568 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9569 GEN_INT (8 * (4 - last_bytes))));
9570 part_bytes_reg = tmp;
9572 while (last_bytes)
9574 mem = adjust_automodify_address (dstbase, QImode,
9575 plus_constant (dst, last_bytes - 1),
9576 dstoffset + last_bytes - 1);
9577 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9579 if (--last_bytes)
9581 tmp = gen_reg_rtx (SImode);
9582 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9583 part_bytes_reg = tmp;
9588 else
9590 if (last_bytes > 1)
9592 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9593 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9594 last_bytes -= 2;
9595 if (last_bytes)
9597 rtx tmp = gen_reg_rtx (SImode);
9598 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9599 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9600 part_bytes_reg = tmp;
9601 dstoffset += 2;
9605 if (last_bytes)
9607 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9608 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9612 return 1;
9615 /* Select a dominance comparison mode if possible for a test of the general
9616 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9617 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9618 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9619 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9620 In all cases OP will be either EQ or NE, but we don't need to know which
9621 here. If we are unable to support a dominance comparison we return
9622 CC mode. This will then fail to match for the RTL expressions that
9623 generate this call. */
9624 enum machine_mode
9625 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9627 enum rtx_code cond1, cond2;
9628 int swapped = 0;
9630 /* Currently we will probably get the wrong result if the individual
9631 comparisons are not simple. This also ensures that it is safe to
9632 reverse a comparison if necessary. */
9633 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
9634 != CCmode)
9635 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
9636 != CCmode))
9637 return CCmode;
9639 /* The if_then_else variant of this tests the second condition if the
9640 first passes, but is true if the first fails. Reverse the first
9641 condition to get a true "inclusive-or" expression. */
9642 if (cond_or == DOM_CC_NX_OR_Y)
9643 cond1 = reverse_condition (cond1);
9645 /* If the comparisons are not equal, and one doesn't dominate the other,
9646 then we can't do this. */
9647 if (cond1 != cond2
9648 && !comparison_dominates_p (cond1, cond2)
9649 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
9650 return CCmode;
9652 if (swapped)
9654 enum rtx_code temp = cond1;
9655 cond1 = cond2;
9656 cond2 = temp;
9659 switch (cond1)
9661 case EQ:
9662 if (cond_or == DOM_CC_X_AND_Y)
9663 return CC_DEQmode;
9665 switch (cond2)
9667 case EQ: return CC_DEQmode;
9668 case LE: return CC_DLEmode;
9669 case LEU: return CC_DLEUmode;
9670 case GE: return CC_DGEmode;
9671 case GEU: return CC_DGEUmode;
9672 default: gcc_unreachable ();
9675 case LT:
9676 if (cond_or == DOM_CC_X_AND_Y)
9677 return CC_DLTmode;
9679 switch (cond2)
9681 case LT:
9682 return CC_DLTmode;
9683 case LE:
9684 return CC_DLEmode;
9685 case NE:
9686 return CC_DNEmode;
9687 default:
9688 gcc_unreachable ();
9691 case GT:
9692 if (cond_or == DOM_CC_X_AND_Y)
9693 return CC_DGTmode;
9695 switch (cond2)
9697 case GT:
9698 return CC_DGTmode;
9699 case GE:
9700 return CC_DGEmode;
9701 case NE:
9702 return CC_DNEmode;
9703 default:
9704 gcc_unreachable ();
9707 case LTU:
9708 if (cond_or == DOM_CC_X_AND_Y)
9709 return CC_DLTUmode;
9711 switch (cond2)
9713 case LTU:
9714 return CC_DLTUmode;
9715 case LEU:
9716 return CC_DLEUmode;
9717 case NE:
9718 return CC_DNEmode;
9719 default:
9720 gcc_unreachable ();
9723 case GTU:
9724 if (cond_or == DOM_CC_X_AND_Y)
9725 return CC_DGTUmode;
9727 switch (cond2)
9729 case GTU:
9730 return CC_DGTUmode;
9731 case GEU:
9732 return CC_DGEUmode;
9733 case NE:
9734 return CC_DNEmode;
9735 default:
9736 gcc_unreachable ();
9739 /* The remaining cases only occur when both comparisons are the
9740 same. */
9741 case NE:
9742 gcc_assert (cond1 == cond2);
9743 return CC_DNEmode;
9745 case LE:
9746 gcc_assert (cond1 == cond2);
9747 return CC_DLEmode;
9749 case GE:
9750 gcc_assert (cond1 == cond2);
9751 return CC_DGEmode;
9753 case LEU:
9754 gcc_assert (cond1 == cond2);
9755 return CC_DLEUmode;
9757 case GEU:
9758 gcc_assert (cond1 == cond2);
9759 return CC_DGEUmode;
9761 default:
9762 gcc_unreachable ();
9766 enum machine_mode
9767 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9769 /* All floating point compares return CCFP if it is an equality
9770 comparison, and CCFPE otherwise. */
9771 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
9773 switch (op)
9775 case EQ:
9776 case NE:
9777 case UNORDERED:
9778 case ORDERED:
9779 case UNLT:
9780 case UNLE:
9781 case UNGT:
9782 case UNGE:
9783 case UNEQ:
9784 case LTGT:
9785 return CCFPmode;
9787 case LT:
9788 case LE:
9789 case GT:
9790 case GE:
9791 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
9792 return CCFPmode;
9793 return CCFPEmode;
9795 default:
9796 gcc_unreachable ();
9800 /* A compare with a shifted operand. Because of canonicalization, the
9801 comparison will have to be swapped when we emit the assembler. */
9802 if (GET_MODE (y) == SImode
9803 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9804 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9805 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
9806 || GET_CODE (x) == ROTATERT))
9807 return CC_SWPmode;
9809 /* This operation is performed swapped, but since we only rely on the Z
9810 flag we don't need an additional mode. */
9811 if (GET_MODE (y) == SImode
9812 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9813 && GET_CODE (x) == NEG
9814 && (op == EQ || op == NE))
9815 return CC_Zmode;
9817 /* This is a special case that is used by combine to allow a
9818 comparison of a shifted byte load to be split into a zero-extend
9819 followed by a comparison of the shifted integer (only valid for
9820 equalities and unsigned inequalities). */
9821 if (GET_MODE (x) == SImode
9822 && GET_CODE (x) == ASHIFT
9823 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
9824 && GET_CODE (XEXP (x, 0)) == SUBREG
9825 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
9826 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
9827 && (op == EQ || op == NE
9828 || op == GEU || op == GTU || op == LTU || op == LEU)
9829 && GET_CODE (y) == CONST_INT)
9830 return CC_Zmode;
9832 /* A construct for a conditional compare, if the false arm contains
9833 0, then both conditions must be true, otherwise either condition
9834 must be true. Not all conditions are possible, so CCmode is
9835 returned if it can't be done. */
9836 if (GET_CODE (x) == IF_THEN_ELSE
9837 && (XEXP (x, 2) == const0_rtx
9838 || XEXP (x, 2) == const1_rtx)
9839 && COMPARISON_P (XEXP (x, 0))
9840 && COMPARISON_P (XEXP (x, 1)))
9841 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9842 INTVAL (XEXP (x, 2)));
9844 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
9845 if (GET_CODE (x) == AND
9846 && COMPARISON_P (XEXP (x, 0))
9847 && COMPARISON_P (XEXP (x, 1)))
9848 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9849 DOM_CC_X_AND_Y);
9851 if (GET_CODE (x) == IOR
9852 && COMPARISON_P (XEXP (x, 0))
9853 && COMPARISON_P (XEXP (x, 1)))
9854 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9855 DOM_CC_X_OR_Y);
9857 /* An operation (on Thumb) where we want to test for a single bit.
9858 This is done by shifting that bit up into the top bit of a
9859 scratch register; we can then branch on the sign bit. */
9860 if (TARGET_THUMB1
9861 && GET_MODE (x) == SImode
9862 && (op == EQ || op == NE)
9863 && GET_CODE (x) == ZERO_EXTRACT
9864 && XEXP (x, 1) == const1_rtx)
9865 return CC_Nmode;
9867 /* For an operation that sets the condition codes as a side-effect, the
9868 V flag is not set correctly, so we can only use comparisons where
9869 this doesn't matter. (For LT and GE we can use "mi" and "pl"
9870 instead.) */
9871 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
9872 if (GET_MODE (x) == SImode
9873 && y == const0_rtx
9874 && (op == EQ || op == NE || op == LT || op == GE)
9875 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
9876 || GET_CODE (x) == AND || GET_CODE (x) == IOR
9877 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
9878 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
9879 || GET_CODE (x) == LSHIFTRT
9880 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9881 || GET_CODE (x) == ROTATERT
9882 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
9883 return CC_NOOVmode;
9885 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
9886 return CC_Zmode;
9888 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
9889 && GET_CODE (x) == PLUS
9890 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
9891 return CC_Cmode;
9893 return CCmode;
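/* As one illustration of the rules above: comparing
   (ashift:SI r0 (const_int 2)) against a register returns CC_SWPmode, since
   the operands will have to be swapped when the assembler is emitted.  */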
9896 /* X and Y are two things to compare using CODE. Emit the compare insn and
9897 return the rtx for register 0 in the proper mode. FP means this is a
9898 floating point compare: I don't think that it is needed on the arm. */
9900 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
9902 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
9903 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
9905 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
9907 return cc_reg;
9910 /* Generate a sequence of insns that will generate the correct return
9911 address mask depending on the physical architecture that the program
9912 is running on. */
9914 arm_gen_return_addr_mask (void)
9916 rtx reg = gen_reg_rtx (Pmode);
9918 emit_insn (gen_return_addr_mask (reg));
9919 return reg;
9922 void
9923 arm_reload_in_hi (rtx *operands)
9925 rtx ref = operands[1];
9926 rtx base, scratch;
9927 HOST_WIDE_INT offset = 0;
9929 if (GET_CODE (ref) == SUBREG)
9931 offset = SUBREG_BYTE (ref);
9932 ref = SUBREG_REG (ref);
9935 if (GET_CODE (ref) == REG)
9937 /* We have a pseudo which has been spilt onto the stack; there
9938 are two cases here: the first where there is a simple
9939 stack-slot replacement and a second where the stack-slot is
9940 out of range, or is used as a subreg. */
9941 if (reg_equiv_mem[REGNO (ref)])
9943 ref = reg_equiv_mem[REGNO (ref)];
9944 base = find_replacement (&XEXP (ref, 0));
9946 else
9947 /* The slot is out of range, or was dressed up in a SUBREG. */
9948 base = reg_equiv_address[REGNO (ref)];
9950 else
9951 base = find_replacement (&XEXP (ref, 0));
9953 /* Handle the case where the address is too complex to be offset by 1. */
9954 if (GET_CODE (base) == MINUS
9955 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
9957 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9959 emit_set_insn (base_plus, base);
9960 base = base_plus;
9962 else if (GET_CODE (base) == PLUS)
9964 /* The addend must be CONST_INT, or we would have dealt with it above. */
9965 HOST_WIDE_INT hi, lo;
9967 offset += INTVAL (XEXP (base, 1));
9968 base = XEXP (base, 0);
9970 /* Rework the address into a legal sequence of insns. */
9971 /* Valid range for lo is -4095 -> 4095 */
9972 lo = (offset >= 0
9973 ? (offset & 0xfff)
9974 : -((-offset) & 0xfff));
9976 /* Corner case: if lo is the max offset then we would be out of range
9977 once we have added the additional 1 below, so bump the msb into the
9978 pre-loading insn(s). */
9979 if (lo == 4095)
9980 lo &= 0x7ff;
9982 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
9983 ^ (HOST_WIDE_INT) 0x80000000)
9984 - (HOST_WIDE_INT) 0x80000000);
9986 gcc_assert (hi + lo == offset);
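/* Worked example (illustrative, not in the original source): for
   offset = 5000 the code above computes lo = 5000 & 0xfff = 904 and
   hi = 4096, so hi is folded into the base register first and the
   byte loads below use the in-range displacements 904 and 905.  For
   offset = -5000 it computes lo = -904 and hi = -4096; the
   XOR/subtract dance on hi just sign-extends bit 31 so the arithmetic
   stays correct when HOST_WIDE_INT is wider than 32 bits.  */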
9988 if (hi != 0)
9990 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9992 /* Get the base address; addsi3 knows how to handle constants
9993 that require more than one insn. */
9994 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
9995 base = base_plus;
9996 offset = lo;
10000 /* Operands[2] may overlap operands[0] (though it won't overlap
10001 operands[1]); that's why we asked for a DImode reg -- so we can
10002 use the half that does not overlap. */
10003 if (REGNO (operands[2]) == REGNO (operands[0]))
10004 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10005 else
10006 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10008 emit_insn (gen_zero_extendqisi2 (scratch,
10009 gen_rtx_MEM (QImode,
10010 plus_constant (base,
10011 offset))));
10012 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10013 gen_rtx_MEM (QImode,
10014 plus_constant (base,
10015 offset + 1))));
10016 if (!BYTES_BIG_ENDIAN)
10017 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10018 gen_rtx_IOR (SImode,
10019 gen_rtx_ASHIFT
10020 (SImode,
10021 gen_rtx_SUBREG (SImode, operands[0], 0),
10022 GEN_INT (8)),
10023 scratch));
10024 else
10025 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10026 gen_rtx_IOR (SImode,
10027 gen_rtx_ASHIFT (SImode, scratch,
10028 GEN_INT (8)),
10029 gen_rtx_SUBREG (SImode, operands[0], 0)));
10032 /* Handle storing a half-word to memory during reload by synthesizing as two
10033 byte stores. Take care not to clobber the input values until after we
10034 have moved them somewhere safe. This code assumes that if the DImode
10035 scratch in operands[2] overlaps either the input value or output address
10036 in some way, then that value must die in this insn (we absolutely need
10037 two scratch registers for some corner cases). */
10038 void
10039 arm_reload_out_hi (rtx *operands)
10041 rtx ref = operands[0];
10042 rtx outval = operands[1];
10043 rtx base, scratch;
10044 HOST_WIDE_INT offset = 0;
10046 if (GET_CODE (ref) == SUBREG)
10048 offset = SUBREG_BYTE (ref);
10049 ref = SUBREG_REG (ref);
10052 if (GET_CODE (ref) == REG)
10054 /* We have a pseudo which has been spilt onto the stack; there
10055 are two cases here: the first where there is a simple
10056 stack-slot replacement and a second where the stack-slot is
10057 out of range, or is used as a subreg. */
10058 if (reg_equiv_mem[REGNO (ref)])
10060 ref = reg_equiv_mem[REGNO (ref)];
10061 base = find_replacement (&XEXP (ref, 0));
10063 else
10064 /* The slot is out of range, or was dressed up in a SUBREG. */
10065 base = reg_equiv_address[REGNO (ref)];
10067 else
10068 base = find_replacement (&XEXP (ref, 0));
10070 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10072 /* Handle the case where the address is too complex to be offset by 1. */
10073 if (GET_CODE (base) == MINUS
10074 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10076 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10078 /* Be careful not to destroy OUTVAL. */
10079 if (reg_overlap_mentioned_p (base_plus, outval))
10081 /* Updating base_plus might destroy outval, see if we can
10082 swap the scratch and base_plus. */
10083 if (!reg_overlap_mentioned_p (scratch, outval))
10085 rtx tmp = scratch;
10086 scratch = base_plus;
10087 base_plus = tmp;
10089 else
10091 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10093 /* Be conservative and copy OUTVAL into the scratch now;
10094 this should only be necessary if outval is a subreg
10095 of something larger than a word. */
10096 /* XXX Might this clobber base? I can't see how it can,
10097 since scratch is known to overlap with OUTVAL, and
10098 must be wider than a word. */
10099 emit_insn (gen_movhi (scratch_hi, outval));
10100 outval = scratch_hi;
10104 emit_set_insn (base_plus, base);
10105 base = base_plus;
10107 else if (GET_CODE (base) == PLUS)
10109 /* The addend must be CONST_INT, or we would have dealt with it above. */
10110 HOST_WIDE_INT hi, lo;
10112 offset += INTVAL (XEXP (base, 1));
10113 base = XEXP (base, 0);
10115 /* Rework the address into a legal sequence of insns. */
10116 /* Valid range for lo is -4095 -> 4095 */
10117 lo = (offset >= 0
10118 ? (offset & 0xfff)
10119 : -((-offset) & 0xfff));
10121 /* Corner case: if lo is the max offset then we would be out of range
10122 once we have added the additional 1 below, so bump the msb into the
10123 pre-loading insn(s). */
10124 if (lo == 4095)
10125 lo &= 0x7ff;
10127 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10128 ^ (HOST_WIDE_INT) 0x80000000)
10129 - (HOST_WIDE_INT) 0x80000000);
10131 gcc_assert (hi + lo == offset);
10133 if (hi != 0)
10135 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10137 /* Be careful not to destroy OUTVAL. */
10138 if (reg_overlap_mentioned_p (base_plus, outval))
10140 /* Updating base_plus might destroy outval, see if we
10141 can swap the scratch and base_plus. */
10142 if (!reg_overlap_mentioned_p (scratch, outval))
10144 rtx tmp = scratch;
10145 scratch = base_plus;
10146 base_plus = tmp;
10148 else
10150 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10152 /* Be conservative and copy outval into scratch now;
10153 this should only be necessary if outval is a
10154 subreg of something larger than a word. */
10155 /* XXX Might this clobber base? I can't see how it
10156 can, since scratch is known to overlap with
10157 outval. */
10158 emit_insn (gen_movhi (scratch_hi, outval));
10159 outval = scratch_hi;
10163 /* Get the base address; addsi3 knows how to handle constants
10164 that require more than one insn. */
10165 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10166 base = base_plus;
10167 offset = lo;
10171 if (BYTES_BIG_ENDIAN)
10173 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10174 plus_constant (base, offset + 1)),
10175 gen_lowpart (QImode, outval)));
10176 emit_insn (gen_lshrsi3 (scratch,
10177 gen_rtx_SUBREG (SImode, outval, 0),
10178 GEN_INT (8)));
10179 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10180 gen_lowpart (QImode, scratch)));
10182 else
10184 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10185 gen_lowpart (QImode, outval)));
10186 emit_insn (gen_lshrsi3 (scratch,
10187 gen_rtx_SUBREG (SImode, outval, 0),
10188 GEN_INT (8)));
10189 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10190 plus_constant (base, offset + 1)),
10191 gen_lowpart (QImode, scratch)));
10195 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10196 (padded to the size of a word) should be passed in a register. */
10198 static bool
10199 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10201 if (TARGET_AAPCS_BASED)
10202 return must_pass_in_stack_var_size (mode, type);
10203 else
10204 return must_pass_in_stack_var_size_or_pad (mode, type);
10208 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10209 Return true if an argument passed on the stack should be padded upwards,
10210 i.e. if the least-significant byte has useful data.
10211 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10212 aggregate types are placed in the lowest memory address. */
10214 bool
10215 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10217 if (!TARGET_AAPCS_BASED)
10218 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10220 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10221 return false;
10223 return true;
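/* For example (illustrative): on a big-endian AAPCS target a "short"
   argument hits the test above and is padded downwards, whereas a
   3-byte structure is padded upwards and so starts at the lowest
   address of its word-sized slot.  Little-endian AAPCS targets always
   pad upwards here.  */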
10227 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10228 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10229 byte of the register has useful data, and return the opposite if the
10230 most significant byte does.
10231 For AAPCS, small aggregates and small complex types are always padded
10232 upwards. */
10234 bool
10235 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10236 tree type, int first ATTRIBUTE_UNUSED)
10238 if (TARGET_AAPCS_BASED
10239 && BYTES_BIG_ENDIAN
10240 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10241 && int_size_in_bytes (type) <= 4)
10242 return true;
10244 /* Otherwise, use default padding. */
10245 return !BYTES_BIG_ENDIAN;
10249 /* Print a symbolic form of X to the debug file, F. */
10250 static void
10251 arm_print_value (FILE *f, rtx x)
10253 switch (GET_CODE (x))
10255 case CONST_INT:
10256 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10257 return;
10259 case CONST_DOUBLE:
10260 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10261 return;
10263 case CONST_VECTOR:
10265 int i;
10267 fprintf (f, "<");
10268 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10270 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10271 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10272 fputc (',', f);
10274 fprintf (f, ">");
10276 return;
10278 case CONST_STRING:
10279 fprintf (f, "\"%s\"", XSTR (x, 0));
10280 return;
10282 case SYMBOL_REF:
10283 fprintf (f, "`%s'", XSTR (x, 0));
10284 return;
10286 case LABEL_REF:
10287 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10288 return;
10290 case CONST:
10291 arm_print_value (f, XEXP (x, 0));
10292 return;
10294 case PLUS:
10295 arm_print_value (f, XEXP (x, 0));
10296 fprintf (f, "+");
10297 arm_print_value (f, XEXP (x, 1));
10298 return;
10300 case PC:
10301 fprintf (f, "pc");
10302 return;
10304 default:
10305 fprintf (f, "????");
10306 return;
10310 /* Routines for manipulation of the constant pool. */
10312 /* Arm instructions cannot load a large constant directly into a
10313 register; they have to come from a pc relative load. The constant
10314 must therefore be placed in the addressable range of the pc
10315 relative load. Depending on the precise pc relative load
10316 instruction the range is somewhere between 256 bytes and 4k. This
10317 means that we often have to dump a constant inside a function, and
10318 generate code to branch around it.
10320 It is important to minimize this, since the branches will slow
10321 things down and make the code larger.
10323 Normally we can hide the table after an existing unconditional
10324 branch so that there is no interruption of the flow, but in the
10325 worst case the code looks like this:
10327 ldr rn, L1
10329 b L2
10330 align
10331 L1: .long value
10335 ldr rn, L3
10337 b L4
10338 align
10339 L3: .long value
10343 We fix this by performing a scan after scheduling, which notices
10344 which instructions need to have their operands fetched from the
10345 constant table and builds the table.
10347 The algorithm starts by building a table of all the constants that
10348 need fixing up and all the natural barriers in the function (places
10349 where a constant table can be dropped without breaking the flow).
10350 For each fixup we note how far the pc-relative replacement will be
10351 able to reach and the offset of the instruction into the function.
10353 Having built the table we then group the fixes together to form
10354 tables that are as large as possible (subject to addressing
10355 constraints) and emit each table of constants after the last
10356 barrier that is within range of all the instructions in the group.
10357 If a group does not contain a barrier, then we forcibly create one
10358 by inserting a jump instruction into the flow. Once the table has
10359 been inserted, the insns are then modified to reference the
10360 relevant entry in the pool.
10362 Possible enhancements to the algorithm (not implemented) are:
10364 1) For some processors and object formats, there may be benefit in
10365 aligning the pools to the start of cache lines; this alignment
10366 would need to be taken into account when calculating addressability
10367 of a pool. */
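/* A short illustrative walkthrough (addresses are made up, not from
   the original source): suppose an insn at offset 100 needs an SImode
   constant C and its pool_range attribute allows a forward reach of
   4096 bytes.  A fix is recorded whose max_address is roughly
   100 + 4096; later fixes that need the same C merely bump the
   refcount of the existing minipool node.  When the group is closed,
   the pool is dumped after the last barrier that every member can
   still reach (or after a jump inserted by create_fix_barrier), and
   each fixed-up insn is rewritten to load from
   minipool_vector_label + offset, i.e. a pc-relative load.  */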
10369 /* These typedefs are located at the start of this file, so that
10370 they can be used in the prototypes there. This comment is to
10371 remind readers of that fact so that the following structures
10372 can be understood more easily.
10374 typedef struct minipool_node Mnode;
10375 typedef struct minipool_fixup Mfix; */
10377 struct minipool_node
10379 /* Doubly linked chain of entries. */
10380 Mnode * next;
10381 Mnode * prev;
10382 /* The maximum offset into the code that this entry can be placed. While
10383 pushing fixes for forward references, all entries are sorted in order
10384 of increasing max_address. */
10385 HOST_WIDE_INT max_address;
10386 /* Similarly for an entry inserted for a backwards ref. */
10387 HOST_WIDE_INT min_address;
10388 /* The number of fixes referencing this entry. This can become zero
10389 if we "unpush" an entry. In this case we ignore the entry when we
10390 come to emit the code. */
10391 int refcount;
10392 /* The offset from the start of the minipool. */
10393 HOST_WIDE_INT offset;
10394 /* The value in the table. */
10395 rtx value;
10396 /* The mode of value. */
10397 enum machine_mode mode;
10398 /* The size of the value. With iWMMXt enabled,
10399 sizes > 4 also imply an alignment of 8 bytes. */
10400 int fix_size;
10403 struct minipool_fixup
10405 Mfix * next;
10406 rtx insn;
10407 HOST_WIDE_INT address;
10408 rtx * loc;
10409 enum machine_mode mode;
10410 int fix_size;
10411 rtx value;
10412 Mnode * minipool;
10413 HOST_WIDE_INT forwards;
10414 HOST_WIDE_INT backwards;
10417 /* Fixes less than a word need padding out to a word boundary. */
10418 #define MINIPOOL_FIX_SIZE(mode) \
10419 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
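/* For instance, MINIPOOL_FIX_SIZE (HImode) is 4 (padded up to a full
   word) while MINIPOOL_FIX_SIZE (DImode) is 8.  */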
10421 static Mnode * minipool_vector_head;
10422 static Mnode * minipool_vector_tail;
10423 static rtx minipool_vector_label;
10424 static int minipool_pad;
10426 /* The linked list of all minipool fixes required for this function. */
10427 Mfix * minipool_fix_head;
10428 Mfix * minipool_fix_tail;
10429 /* The fix entry for the current minipool, once it has been placed. */
10430 Mfix * minipool_barrier;
10432 /* Determines if INSN is the start of a jump table. Returns the end
10433 of the TABLE or NULL_RTX. */
10434 static rtx
10435 is_jump_table (rtx insn)
10437 rtx table;
10439 if (GET_CODE (insn) == JUMP_INSN
10440 && JUMP_LABEL (insn) != NULL
10441 && ((table = next_real_insn (JUMP_LABEL (insn)))
10442 == next_real_insn (insn))
10443 && table != NULL
10444 && GET_CODE (table) == JUMP_INSN
10445 && (GET_CODE (PATTERN (table)) == ADDR_VEC
10446 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
10447 return table;
10449 return NULL_RTX;
10452 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10453 #define JUMP_TABLES_IN_TEXT_SECTION 0
10454 #endif
10456 static HOST_WIDE_INT
10457 get_jump_table_size (rtx insn)
10459 /* ADDR_VECs only take room if read-only data goes into the text
10460 section. */
10461 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10463 rtx body = PATTERN (insn);
10464 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10465 HOST_WIDE_INT size;
10466 HOST_WIDE_INT modesize;
10468 modesize = GET_MODE_SIZE (GET_MODE (body));
10469 size = modesize * XVECLEN (body, elt);
10470 switch (modesize)
10472 case 1:
10473 /* Round up size of TBB table to a halfword boundary. */
10474 size = (size + 1) & ~(HOST_WIDE_INT)1;
10475 break;
10476 case 2:
10477 /* No padding necessary for TBH. */
10478 break;
10479 case 4:
10480 /* Add two bytes for alignment on Thumb. */
10481 if (TARGET_THUMB)
10482 size += 2;
10483 break;
10484 default:
10485 gcc_unreachable ();
10487 return size;
10490 return 0;
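/* Example (illustrative, assuming the table lives in the text
   section): a QImode ADDR_DIFF_VEC with five entries occupies 5 bytes
   and is rounded up to 6 by the halfword alignment above, while the
   same five-entry table in SImode on Thumb takes 20 bytes plus 2
   bytes of alignment padding.  */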
10493 /* Move a minipool fix MP from its current location to before MAX_MP.
10494 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10495 constraints may need updating. */
10496 static Mnode *
10497 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10498 HOST_WIDE_INT max_address)
10500 /* The code below assumes these are different. */
10501 gcc_assert (mp != max_mp);
10503 if (max_mp == NULL)
10505 if (max_address < mp->max_address)
10506 mp->max_address = max_address;
10508 else
10510 if (max_address > max_mp->max_address - mp->fix_size)
10511 mp->max_address = max_mp->max_address - mp->fix_size;
10512 else
10513 mp->max_address = max_address;
10515 /* Unlink MP from its current position. Since max_mp is non-null,
10516 mp->prev must be non-null. */
10517 mp->prev->next = mp->next;
10518 if (mp->next != NULL)
10519 mp->next->prev = mp->prev;
10520 else
10521 minipool_vector_tail = mp->prev;
10523 /* Re-insert it before MAX_MP. */
10524 mp->next = max_mp;
10525 mp->prev = max_mp->prev;
10526 max_mp->prev = mp;
10528 if (mp->prev != NULL)
10529 mp->prev->next = mp;
10530 else
10531 minipool_vector_head = mp;
10534 /* Save the new entry. */
10535 max_mp = mp;
10537 /* Scan over the preceding entries and adjust their addresses as
10538 required. */
10539 while (mp->prev != NULL
10540 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10542 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10543 mp = mp->prev;
10546 return max_mp;
10549 /* Add a constant to the minipool for a forward reference. Returns the
10550 node added or NULL if the constant will not fit in this pool. */
10551 static Mnode *
10552 add_minipool_forward_ref (Mfix *fix)
10554 /* If set, max_mp is the first pool_entry that has a lower
10555 constraint than the one we are trying to add. */
10556 Mnode * max_mp = NULL;
10557 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10558 Mnode * mp;
10560 /* If the minipool starts before the end of FIX->INSN then this FIX
10561 cannot be placed into the current pool. Furthermore, adding the
10562 new constant pool entry may cause the pool to start FIX_SIZE bytes
10563 earlier. */
10564 if (minipool_vector_head &&
10565 (fix->address + get_attr_length (fix->insn)
10566 >= minipool_vector_head->max_address - fix->fix_size))
10567 return NULL;
10569 /* Scan the pool to see if a constant with the same value has
10570 already been added. While we are doing this, also note the
10571 location where we must insert the constant if it doesn't already
10572 exist. */
10573 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10575 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10576 && fix->mode == mp->mode
10577 && (GET_CODE (fix->value) != CODE_LABEL
10578 || (CODE_LABEL_NUMBER (fix->value)
10579 == CODE_LABEL_NUMBER (mp->value)))
10580 && rtx_equal_p (fix->value, mp->value))
10582 /* More than one fix references this entry. */
10583 mp->refcount++;
10584 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10587 /* Note the insertion point if necessary. */
10588 if (max_mp == NULL
10589 && mp->max_address > max_address)
10590 max_mp = mp;
10592 /* If we are inserting an 8-byte aligned quantity and
10593 we have not already found an insertion point, then
10594 make sure that all such 8-byte aligned quantities are
10595 placed at the start of the pool. */
10596 if (ARM_DOUBLEWORD_ALIGN
10597 && max_mp == NULL
10598 && fix->fix_size >= 8
10599 && mp->fix_size < 8)
10601 max_mp = mp;
10602 max_address = mp->max_address;
10606 /* The value is not currently in the minipool, so we need to create
10607 a new entry for it. If MAX_MP is NULL, the entry will be put on
10608 the end of the list since the placement is less constrained than
10609 any existing entry. Otherwise, we insert the new fix before
10610 MAX_MP and, if necessary, adjust the constraints on the other
10611 entries. */
10612 mp = XNEW (Mnode);
10613 mp->fix_size = fix->fix_size;
10614 mp->mode = fix->mode;
10615 mp->value = fix->value;
10616 mp->refcount = 1;
10617 /* Not yet required for a backwards ref. */
10618 mp->min_address = -65536;
10620 if (max_mp == NULL)
10622 mp->max_address = max_address;
10623 mp->next = NULL;
10624 mp->prev = minipool_vector_tail;
10626 if (mp->prev == NULL)
10628 minipool_vector_head = mp;
10629 minipool_vector_label = gen_label_rtx ();
10631 else
10632 mp->prev->next = mp;
10634 minipool_vector_tail = mp;
10636 else
10638 if (max_address > max_mp->max_address - mp->fix_size)
10639 mp->max_address = max_mp->max_address - mp->fix_size;
10640 else
10641 mp->max_address = max_address;
10643 mp->next = max_mp;
10644 mp->prev = max_mp->prev;
10645 max_mp->prev = mp;
10646 if (mp->prev != NULL)
10647 mp->prev->next = mp;
10648 else
10649 minipool_vector_head = mp;
10652 /* Save the new entry. */
10653 max_mp = mp;
10655 /* Scan over the preceding entries and adjust their addresses as
10656 required. */
10657 while (mp->prev != NULL
10658 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10660 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10661 mp = mp->prev;
10664 return max_mp;
10667 static Mnode *
10668 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10669 HOST_WIDE_INT min_address)
10671 HOST_WIDE_INT offset;
10673 /* The code below assumes these are different. */
10674 gcc_assert (mp != min_mp);
10676 if (min_mp == NULL)
10678 if (min_address > mp->min_address)
10679 mp->min_address = min_address;
10681 else
10683 /* We will adjust this below if it is too loose. */
10684 mp->min_address = min_address;
10686 /* Unlink MP from its current position. Since min_mp is non-null,
10687 mp->next must be non-null. */
10688 mp->next->prev = mp->prev;
10689 if (mp->prev != NULL)
10690 mp->prev->next = mp->next;
10691 else
10692 minipool_vector_head = mp->next;
10694 /* Reinsert it after MIN_MP. */
10695 mp->prev = min_mp;
10696 mp->next = min_mp->next;
10697 min_mp->next = mp;
10698 if (mp->next != NULL)
10699 mp->next->prev = mp;
10700 else
10701 minipool_vector_tail = mp;
10704 min_mp = mp;
10706 offset = 0;
10707 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10709 mp->offset = offset;
10710 if (mp->refcount > 0)
10711 offset += mp->fix_size;
10713 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
10714 mp->next->min_address = mp->min_address + mp->fix_size;
10717 return min_mp;
10720 /* Add a constant to the minipool for a backward reference. Returns the
10721 node added or NULL if the constant will not fit in this pool.
10723 Note that the code for insertion for a backwards reference can be
10724 somewhat confusing because the calculated offsets for each fix do
10725 not take into account the size of the pool (which is still under
10726 construction). */
10727 static Mnode *
10728 add_minipool_backward_ref (Mfix *fix)
10730 /* If set, min_mp is the last pool_entry that has a lower constraint
10731 than the one we are trying to add. */
10732 Mnode *min_mp = NULL;
10733 /* This can be negative, since it is only a constraint. */
10734 HOST_WIDE_INT min_address = fix->address - fix->backwards;
10735 Mnode *mp;
10737 /* If we can't reach the current pool from this insn, or if we can't
10738 insert this entry at the end of the pool without pushing other
10739 fixes out of range, then we don't try. This ensures that we
10740 can't fail later on. */
10741 if (min_address >= minipool_barrier->address
10742 || (minipool_vector_tail->min_address + fix->fix_size
10743 >= minipool_barrier->address))
10744 return NULL;
10746 /* Scan the pool to see if a constant with the same value has
10747 already been added. While we are doing this, also note the
10748 location where we must insert the constant if it doesn't already
10749 exist. */
10750 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
10752 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10753 && fix->mode == mp->mode
10754 && (GET_CODE (fix->value) != CODE_LABEL
10755 || (CODE_LABEL_NUMBER (fix->value)
10756 == CODE_LABEL_NUMBER (mp->value)))
10757 && rtx_equal_p (fix->value, mp->value)
10758 /* Check that there is enough slack to move this entry to the
10759 end of the table (this is conservative). */
10760 && (mp->max_address
10761 > (minipool_barrier->address
10762 + minipool_vector_tail->offset
10763 + minipool_vector_tail->fix_size)))
10765 mp->refcount++;
10766 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
10769 if (min_mp != NULL)
10770 mp->min_address += fix->fix_size;
10771 else
10773 /* Note the insertion point if necessary. */
10774 if (mp->min_address < min_address)
10776 /* For now, we do not allow the insertion of 8-byte alignment
10777 requiring nodes anywhere but at the start of the pool. */
10778 if (ARM_DOUBLEWORD_ALIGN
10779 && fix->fix_size >= 8 && mp->fix_size < 8)
10780 return NULL;
10781 else
10782 min_mp = mp;
10784 else if (mp->max_address
10785 < minipool_barrier->address + mp->offset + fix->fix_size)
10787 /* Inserting before this entry would push the fix beyond
10788 its maximum address (which can happen if we have
10789 re-located a forwards fix); force the new fix to come
10790 after it. */
10791 if (ARM_DOUBLEWORD_ALIGN
10792 && fix->fix_size >= 8 && mp->fix_size < 8)
10793 return NULL;
10794 else
10796 min_mp = mp;
10797 min_address = mp->min_address + fix->fix_size;
10800 /* Do not insert a non-8-byte aligned quantity before 8-byte
10801 aligned quantities. */
10802 else if (ARM_DOUBLEWORD_ALIGN
10803 && fix->fix_size < 8
10804 && mp->fix_size >= 8)
10806 min_mp = mp;
10807 min_address = mp->min_address + fix->fix_size;
10812 /* We need to create a new entry. */
10813 mp = XNEW (Mnode);
10814 mp->fix_size = fix->fix_size;
10815 mp->mode = fix->mode;
10816 mp->value = fix->value;
10817 mp->refcount = 1;
10818 mp->max_address = minipool_barrier->address + 65536;
10820 mp->min_address = min_address;
10822 if (min_mp == NULL)
10824 mp->prev = NULL;
10825 mp->next = minipool_vector_head;
10827 if (mp->next == NULL)
10829 minipool_vector_tail = mp;
10830 minipool_vector_label = gen_label_rtx ();
10832 else
10833 mp->next->prev = mp;
10835 minipool_vector_head = mp;
10837 else
10839 mp->next = min_mp->next;
10840 mp->prev = min_mp;
10841 min_mp->next = mp;
10843 if (mp->next != NULL)
10844 mp->next->prev = mp;
10845 else
10846 minipool_vector_tail = mp;
10849 /* Save the new entry. */
10850 min_mp = mp;
10852 if (mp->prev)
10853 mp = mp->prev;
10854 else
10855 mp->offset = 0;
10857 /* Scan over the following entries and adjust their offsets. */
10858 while (mp->next != NULL)
10860 if (mp->next->min_address < mp->min_address + mp->fix_size)
10861 mp->next->min_address = mp->min_address + mp->fix_size;
10863 if (mp->refcount)
10864 mp->next->offset = mp->offset + mp->fix_size;
10865 else
10866 mp->next->offset = mp->offset;
10868 mp = mp->next;
10871 return min_mp;
10874 static void
10875 assign_minipool_offsets (Mfix *barrier)
10877 HOST_WIDE_INT offset = 0;
10878 Mnode *mp;
10880 minipool_barrier = barrier;
10882 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10884 mp->offset = offset;
10886 if (mp->refcount > 0)
10887 offset += mp->fix_size;
10891 /* Output the literal table. */
10892 static void
10893 dump_minipool (rtx scan)
10895 Mnode * mp;
10896 Mnode * nmp;
10897 int align64 = 0;
10899 if (ARM_DOUBLEWORD_ALIGN)
10900 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10901 if (mp->refcount > 0 && mp->fix_size >= 8)
10903 align64 = 1;
10904 break;
10907 if (dump_file)
10908 fprintf (dump_file,
10909 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
10910 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
10912 scan = emit_label_after (gen_label_rtx (), scan);
10913 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
10914 scan = emit_label_after (minipool_vector_label, scan);
10916 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
10918 if (mp->refcount > 0)
10920 if (dump_file)
10922 fprintf (dump_file,
10923 ";; Offset %u, min %ld, max %ld ",
10924 (unsigned) mp->offset, (unsigned long) mp->min_address,
10925 (unsigned long) mp->max_address);
10926 arm_print_value (dump_file, mp->value);
10927 fputc ('\n', dump_file);
10930 switch (mp->fix_size)
10932 #ifdef HAVE_consttable_1
10933 case 1:
10934 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
10935 break;
10937 #endif
10938 #ifdef HAVE_consttable_2
10939 case 2:
10940 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
10941 break;
10943 #endif
10944 #ifdef HAVE_consttable_4
10945 case 4:
10946 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
10947 break;
10949 #endif
10950 #ifdef HAVE_consttable_8
10951 case 8:
10952 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
10953 break;
10955 #endif
10956 #ifdef HAVE_consttable_16
10957 case 16:
10958 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
10959 break;
10961 #endif
10962 default:
10963 gcc_unreachable ();
10967 nmp = mp->next;
10968 free (mp);
10971 minipool_vector_head = minipool_vector_tail = NULL;
10972 scan = emit_insn_after (gen_consttable_end (), scan);
10973 scan = emit_barrier_after (scan);
10976 /* Return the cost of forcibly inserting a barrier after INSN. */
10977 static int
10978 arm_barrier_cost (rtx insn)
10980 /* Basing the location of the pool on the loop depth is preferable,
10981 but at the moment, the basic block information seems to be
10982 corrupt by this stage of the compilation. */
10983 int base_cost = 50;
10984 rtx next = next_nonnote_insn (insn);
10986 if (next != NULL && GET_CODE (next) == CODE_LABEL)
10987 base_cost -= 20;
10989 switch (GET_CODE (insn))
10991 case CODE_LABEL:
10992 /* It will always be better to place the table before the label, rather
10993 than after it. */
10994 return 50;
10996 case INSN:
10997 case CALL_INSN:
10998 return base_cost;
11000 case JUMP_INSN:
11001 return base_cost - 10;
11003 default:
11004 return base_cost + 10;
11008 /* Find the best place in the insn stream in the range
11009 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11010 Create the barrier by inserting a jump and add a new fix entry for
11011 it. */
11012 static Mfix *
11013 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11015 HOST_WIDE_INT count = 0;
11016 rtx barrier;
11017 rtx from = fix->insn;
11018 /* The instruction after which we will insert the jump. */
11019 rtx selected = NULL;
11020 int selected_cost;
11021 /* The address at which the jump instruction will be placed. */
11022 HOST_WIDE_INT selected_address;
11023 Mfix * new_fix;
11024 HOST_WIDE_INT max_count = max_address - fix->address;
11025 rtx label = gen_label_rtx ();
11027 selected_cost = arm_barrier_cost (from);
11028 selected_address = fix->address;
11030 while (from && count < max_count)
11032 rtx tmp;
11033 int new_cost;
11035 /* This code shouldn't have been called if there was a natural barrier
11036 within range. */
11037 gcc_assert (GET_CODE (from) != BARRIER);
11039 /* Count the length of this insn. */
11040 count += get_attr_length (from);
11042 /* If there is a jump table, add its length. */
11043 tmp = is_jump_table (from);
11044 if (tmp != NULL)
11046 count += get_jump_table_size (tmp);
11048 /* Jump tables aren't in a basic block, so base the cost on
11049 the dispatch insn. If we select this location, we will
11050 still put the pool after the table. */
11051 new_cost = arm_barrier_cost (from);
11053 if (count < max_count
11054 && (!selected || new_cost <= selected_cost))
11056 selected = tmp;
11057 selected_cost = new_cost;
11058 selected_address = fix->address + count;
11061 /* Continue after the dispatch table. */
11062 from = NEXT_INSN (tmp);
11063 continue;
11066 new_cost = arm_barrier_cost (from);
11068 if (count < max_count
11069 && (!selected || new_cost <= selected_cost))
11071 selected = from;
11072 selected_cost = new_cost;
11073 selected_address = fix->address + count;
11076 from = NEXT_INSN (from);
11079 /* Make sure that we found a place to insert the jump. */
11080 gcc_assert (selected);
11082 /* Create a new JUMP_INSN that branches around a barrier. */
11083 from = emit_jump_insn_after (gen_jump (label), selected);
11084 JUMP_LABEL (from) = label;
11085 barrier = emit_barrier_after (from);
11086 emit_label_after (label, barrier);
11088 /* Create a minipool barrier entry for the new barrier. */
11089 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11090 new_fix->insn = barrier;
11091 new_fix->address = selected_address;
11092 new_fix->next = fix->next;
11093 fix->next = new_fix;
11095 return new_fix;
11098 /* Record that there is a natural barrier in the insn stream at
11099 ADDRESS. */
11100 static void
11101 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11103 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11105 fix->insn = insn;
11106 fix->address = address;
11108 fix->next = NULL;
11109 if (minipool_fix_head != NULL)
11110 minipool_fix_tail->next = fix;
11111 else
11112 minipool_fix_head = fix;
11114 minipool_fix_tail = fix;
11117 /* Record INSN, which will need fixing up to load a value from the
11118 minipool. ADDRESS is the offset of the insn since the start of the
11119 function; LOC is a pointer to the part of the insn which requires
11120 fixing; VALUE is the constant that must be loaded, which is of type
11121 MODE. */
11122 static void
11123 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11124 enum machine_mode mode, rtx value)
11126 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11128 fix->insn = insn;
11129 fix->address = address;
11130 fix->loc = loc;
11131 fix->mode = mode;
11132 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11133 fix->value = value;
11134 fix->forwards = get_attr_pool_range (insn);
11135 fix->backwards = get_attr_neg_pool_range (insn);
11136 fix->minipool = NULL;
11138 /* If an insn doesn't have a range defined for it, then it isn't
11139 expecting to be reworked by this code. Better to stop now than
11140 to generate duff assembly code. */
11141 gcc_assert (fix->forwards || fix->backwards);
11143 /* If an entry requires 8-byte alignment then assume all constant pools
11144 require 4 bytes of padding. Trying to do this later on a per-pool
11145 basis is awkward because existing pool entries have to be modified. */
11146 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11147 minipool_pad = 4;
11149 if (dump_file)
11151 fprintf (dump_file,
11152 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11153 GET_MODE_NAME (mode),
11154 INSN_UID (insn), (unsigned long) address,
11155 -1 * (long)fix->backwards, (long)fix->forwards);
11156 arm_print_value (dump_file, fix->value);
11157 fprintf (dump_file, "\n");
11160 /* Add it to the chain of fixes. */
11161 fix->next = NULL;
11163 if (minipool_fix_head != NULL)
11164 minipool_fix_tail->next = fix;
11165 else
11166 minipool_fix_head = fix;
11168 minipool_fix_tail = fix;
11171 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11172 Returns the number of insns needed, or 99 if we don't know how to
11173 do it. */
11175 arm_const_double_inline_cost (rtx val)
11177 rtx lowpart, highpart;
11178 enum machine_mode mode;
11180 mode = GET_MODE (val);
11182 if (mode == VOIDmode)
11183 mode = DImode;
11185 gcc_assert (GET_MODE_SIZE (mode) == 8);
11187 lowpart = gen_lowpart (SImode, val);
11188 highpart = gen_highpart_mode (SImode, mode, val);
11190 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11191 gcc_assert (GET_CODE (highpart) == CONST_INT);
11193 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11194 NULL_RTX, NULL_RTX, 0, 0)
11195 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11196 NULL_RTX, NULL_RTX, 0, 0));
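/* As a rough illustration: a 64-bit value whose two halves are both
   valid ARM immediates (say 0x0000004200000011) should come back as
   2, one insn per word, whereas a value with an awkward bit pattern
   in either half drives the count up and makes a literal-pool load
   look more attractive to the caller.  */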
11199 /* Return true if it is worthwhile to split a 64-bit constant into two
11200 32-bit operations. This is the case if optimizing for size, or
11201 if we have load delay slots, or if one 32-bit part can be done with
11202 a single data operation. */
11203 bool
11204 arm_const_double_by_parts (rtx val)
11206 enum machine_mode mode = GET_MODE (val);
11207 rtx part;
11209 if (optimize_size || arm_ld_sched)
11210 return true;
11212 if (mode == VOIDmode)
11213 mode = DImode;
11215 part = gen_highpart_mode (SImode, mode, val);
11217 gcc_assert (GET_CODE (part) == CONST_INT);
11219 if (const_ok_for_arm (INTVAL (part))
11220 || const_ok_for_arm (~INTVAL (part)))
11221 return true;
11223 part = gen_lowpart (SImode, val);
11225 gcc_assert (GET_CODE (part) == CONST_INT);
11227 if (const_ok_for_arm (INTVAL (part))
11228 || const_ok_for_arm (~INTVAL (part)))
11229 return true;
11231 return false;
11234 /* Scan INSN and note any of its operands that need fixing.
11235 If DO_PUSHES is false we do not actually push any of the fixups
11236 needed. The function returns TRUE if any fixups were needed/pushed.
11237 This is used by arm_memory_load_p() which needs to know about loads
11238 of constants that will be converted into minipool loads. */
11239 static bool
11240 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11242 bool result = false;
11243 int opno;
11245 extract_insn (insn);
11247 if (!constrain_operands (1))
11248 fatal_insn_not_found (insn);
11250 if (recog_data.n_alternatives == 0)
11251 return false;
11253 /* Fill in recog_op_alt with information about the constraints of
11254 this insn. */
11255 preprocess_constraints ();
11257 for (opno = 0; opno < recog_data.n_operands; opno++)
11259 /* Things we need to fix can only occur in inputs. */
11260 if (recog_data.operand_type[opno] != OP_IN)
11261 continue;
11263 /* If this alternative is a memory reference, then any mention
11264 of constants in this alternative is really to fool reload
11265 into allowing us to accept one there. We need to fix them up
11266 now so that we output the right code. */
11267 if (recog_op_alt[opno][which_alternative].memory_ok)
11269 rtx op = recog_data.operand[opno];
11271 if (CONSTANT_P (op))
11273 if (do_pushes)
11274 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11275 recog_data.operand_mode[opno], op);
11276 result = true;
11278 else if (GET_CODE (op) == MEM
11279 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11280 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11282 if (do_pushes)
11284 rtx cop = avoid_constant_pool_reference (op);
11286 /* Casting the address of something to a mode narrower
11287 than a word can cause avoid_constant_pool_reference()
11288 to return the pool reference itself. That's no good to
11289 us here. Let's just hope that we can use the
11290 constant pool value directly. */
11291 if (op == cop)
11292 cop = get_pool_constant (XEXP (op, 0));
11294 push_minipool_fix (insn, address,
11295 recog_data.operand_loc[opno],
11296 recog_data.operand_mode[opno], cop);
11299 result = true;
11304 return result;
11307 /* GCC puts the pool in the wrong place for ARM, since we can only
11308 load addresses a limited distance around the pc. We do some
11309 special munging to move the constant pool values to the correct
11310 point in the code. */
11311 static void
11312 arm_reorg (void)
11314 rtx insn;
11315 HOST_WIDE_INT address = 0;
11316 Mfix * fix;
11318 minipool_fix_head = minipool_fix_tail = NULL;
11320 /* The first insn must always be a note, or the code below won't
11321 scan it properly. */
11322 insn = get_insns ();
11323 gcc_assert (GET_CODE (insn) == NOTE);
11324 minipool_pad = 0;
11326 /* Scan all the insns and record the operands that will need fixing. */
11327 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11329 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11330 && (arm_cirrus_insn_p (insn)
11331 || GET_CODE (insn) == JUMP_INSN
11332 || arm_memory_load_p (insn)))
11333 cirrus_reorg (insn);
11335 if (GET_CODE (insn) == BARRIER)
11336 push_minipool_barrier (insn, address);
11337 else if (INSN_P (insn))
11339 rtx table;
11341 note_invalid_constants (insn, address, true);
11342 address += get_attr_length (insn);
11344 /* If the insn is a vector jump, add the size of the table
11345 and skip the table. */
11346 if ((table = is_jump_table (insn)) != NULL)
11348 address += get_jump_table_size (table);
11349 insn = table;
11354 fix = minipool_fix_head;
11356 /* Now scan the fixups and perform the required changes. */
11357 while (fix)
11359 Mfix * ftmp;
11360 Mfix * fdel;
11361 Mfix * last_added_fix;
11362 Mfix * last_barrier = NULL;
11363 Mfix * this_fix;
11365 /* Skip any further barriers before the next fix. */
11366 while (fix && GET_CODE (fix->insn) == BARRIER)
11367 fix = fix->next;
11369 /* No more fixes. */
11370 if (fix == NULL)
11371 break;
11373 last_added_fix = NULL;
11375 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11377 if (GET_CODE (ftmp->insn) == BARRIER)
11379 if (ftmp->address >= minipool_vector_head->max_address)
11380 break;
11382 last_barrier = ftmp;
11384 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11385 break;
11387 last_added_fix = ftmp; /* Keep track of the last fix added. */
11390 /* If we found a barrier, drop back to that; any fixes that we
11391 could have reached but come after the barrier will now go in
11392 the next mini-pool. */
11393 if (last_barrier != NULL)
11395 /* Reduce the refcount for those fixes that won't go into this
11396 pool after all. */
11397 for (fdel = last_barrier->next;
11398 fdel && fdel != ftmp;
11399 fdel = fdel->next)
11401 fdel->minipool->refcount--;
11402 fdel->minipool = NULL;
11405 ftmp = last_barrier;
11407 else
11409 /* ftmp is the first fix that we can't fit into this pool and
11410 there are no natural barriers that we could use. Insert a
11411 new barrier in the code somewhere between the previous
11412 fix and this one, and arrange to jump around it. */
11413 HOST_WIDE_INT max_address;
11415 /* The last item on the list of fixes must be a barrier, so
11416 we can never run off the end of the list of fixes without
11417 last_barrier being set. */
11418 gcc_assert (ftmp);
11420 max_address = minipool_vector_head->max_address;
11421 /* Check that there isn't another fix that is in range that
11422 we couldn't fit into this pool because the pool was
11423 already too large: we need to put the pool before such an
11424 instruction. The pool itself may come just after the
11425 fix because create_fix_barrier also allows space for a
11426 jump instruction. */
11427 if (ftmp->address < max_address)
11428 max_address = ftmp->address + 1;
11430 last_barrier = create_fix_barrier (last_added_fix, max_address);
11433 assign_minipool_offsets (last_barrier);
11435 while (ftmp)
11437 if (GET_CODE (ftmp->insn) != BARRIER
11438 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11439 == NULL))
11440 break;
11442 ftmp = ftmp->next;
11445 /* Scan over the fixes we have identified for this pool, fixing them
11446 up and adding the constants to the pool itself. */
11447 for (this_fix = fix; this_fix && ftmp != this_fix;
11448 this_fix = this_fix->next)
11449 if (GET_CODE (this_fix->insn) != BARRIER)
11451 rtx addr
11452 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11453 minipool_vector_label),
11454 this_fix->minipool->offset);
11455 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11458 dump_minipool (last_barrier->insn);
11459 fix = ftmp;
11462 /* From now on we must synthesize any constants that we can't handle
11463 directly. This can happen if the RTL gets split during final
11464 instruction generation. */
11465 after_arm_reorg = 1;
11467 /* Free the minipool memory. */
11468 obstack_free (&minipool_obstack, minipool_startobj);
11471 /* Routines to output assembly language. */
11473 /* If the rtx is the correct value then return the string of the number.
11474 In this way we can ensure that valid double constants are generated even
11475 when cross compiling. */
11476 const char *
11477 fp_immediate_constant (rtx x)
11479 REAL_VALUE_TYPE r;
11480 int i;
11482 if (!fp_consts_inited)
11483 init_fp_table ();
11485 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11486 for (i = 0; i < 8; i++)
11487 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11488 return strings_fp[i];
11490 gcc_unreachable ();
11493 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11494 static const char *
11495 fp_const_from_val (REAL_VALUE_TYPE *r)
11497 int i;
11499 if (!fp_consts_inited)
11500 init_fp_table ();
11502 for (i = 0; i < 8; i++)
11503 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11504 return strings_fp[i];
11506 gcc_unreachable ();
11509 /* Output the operands of a LDM/STM instruction to STREAM.
11510 MASK is the ARM register set mask of which only bits 0-15 are important.
11511 REG is the base register, either the frame pointer or the stack pointer,
11512 INSTR is the possibly suffixed load or store instruction.
11513 RFE is nonzero if the instruction should also copy spsr to cpsr. */
11515 static void
11516 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11517 unsigned long mask, int rfe)
11519 unsigned i;
11520 bool not_first = FALSE;
11522 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11523 fputc ('\t', stream);
11524 asm_fprintf (stream, instr, reg);
11525 fputc ('{', stream);
11527 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11528 if (mask & (1 << i))
11530 if (not_first)
11531 fprintf (stream, ", ");
11533 asm_fprintf (stream, "%r", i);
11534 not_first = TRUE;
11537 if (rfe)
11538 fprintf (stream, "}^\n");
11539 else
11540 fprintf (stream, "}\n");
11544 /* Output a FLDMD instruction to STREAM.
11545 BASE is the register containing the address.
11546 REG and COUNT specify the register range.
11547 Extra registers may be added to avoid hardware bugs.
11549 We output FLDMD even for ARMv5 VFP implementations. Although
11550 FLDMD is technically not supported until ARMv6, it is believed
11551 that all VFP implementations support its use in this context. */
11553 static void
11554 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11556 int i;
11558 /* Workaround ARM10 VFPr1 bug. */
11559 if (count == 2 && !arm_arch6)
11561 if (reg == 15)
11562 reg--;
11563 count++;
11566 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11567 load into multiple parts if we have to handle more than 16 registers. */
11568 if (count > 16)
11570 vfp_output_fldmd (stream, base, reg, 16);
11571 vfp_output_fldmd (stream, base, reg + 16, count - 16);
11572 return;
11575 fputc ('\t', stream);
11576 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11578 for (i = reg; i < reg + count; i++)
11580 if (i > reg)
11581 fputs (", ", stream);
11582 asm_fprintf (stream, "d%d", i);
11584 fputs ("}\n", stream);
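/* Illustrative output (register numbers made up): with BASE naming
   the stack pointer, REG = 8 and COUNT = 3 this prints

     fldmfdd   sp!, {d8, d9, d10}

   and on a pre-ARMv6 core a request for exactly two registers is
   widened to three by the workaround above.  */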
11589 /* Output the assembly for a store multiple. */
11591 const char *
11592 vfp_output_fstmd (rtx * operands)
11594 char pattern[100];
11595 int p;
11596 int base;
11597 int i;
11599 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11600 p = strlen (pattern);
11602 gcc_assert (GET_CODE (operands[1]) == REG);
11604 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11605 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11607 p += sprintf (&pattern[p], ", d%d", base + i);
11609 strcpy (&pattern[p], "}");
11611 output_asm_insn (pattern, operands);
11612 return "";
11616 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
11617 number of bytes pushed. */
11619 static int
11620 vfp_emit_fstmd (int base_reg, int count)
11622 rtx par;
11623 rtx dwarf;
11624 rtx tmp, reg;
11625 int i;
11627 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
11628 register pairs are stored by a store multiple insn. We avoid this
11629 by pushing an extra pair. */
11630 if (count == 2 && !arm_arch6)
11632 if (base_reg == LAST_VFP_REGNUM - 3)
11633 base_reg -= 2;
11634 count++;
11637 /* FSTMD may not store more than 16 doubleword registers at once. Split
11638 larger stores into multiple parts (up to a maximum of two, in
11639 practice). */
11640 if (count > 16)
11642 int saved;
11643 /* NOTE: base_reg is an internal register number, so each D register
11644 counts as 2. */
11645 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
11646 saved += vfp_emit_fstmd (base_reg, 16);
11647 return saved;
11650 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11651 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11653 reg = gen_rtx_REG (DFmode, base_reg);
11654 base_reg += 2;
11656 XVECEXP (par, 0, 0)
11657 = gen_rtx_SET (VOIDmode,
11658 gen_frame_mem (BLKmode,
11659 gen_rtx_PRE_DEC (BLKmode,
11660 stack_pointer_rtx)),
11661 gen_rtx_UNSPEC (BLKmode,
11662 gen_rtvec (1, reg),
11663 UNSPEC_PUSH_MULT));
11665 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11666 plus_constant (stack_pointer_rtx, -(count * 8)));
11667 RTX_FRAME_RELATED_P (tmp) = 1;
11668 XVECEXP (dwarf, 0, 0) = tmp;
11670 tmp = gen_rtx_SET (VOIDmode,
11671 gen_frame_mem (DFmode, stack_pointer_rtx),
11672 reg);
11673 RTX_FRAME_RELATED_P (tmp) = 1;
11674 XVECEXP (dwarf, 0, 1) = tmp;
11676 for (i = 1; i < count; i++)
11678 reg = gen_rtx_REG (DFmode, base_reg);
11679 base_reg += 2;
11680 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11682 tmp = gen_rtx_SET (VOIDmode,
11683 gen_frame_mem (DFmode,
11684 plus_constant (stack_pointer_rtx,
11685 i * 8)),
11686 reg);
11687 RTX_FRAME_RELATED_P (tmp) = 1;
11688 XVECEXP (dwarf, 0, i + 1) = tmp;
11691 par = emit_insn (par);
11692 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
11693 RTX_FRAME_RELATED_P (par) = 1;
11695 return count * 8;
11698 /* Emit a call instruction with pattern PAT. ADDR is the address of
11699 the call target. */
11701 void
11702 arm_emit_call_insn (rtx pat, rtx addr)
11704 rtx insn;
11706 insn = emit_call_insn (pat);
11708 /* The PIC register is live on entry to VxWorks PIC PLT entries.
11709 If the call might use such an entry, add a use of the PIC register
11710 to the instruction's CALL_INSN_FUNCTION_USAGE. */
11711 if (TARGET_VXWORKS_RTP
11712 && flag_pic
11713 && GET_CODE (addr) == SYMBOL_REF
11714 && (SYMBOL_REF_DECL (addr)
11715 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
11716 : !SYMBOL_REF_LOCAL_P (addr)))
11718 require_pic_register ();
11719 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
11723 /* Output a 'call' insn. */
11724 const char *
11725 output_call (rtx *operands)
11727 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
11729 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
11730 if (REGNO (operands[0]) == LR_REGNUM)
11732 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
11733 output_asm_insn ("mov%?\t%0, %|lr", operands);
11736 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11738 if (TARGET_INTERWORK || arm_arch4t)
11739 output_asm_insn ("bx%?\t%0", operands);
11740 else
11741 output_asm_insn ("mov%?\t%|pc, %0", operands);
11743 return "";
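/* For example (illustrative, pre-ARMv5 only): a call through r3 is
   emitted as

     mov   lr, pc
     bx    r3

   on interworking/ARMv4T targets, or as "mov pc, r3" otherwise; a
   call through lr is first copied into ip so that lr can be
   overwritten with the return address.  */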
11746 /* Output a 'call' insn that is a reference in memory. This is
11747 disabled for ARMv5 and we prefer a blx instead because otherwise
11748 there's a significant performance overhead. */
11749 const char *
11750 output_call_mem (rtx *operands)
11752 gcc_assert (!arm_arch5);
11753 if (TARGET_INTERWORK)
11755 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11756 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11757 output_asm_insn ("bx%?\t%|ip", operands);
11759 else if (regno_use_in (LR_REGNUM, operands[0]))
11761 /* LR is used in the memory address. We load the address in the
11762 first instruction. It's safe to use IP as the target of the
11763 load since the call will kill it anyway. */
11764 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11765 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11766 if (arm_arch4t)
11767 output_asm_insn ("bx%?\t%|ip", operands);
11768 else
11769 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
11771 else
11773 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11774 output_asm_insn ("ldr%?\t%|pc, %0", operands);
11777 return "";
11781 /* Output a move from arm registers to an fpa register.
11782 OPERANDS[0] is an fpa register.
11783 OPERANDS[1] is the first register of an arm register pair. */
11784 const char *
11785 output_mov_long_double_fpa_from_arm (rtx *operands)
11787 int arm_reg0 = REGNO (operands[1]);
11788 rtx ops[3];
11790 gcc_assert (arm_reg0 != IP_REGNUM);
11792 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11793 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11794 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11796 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11797 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
11799 return "";
11802 /* Output a move from an fpa register to arm registers.
11803 OPERANDS[0] is the first register of an arm register pair.
11804 OPERANDS[1] is an fpa register. */
11805 const char *
11806 output_mov_long_double_arm_from_fpa (rtx *operands)
11808 int arm_reg0 = REGNO (operands[0]);
11809 rtx ops[3];
11811 gcc_assert (arm_reg0 != IP_REGNUM);
11813 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11814 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11815 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11817 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
11818 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11819 return "";
11822 /* Output a move from arm registers to arm registers of a long double.
11823 OPERANDS[0] is the destination.
11824 OPERANDS[1] is the source. */
11825 const char *
11826 output_mov_long_double_arm_from_arm (rtx *operands)
11828 /* We have to be careful here because the two might overlap. */
11829 int dest_start = REGNO (operands[0]);
11830 int src_start = REGNO (operands[1]);
11831 rtx ops[2];
11832 int i;
11834 if (dest_start < src_start)
11836 for (i = 0; i < 3; i++)
11838 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11839 ops[1] = gen_rtx_REG (SImode, src_start + i);
11840 output_asm_insn ("mov%?\t%0, %1", ops);
11843 else
11845 for (i = 2; i >= 0; i--)
11847 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11848 ops[1] = gen_rtx_REG (SImode, src_start + i);
11849 output_asm_insn ("mov%?\t%0, %1", ops);
11853 return "";
11856 void
11857 arm_emit_movpair (rtx dest, rtx src)
11859 /* If the src is an immediate, simplify it. */
11860 if (CONST_INT_P (src))
11862 HOST_WIDE_INT val = INTVAL (src);
11863 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
11864 if ((val >> 16) & 0x0000ffff)
11865 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
11866 GEN_INT (16)),
11867 GEN_INT ((val >> 16) & 0x0000ffff));
11868 return;
11870 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
11871 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
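/* Worked example (illustrative): for SRC = 0x12345678 the first
   emit_set_insn loads 0x5678 into DEST and the second writes 0x1234
   into bits 16-31 via the ZERO_EXTRACT, the usual movw/movt shape on
   cores that provide those instructions.  A constant whose top half
   is zero needs only the first insn, and symbolic sources go through
   the HIGH/LO_SUM pair instead.  */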
11874 /* Output a move from arm registers to an fpa register.
11875 OPERANDS[0] is an fpa register.
11876 OPERANDS[1] is the first register of an arm register pair. */
11877 const char *
11878 output_mov_double_fpa_from_arm (rtx *operands)
11880 int arm_reg0 = REGNO (operands[1]);
11881 rtx ops[2];
11883 gcc_assert (arm_reg0 != IP_REGNUM);
11885 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11886 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11887 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
11888 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
11889 return "";
11892 /* Output a move from an fpa register to arm registers.
11893 OPERANDS[0] is the first register of an arm register pair.
11894 OPERANDS[1] is an fpa register. */
11895 const char *
11896 output_mov_double_arm_from_fpa (rtx *operands)
11898 int arm_reg0 = REGNO (operands[0]);
11899 rtx ops[2];
11901 gcc_assert (arm_reg0 != IP_REGNUM);
11903 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11904 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11905 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
11906 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
11907 return "";
11910 /* Output a move between double words.
11911 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
11912 or MEM<-REG and all MEMs must be offsettable addresses. */
11913 const char *
11914 output_move_double (rtx *operands)
11916 enum rtx_code code0 = GET_CODE (operands[0]);
11917 enum rtx_code code1 = GET_CODE (operands[1]);
11918 rtx otherops[3];
11920 if (code0 == REG)
11922 unsigned int reg0 = REGNO (operands[0]);
11924 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
11926 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
11928 switch (GET_CODE (XEXP (operands[1], 0)))
11930 case REG:
11931 if (TARGET_LDRD
11932 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
11933 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
11934 else
11935 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
11936 break;
11938 case PRE_INC:
11939 gcc_assert (TARGET_LDRD);
11940 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
11941 break;
11943 case PRE_DEC:
11944 if (TARGET_LDRD)
11945 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
11946 else
11947 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
11948 break;
11950 case POST_INC:
11951 if (TARGET_LDRD)
11952 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
11953 else
11954 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
11955 break;
11957 case POST_DEC:
11958 gcc_assert (TARGET_LDRD);
11959 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
11960 break;
11962 case PRE_MODIFY:
11963 case POST_MODIFY:
11964 /* Autoincrement addressing modes should never have overlapping
11965 base and destination registers, and overlapping index registers
11966 are already prohibited, so this doesn't need to worry about
11967 fix_cm3_ldrd. */
11968 otherops[0] = operands[0];
11969 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
11970 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
11972 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
11974 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
11976 /* Registers overlap so split out the increment. */
11977 output_asm_insn ("add%?\t%1, %1, %2", otherops);
11978 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
11980 else
11982 /* Use a single insn if we can.
11983 FIXME: IWMMXT allows offsets larger than ldrd can
11984 handle, fix these up with a pair of ldr. */
11985 if (TARGET_THUMB2
11986 || GET_CODE (otherops[2]) != CONST_INT
11987 || (INTVAL (otherops[2]) > -256
11988 && INTVAL (otherops[2]) < 256))
11989 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
11990 else
11992 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
11993 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11997 else
11999 /* Use a single insn if we can.
12000 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12001 fix these up with a pair of ldr. */
12002 if (TARGET_THUMB2
12003 || GET_CODE (otherops[2]) != CONST_INT
12004 || (INTVAL (otherops[2]) > -256
12005 && INTVAL (otherops[2]) < 256))
12006 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12007 else
12009 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12010 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12013 break;
12015 case LABEL_REF:
12016 case CONST:
12017 /* We might be able to use ldrd %0, %1 here. However the range is
12018 different to ldr/adr, and it is broken on some ARMv7-M
12019 implementations. */
12020 /* Use the second register of the pair to avoid problematic
12021 overlap. */
12022 otherops[1] = operands[1];
12023 output_asm_insn ("adr%?\t%0, %1", otherops);
12024 operands[1] = otherops[0];
12025 if (TARGET_LDRD)
12026 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12027 else
12028 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12029 break;
12031 /* ??? This needs checking for thumb2. */
12032 default:
12033 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12034 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12036 otherops[0] = operands[0];
12037 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12038 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12040 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12042 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12044 switch ((int) INTVAL (otherops[2]))
12046 case -8:
12047 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12048 return "";
12049 case -4:
12050 if (TARGET_THUMB2)
12051 break;
12052 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12053 return "";
12054 case 4:
12055 if (TARGET_THUMB2)
12056 break;
12057 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12058 return "";
12061 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12062 operands[1] = otherops[0];
12063 if (TARGET_LDRD
12064 && (GET_CODE (otherops[2]) == REG
12065 || TARGET_THUMB2
12066 || (GET_CODE (otherops[2]) == CONST_INT
12067 && INTVAL (otherops[2]) > -256
12068 && INTVAL (otherops[2]) < 256)))
12070 if (reg_overlap_mentioned_p (operands[0],
12071 otherops[2]))
12073 rtx tmp;
12074 /* Swap base and index registers over to
12075 avoid a conflict. */
12076 tmp = otherops[1];
12077 otherops[1] = otherops[2];
12078 otherops[2] = tmp;
12080 /* If both registers conflict, it will usually
12081 have been fixed by a splitter. */
12082 if (reg_overlap_mentioned_p (operands[0], otherops[2])
12083 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
12085 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12086 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12088 else
12090 otherops[0] = operands[0];
12091 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
12093 return "";
12096 if (GET_CODE (otherops[2]) == CONST_INT)
12098 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12099 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12100 else
12101 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12103 else
12104 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12106 else
12107 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12109 if (TARGET_LDRD)
12110 return "ldr%(d%)\t%0, [%1]";
12112 return "ldm%(ia%)\t%1, %M0";
12114 else
12116 otherops[1] = adjust_address (operands[1], SImode, 4);
12117 /* Take care of overlapping base/data reg. */
12118 if (reg_mentioned_p (operands[0], operands[1]))
12120 output_asm_insn ("ldr%?\t%0, %1", otherops);
12121 output_asm_insn ("ldr%?\t%0, %1", operands);
12123 else
12125 output_asm_insn ("ldr%?\t%0, %1", operands);
12126 output_asm_insn ("ldr%?\t%0, %1", otherops);
12131 else
12133 /* Constraints should ensure this. */
12134 gcc_assert (code0 == MEM && code1 == REG);
12135 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12137 switch (GET_CODE (XEXP (operands[0], 0)))
12139 case REG:
12140 if (TARGET_LDRD)
12141 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12142 else
12143 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12144 break;
12146 case PRE_INC:
12147 gcc_assert (TARGET_LDRD);
12148 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12149 break;
12151 case PRE_DEC:
12152 if (TARGET_LDRD)
12153 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12154 else
12155 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12156 break;
12158 case POST_INC:
12159 if (TARGET_LDRD)
12160 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12161 else
12162 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12163 break;
12165 case POST_DEC:
12166 gcc_assert (TARGET_LDRD);
12167 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12168 break;
12170 case PRE_MODIFY:
12171 case POST_MODIFY:
12172 otherops[0] = operands[1];
12173 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12174 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
12176 /* IWMMXT allows offsets larger than strd can handle,
12177 fix these up with a pair of str. */
12178 if (!TARGET_THUMB2
12179 && GET_CODE (otherops[2]) == CONST_INT
12180 && (INTVAL(otherops[2]) <= -256
12181 || INTVAL(otherops[2]) >= 256))
12183 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12185 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
12186 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
12188 else
12190 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
12191 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
12194 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12195 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12196 else
12197 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12198 break;
12200 case PLUS:
12201 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12202 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12204 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12206 case -8:
12207 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12208 return "";
12210 case -4:
12211 if (TARGET_THUMB2)
12212 break;
12213 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12214 return "";
12216 case 4:
12217 if (TARGET_THUMB2)
12218 break;
12219 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12220 return "";
12223 if (TARGET_LDRD
12224 && (GET_CODE (otherops[2]) == REG
12225 || TARGET_THUMB2
12226 || (GET_CODE (otherops[2]) == CONST_INT
12227 && INTVAL (otherops[2]) > -256
12228 && INTVAL (otherops[2]) < 256)))
12230 otherops[0] = operands[1];
12231 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12232 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
12233 return "";
12235 /* Fall through */
12237 default:
12238 otherops[0] = adjust_address (operands[0], SImode, 4);
12239 otherops[1] = operands[1];
12240 output_asm_insn ("str%?\t%1, %0", operands);
12241 output_asm_insn ("str%?\t%H1, %0", otherops);
12245 return "";
12248 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12249 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
12251 const char *
12252 output_move_quad (rtx *operands)
12254 if (REG_P (operands[0]))
12256 /* Load, or reg->reg move. */
12258 if (MEM_P (operands[1]))
12260 switch (GET_CODE (XEXP (operands[1], 0)))
12262 case REG:
12263 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12264 break;
12266 case LABEL_REF:
12267 case CONST:
12268 output_asm_insn ("adr%?\t%0, %1", operands);
12269 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12270 break;
12272 default:
12273 gcc_unreachable ();
12276 else
12278 rtx ops[2];
12279 int dest, src, i;
12281 gcc_assert (REG_P (operands[1]));
12283 dest = REGNO (operands[0]);
12284 src = REGNO (operands[1]);
12286 /* This seems pretty dumb, but hopefully GCC won't try to do it
12287 very often. */
12288 if (dest < src)
12289 for (i = 0; i < 4; i++)
12291 ops[0] = gen_rtx_REG (SImode, dest + i);
12292 ops[1] = gen_rtx_REG (SImode, src + i);
12293 output_asm_insn ("mov%?\t%0, %1", ops);
12295 else
12296 for (i = 3; i >= 0; i--)
12298 ops[0] = gen_rtx_REG (SImode, dest + i);
12299 ops[1] = gen_rtx_REG (SImode, src + i);
12300 output_asm_insn ("mov%?\t%0, %1", ops);
12304 else
12306 gcc_assert (MEM_P (operands[0]));
12307 gcc_assert (REG_P (operands[1]));
12308 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12310 switch (GET_CODE (XEXP (operands[0], 0)))
12312 case REG:
12313 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12314 break;
12316 default:
12317 gcc_unreachable ();
12321 return "";
12324 /* Output a VFP load or store instruction. */
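/* For illustration (pre-UAL VFP mnemonics, unconditional): a double-precision
   load from [r0, #8] into d1 falls into the default case below and emits
   "fldd d1, [r0, #8]", while a double-precision store with a PRE_DEC address
   on sp emits "fstmdbd sp!, {d1}".  The register names here are only
   examples.  */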
12326 const char *
12327 output_move_vfp (rtx *operands)
12329 rtx reg, mem, addr, ops[2];
12330 int load = REG_P (operands[0]);
12331 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12332 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12333 const char *templ;
12334 char buff[50];
12335 enum machine_mode mode;
12337 reg = operands[!load];
12338 mem = operands[load];
12340 mode = GET_MODE (reg);
12342 gcc_assert (REG_P (reg));
12343 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12344 gcc_assert (mode == SFmode
12345 || mode == DFmode
12346 || mode == SImode
12347 || mode == DImode
12348 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12349 gcc_assert (MEM_P (mem));
12351 addr = XEXP (mem, 0);
12353 switch (GET_CODE (addr))
12355 case PRE_DEC:
12356 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12357 ops[0] = XEXP (addr, 0);
12358 ops[1] = reg;
12359 break;
12361 case POST_INC:
12362 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12363 ops[0] = XEXP (addr, 0);
12364 ops[1] = reg;
12365 break;
12367 default:
12368 templ = "f%s%c%%?\t%%%s0, %%1%s";
12369 ops[0] = reg;
12370 ops[1] = mem;
12371 break;
12374 sprintf (buff, templ,
12375 load ? "ld" : "st",
12376 dp ? 'd' : 's',
12377 dp ? "P" : "",
12378 integer_p ? "\t%@ int" : "");
12379 output_asm_insn (buff, ops);
12381 return "";
12384 /* Output a Neon quad-word load or store, or a load or store for
12385 larger structure modes.
12387 WARNING: The ordering of elements is weird in big-endian mode,
12388 because we use VSTM, as required by the EABI. GCC RTL defines
12389 element ordering based on in-memory order. This can differ
12390 from the architectural ordering of elements within a NEON register.
12391 The intrinsics defined in arm_neon.h use the NEON register element
12392 ordering, not the GCC RTL element ordering.
12394 For example, the in-memory ordering of a big-endian quadword
12395 vector with 16-bit elements when stored from register pair {d0,d1}
12396 will be (lowest address first, d0[N] is NEON register element N):
12398 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12400 When necessary, quadword registers (dN, dN+1) are moved to ARM
12401 registers from rN in the order:
12403 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12405 So that STM/LDM can be used on vectors in ARM registers, and the
12406 same memory layout will result as if VSTM/VLDM were used. */
12408 const char *
12409 output_move_neon (rtx *operands)
12411 rtx reg, mem, addr, ops[2];
12412 int regno, load = REG_P (operands[0]);
12413 const char *templ;
12414 char buff[50];
12415 enum machine_mode mode;
12417 reg = operands[!load];
12418 mem = operands[load];
12420 mode = GET_MODE (reg);
12422 gcc_assert (REG_P (reg));
12423 regno = REGNO (reg);
12424 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12425 || NEON_REGNO_OK_FOR_QUAD (regno));
12426 gcc_assert (VALID_NEON_DREG_MODE (mode)
12427 || VALID_NEON_QREG_MODE (mode)
12428 || VALID_NEON_STRUCT_MODE (mode));
12429 gcc_assert (MEM_P (mem));
12431 addr = XEXP (mem, 0);
12433 /* Strip off const from addresses like (const (plus (...))). */
12434 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12435 addr = XEXP (addr, 0);
12437 switch (GET_CODE (addr))
12439 case POST_INC:
12440 templ = "v%smia%%?\t%%0!, %%h1";
12441 ops[0] = XEXP (addr, 0);
12442 ops[1] = reg;
12443 break;
12445 case PRE_DEC:
12446 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12447 templ = "v%smdb%%?\t%%0!, %%h1";
12448 ops[0] = XEXP (addr, 0);
12449 ops[1] = reg;
12450 break;
12452 case POST_MODIFY:
12453 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12454 gcc_unreachable ();
12456 case LABEL_REF:
12457 case PLUS:
12459 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12460 int i;
12461 int overlap = -1;
12462 for (i = 0; i < nregs; i++)
12464 /* We're only using DImode here because it's a convenient size. */
12465 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12466 ops[1] = adjust_address (mem, DImode, 8 * i);
12467 if (reg_overlap_mentioned_p (ops[0], mem))
12469 gcc_assert (overlap == -1);
12470 overlap = i;
12472 else
12474 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12475 output_asm_insn (buff, ops);
12478 if (overlap != -1)
12480 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12481 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12482 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12483 output_asm_insn (buff, ops);
12486 return "";
12489 default:
12490 templ = "v%smia%%?\t%%m0, %%h1";
12491 ops[0] = mem;
12492 ops[1] = reg;
12495 sprintf (buff, templ, load ? "ld" : "st");
12496 output_asm_insn (buff, ops);
12498 return "";
12501 /* Output an ADD r, s, #n where n may be too big for one instruction.
12502 If adding zero to one register, output nothing. */
12503 const char *
12504 output_add_immediate (rtx *operands)
12506 HOST_WIDE_INT n = INTVAL (operands[2]);
12508 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12510 if (n < 0)
12511 output_multi_immediate (operands,
12512 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12513 -n);
12514 else
12515 output_multi_immediate (operands,
12516 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12520 return "";
12523 /* Output a multiple immediate operation.
12524 OPERANDS is the vector of operands referred to in the output patterns.
12525 INSTR1 is the output pattern to use for the first constant.
12526 INSTR2 is the output pattern to use for subsequent constants.
12527 IMMED_OP is the index of the constant slot in OPERANDS.
12528 N is the constant value. */
12529 static const char *
12530 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12531 int immed_op, HOST_WIDE_INT n)
12533 #if HOST_BITS_PER_WIDE_INT > 32
12534 n &= 0xffffffff;
12535 #endif
12537 if (n == 0)
12539 /* Quick and easy output. */
12540 operands[immed_op] = const0_rtx;
12541 output_asm_insn (instr1, operands);
12543 else
12545 int i;
12546 const char * instr = instr1;
12548 /* Note that n is never zero here (which would give no output). */
12549 for (i = 0; i < 32; i += 2)
12551 if (n & (3 << i))
12553 operands[immed_op] = GEN_INT (n & (255 << i));
12554 output_asm_insn (instr, operands);
12555 instr = instr2;
12556 i += 6;
12561 return "";
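/* Worked example: when called from output_add_immediate with n = 0x478 and
   operands r0 = r1 + n, the loop above finds the chunks 0x78 and 0x400 (each
   an 8-bit value rotated by an even amount, hence a legal ARM immediate) and
   emits

       add     r0, r1, #120
       add     r0, r0, #1024  */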
12564 /* Return the name of a shifter operation. */
12565 static const char *
12566 arm_shift_nmem(enum rtx_code code)
12568 switch (code)
12570 case ASHIFT:
12571 return ARM_LSL_NAME;
12573 case ASHIFTRT:
12574 return "asr";
12576 case LSHIFTRT:
12577 return "lsr";
12579 case ROTATERT:
12580 return "ror";
12582 default:
12583 abort();
12587 /* Return the appropriate ARM instruction for the operation code.
12588 The returned result should not be overwritten. OP is the rtx of the
12589 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
12590 was shifted. */
12591 const char *
12592 arithmetic_instr (rtx op, int shift_first_arg)
12594 switch (GET_CODE (op))
12596 case PLUS:
12597 return "add";
12599 case MINUS:
12600 return shift_first_arg ? "rsb" : "sub";
12602 case IOR:
12603 return "orr";
12605 case XOR:
12606 return "eor";
12608 case AND:
12609 return "and";
12611 case ASHIFT:
12612 case ASHIFTRT:
12613 case LSHIFTRT:
12614 case ROTATERT:
12615 return arm_shift_nmem(GET_CODE(op));
12617 default:
12618 gcc_unreachable ();
12622 /* Ensure valid constant shifts and return the appropriate shift mnemonic
12623 for the operation code. The returned result should not be overwritten.
12624 OP is the rtx code of the shift.
12625 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
12626 constant shift amount if the shift is by a constant. */
12627 static const char *
12628 shift_op (rtx op, HOST_WIDE_INT *amountp)
12630 const char * mnem;
12631 enum rtx_code code = GET_CODE (op);
12633 switch (GET_CODE (XEXP (op, 1)))
12635 case REG:
12636 case SUBREG:
12637 *amountp = -1;
12638 break;
12640 case CONST_INT:
12641 *amountp = INTVAL (XEXP (op, 1));
12642 break;
12644 default:
12645 gcc_unreachable ();
12648 switch (code)
12650 case ROTATE:
12651 gcc_assert (*amountp != -1);
12652 *amountp = 32 - *amountp;
12653 code = ROTATERT;
12655 /* Fall through. */
12657 case ASHIFT:
12658 case ASHIFTRT:
12659 case LSHIFTRT:
12660 case ROTATERT:
12661 mnem = arm_shift_nmem(code);
12662 break;
12664 case MULT:
12665 /* We never have to worry about the amount being other than a
12666 power of 2, since this case can never be reloaded from a reg. */
12667 gcc_assert (*amountp != -1);
12668 *amountp = int_log2 (*amountp);
12669 return ARM_LSL_NAME;
12671 default:
12672 gcc_unreachable ();
12675 if (*amountp != -1)
12677 /* This is not 100% correct, but follows from the desire to merge
12678 multiplication by a power of 2 with the recognizer for a
12679 shift. >=32 is not a valid shift for "lsl", so we must try and
12680 output a shift that produces the correct arithmetical result.
12681 Using lsr #32 is identical except for the fact that the carry bit
12682 is not set correctly if we set the flags; but we never use the
12683 carry bit from such an operation, so we can ignore that. */
12684 if (code == ROTATERT)
12685 /* Rotate is just modulo 32. */
12686 *amountp &= 31;
12687 else if (*amountp != (*amountp & 31))
12689 if (code == ASHIFT)
12690 mnem = "lsr";
12691 *amountp = 32;
12694 /* Shifts of 0 are no-ops. */
12695 if (*amountp == 0)
12696 return NULL;
12699 return mnem;
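/* For illustration: (mult x 8) yields "lsl" with *AMOUNTP = 3; (rotate x 5)
   is converted to "ror" with *AMOUNTP = 27; an ashift by 32 or more is
   output as "lsr" with *AMOUNTP forced to 32 (see the comment above); and a
   shift by zero returns NULL so that no shift is printed at all.  */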
12702 /* Return the base-2 logarithm of POWER, which must be an exact power of two. */
12704 static HOST_WIDE_INT
12705 int_log2 (HOST_WIDE_INT power)
12707 HOST_WIDE_INT shift = 0;
12709 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
12711 gcc_assert (shift <= 31);
12712 shift++;
12715 return shift;
12718 /* Output a .ascii pseudo-op, keeping track of lengths. This is
12719 because /bin/as is horribly restrictive. The judgement about
12720 whether or not each character is 'printable' (and can be output as
12721 is) or not (and must be printed with an octal escape) must be made
12722 with reference to the *host* character set -- the situation is
12723 similar to that discussed in the comments above pp_c_char in
12724 c-pretty-print.c. */
12726 #define MAX_ASCII_LEN 51
12728 void
12729 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
12731 int i;
12732 int len_so_far = 0;
12734 fputs ("\t.ascii\t\"", stream);
12736 for (i = 0; i < len; i++)
12738 int c = p[i];
12740 if (len_so_far >= MAX_ASCII_LEN)
12742 fputs ("\"\n\t.ascii\t\"", stream);
12743 len_so_far = 0;
12746 if (ISPRINT (c))
12748 if (c == '\\' || c == '\"')
12750 putc ('\\', stream);
12751 len_so_far++;
12753 putc (c, stream);
12754 len_so_far++;
12756 else
12758 fprintf (stream, "\\%03o", c);
12759 len_so_far += 4;
12763 fputs ("\"\n", stream);
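/* For illustration: called with the four bytes 'a', 'b', '"', '\n' this
   emits

       .ascii  "ab\"\012"

   starting a fresh .ascii directive once MAX_ASCII_LEN characters have been
   written on the current one.  */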
12766 /* Compute the register save mask for registers 0 through 12
12767 inclusive. This code is used by arm_compute_save_reg_mask. */
12769 static unsigned long
12770 arm_compute_save_reg0_reg12_mask (void)
12772 unsigned long func_type = arm_current_func_type ();
12773 unsigned long save_reg_mask = 0;
12774 unsigned int reg;
12776 if (IS_INTERRUPT (func_type))
12778 unsigned int max_reg;
12779 /* Interrupt functions must not corrupt any registers,
12780 even call clobbered ones. If this is a leaf function
12781 we can just examine the registers used by the RTL, but
12782 otherwise we have to assume that whatever function is
12783 called might clobber anything, and so we have to save
12784 all the call-clobbered registers as well. */
12785 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
12786 /* FIQ handlers have registers r8 - r12 banked, so
12787 we only need to check r0 - r7; normal ISRs only
12788 bank r14 and r15, so we must check up to r12.
12789 r13 is the stack pointer which is always preserved,
12790 so we do not need to consider it here. */
12791 max_reg = 7;
12792 else
12793 max_reg = 12;
12795 for (reg = 0; reg <= max_reg; reg++)
12796 if (df_regs_ever_live_p (reg)
12797 || (! current_function_is_leaf && call_used_regs[reg]))
12798 save_reg_mask |= (1 << reg);
12800 /* Also save the pic base register if necessary. */
12801 if (flag_pic
12802 && !TARGET_SINGLE_PIC_BASE
12803 && arm_pic_register != INVALID_REGNUM
12804 && crtl->uses_pic_offset_table)
12805 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12807 else if (IS_VOLATILE(func_type))
12809 /* For noreturn functions we historically omitted register saves
12810 altogether. However this really messes up debugging. As a
12811 compromise save just the frame pointers. Combined with the link
12812 register saved elsewhere this should be sufficient to get
12813 a backtrace. */
12814 if (frame_pointer_needed)
12815 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12816 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
12817 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
12818 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
12819 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
12821 else
12823 /* In the normal case we only need to save those registers
12824 which are call saved and which are used by this function. */
12825 for (reg = 0; reg <= 11; reg++)
12826 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12827 save_reg_mask |= (1 << reg);
12829 /* Handle the frame pointer as a special case. */
12830 if (frame_pointer_needed)
12831 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12833 /* If we aren't loading the PIC register,
12834 don't stack it even though it may be live. */
12835 if (flag_pic
12836 && !TARGET_SINGLE_PIC_BASE
12837 && arm_pic_register != INVALID_REGNUM
12838 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
12839 || crtl->uses_pic_offset_table))
12840 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12842 /* The prologue will copy SP into R0, so save it. */
12843 if (IS_STACKALIGN (func_type))
12844 save_reg_mask |= 1;
12847 /* Save registers so the exception handler can modify them. */
12848 if (crtl->calls_eh_return)
12850 unsigned int i;
12852 for (i = 0; ; i++)
12854 reg = EH_RETURN_DATA_REGNO (i);
12855 if (reg == INVALID_REGNUM)
12856 break;
12857 save_reg_mask |= 1 << reg;
12861 return save_reg_mask;
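/* For illustration: an ordinary (non-interrupt, non-noreturn) function that
   uses r4 and r6 and needs a frame pointer would typically get the mask
   (1 << 4) | (1 << 6) | (1 << HARD_FRAME_POINTER_REGNUM), assuming no PIC
   base register is in use and the function is neither stack-realigning nor
   a __builtin_eh_return caller.  */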
12865 /* Compute the number of bytes used to store the static chain register on the
12866 stack, above the stack frame. We need to know this accurately to get the
12867 alignment of the rest of the stack frame correct. */
12869 static int arm_compute_static_chain_stack_bytes (void)
12871 unsigned long func_type = arm_current_func_type ();
12872 int static_chain_stack_bytes = 0;
12874 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
12875 IS_NESTED (func_type) &&
12876 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
12877 static_chain_stack_bytes = 4;
12879 return static_chain_stack_bytes;
12883 /* Compute a bit mask of which registers need to be
12884 saved on the stack for the current function.
12885 This is used by arm_get_frame_offsets, which may add extra registers. */
12887 static unsigned long
12888 arm_compute_save_reg_mask (void)
12890 unsigned int save_reg_mask = 0;
12891 unsigned long func_type = arm_current_func_type ();
12892 unsigned int reg;
12894 if (IS_NAKED (func_type))
12895 /* This should never really happen. */
12896 return 0;
12898 /* If we are creating a stack frame, then we must save the frame pointer,
12899 IP (which will hold the old stack pointer), LR and the PC. */
12900 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12901 save_reg_mask |=
12902 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
12903 | (1 << IP_REGNUM)
12904 | (1 << LR_REGNUM)
12905 | (1 << PC_REGNUM);
12907 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
12909 /* Decide if we need to save the link register.
12910 Interrupt routines have their own banked link register,
12911 so they never need to save it.
12912 Otherwise if we do not use the link register we do not need to save
12913 it. If we are pushing other registers onto the stack however, we
12914 can save an instruction in the epilogue by pushing the link register
12915 now and then popping it back into the PC. This incurs extra memory
12916 accesses though, so we only do it when optimizing for size, and only
12917 if we know that we will not need a fancy return sequence. */
12918 if (df_regs_ever_live_p (LR_REGNUM)
12919 || (save_reg_mask
12920 && optimize_size
12921 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
12922 && !crtl->calls_eh_return))
12923 save_reg_mask |= 1 << LR_REGNUM;
12925 if (cfun->machine->lr_save_eliminated)
12926 save_reg_mask &= ~ (1 << LR_REGNUM);
12928 if (TARGET_REALLY_IWMMXT
12929 && ((bit_count (save_reg_mask)
12930 + ARM_NUM_INTS (crtl->args.pretend_args_size +
12931 arm_compute_static_chain_stack_bytes())
12932 ) % 2) != 0)
12934 /* The total number of registers that are going to be pushed
12935 onto the stack is odd. We need to ensure that the stack
12936 is 64-bit aligned before we start to save iWMMXt registers,
12937 and also before we start to create locals. (A local variable
12938 might be a double or long long which we will load/store using
12939 an iWMMXt instruction). Therefore we need to push another
12940 ARM register, so that the stack will be 64-bit aligned. We
12941 try to avoid using the arg registers (r0 - r3) as they might be
12942 used to pass values in a tail call. */
12943 for (reg = 4; reg <= 12; reg++)
12944 if ((save_reg_mask & (1 << reg)) == 0)
12945 break;
12947 if (reg <= 12)
12948 save_reg_mask |= (1 << reg);
12949 else
12951 cfun->machine->sibcall_blocked = 1;
12952 save_reg_mask |= (1 << 3);
12956 /* We may need to push an additional register for use initializing the
12957 PIC base register. */
12958 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
12959 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
12961 reg = thumb_find_work_register (1 << 4);
12962 if (!call_used_regs[reg])
12963 save_reg_mask |= (1 << reg);
12966 return save_reg_mask;
12970 /* Compute a bit mask of which registers need to be
12971 saved on the stack for the current function. */
12972 static unsigned long
12973 thumb1_compute_save_reg_mask (void)
12975 unsigned long mask;
12976 unsigned reg;
12978 mask = 0;
12979 for (reg = 0; reg < 12; reg ++)
12980 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12981 mask |= 1 << reg;
12983 if (flag_pic
12984 && !TARGET_SINGLE_PIC_BASE
12985 && arm_pic_register != INVALID_REGNUM
12986 && crtl->uses_pic_offset_table)
12987 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12989 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
12990 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
12991 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
12993 /* LR will also be pushed if any lo regs are pushed. */
12994 if (mask & 0xff || thumb_force_lr_save ())
12995 mask |= (1 << LR_REGNUM);
12997 /* Make sure we have a low work register if we need one.
12998 We will need one if we are going to push a high register,
12999 but we are not currently intending to push a low register. */
13000 if ((mask & 0xff) == 0
13001 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13003 /* Use thumb_find_work_register to choose which register
13004 we will use. If the register is live then we will
13005 have to push it. Use LAST_LO_REGNUM as our fallback
13006 choice for the register to select. */
13007 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13008 /* Make sure the register returned by thumb_find_work_register is
13009 not part of the return value. */
13010 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13011 reg = LAST_LO_REGNUM;
13013 if (! call_used_regs[reg])
13014 mask |= 1 << reg;
13017 /* The 504 below is 8 bytes less than 512 because there are two possible
13018 alignment words. We can't tell here if they will be present or not so we
13019 have to play it safe and assume that they are. */
13020 if ((CALLER_INTERWORKING_SLOT_SIZE +
13021 ROUND_UP_WORD (get_frame_size ()) +
13022 crtl->outgoing_args_size) >= 504)
13024 /* This is the same as the code in thumb1_expand_prologue() which
13025 determines which register to use for stack decrement. */
13026 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13027 if (mask & (1 << reg))
13028 break;
13030 if (reg > LAST_LO_REGNUM)
13032 /* Make sure we have a register available for stack decrement. */
13033 mask |= 1 << LAST_LO_REGNUM;
13037 return mask;
13041 /* Return the number of bytes required to save VFP registers. */
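/* For illustration: if exactly two consecutive D registers (say d8 and d9)
   need saving, the loop below finds one run of length two; on pre-ARMv6
   cores the ARM10 VFPr1 workaround pads the run to three, so 24 bytes are
   reserved rather than 16.  */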
13042 static int
13043 arm_get_vfp_saved_size (void)
13045 unsigned int regno;
13046 int count;
13047 int saved;
13049 saved = 0;
13050 /* Space for saved VFP registers. */
13051 if (TARGET_HARD_FLOAT && TARGET_VFP)
13053 count = 0;
13054 for (regno = FIRST_VFP_REGNUM;
13055 regno < LAST_VFP_REGNUM;
13056 regno += 2)
13058 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13059 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13061 if (count > 0)
13063 /* Workaround ARM10 VFPr1 bug. */
13064 if (count == 2 && !arm_arch6)
13065 count++;
13066 saved += count * 8;
13068 count = 0;
13070 else
13071 count++;
13073 if (count > 0)
13075 if (count == 2 && !arm_arch6)
13076 count++;
13077 saved += count * 8;
13080 return saved;
13084 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13085 everything bar the final return instruction. */
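/* For illustration, some typical sequences produced below (divided syntax,
   unconditional): a leaf function with nothing saved returns with "bx lr"
   (or "mov pc, lr" on cores without BX); a function that saved r4, r5 and lr
   and needs no special exit restores and returns in one go with
   "ldmfd sp!, {r4, r5, pc}"; an ISR instead ends with "subs pc, lr, #4" so
   that the CPSR is restored as well.  */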
13086 const char *
13087 output_return_instruction (rtx operand, int really_return, int reverse)
13089 char conditional[10];
13090 char instr[100];
13091 unsigned reg;
13092 unsigned long live_regs_mask;
13093 unsigned long func_type;
13094 arm_stack_offsets *offsets;
13096 func_type = arm_current_func_type ();
13098 if (IS_NAKED (func_type))
13099 return "";
13101 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13103 /* If this function was declared non-returning, and we have
13104 found a tail call, then we have to trust that the called
13105 function won't return. */
13106 if (really_return)
13108 rtx ops[2];
13110 /* Otherwise, trap an attempted return by aborting. */
13111 ops[0] = operand;
13112 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
13113 : "abort");
13114 assemble_external_libcall (ops[1]);
13115 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
13118 return "";
13121 gcc_assert (!cfun->calls_alloca || really_return);
13123 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13125 cfun->machine->return_used_this_function = 1;
13127 offsets = arm_get_frame_offsets ();
13128 live_regs_mask = offsets->saved_regs_mask;
13130 if (live_regs_mask)
13132 const char * return_reg;
13134 /* If we do not have any special requirements for function exit
13135 (e.g. interworking) then we can load the return address
13136 directly into the PC. Otherwise we must load it into LR. */
13137 if (really_return
13138 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
13139 return_reg = reg_names[PC_REGNUM];
13140 else
13141 return_reg = reg_names[LR_REGNUM];
13143 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
13145 /* There are three possible reasons for the IP register
13146 being saved. 1) a stack frame was created, in which case
13147 IP contains the old stack pointer, or 2) an ISR routine
13148 corrupted it, or 3) it was saved to align the stack on
13149 iWMMXt. In case 1, restore IP into SP, otherwise just
13150 restore IP. */
13151 if (frame_pointer_needed)
13153 live_regs_mask &= ~ (1 << IP_REGNUM);
13154 live_regs_mask |= (1 << SP_REGNUM);
13156 else
13157 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
13160 /* On some ARM architectures it is faster to use LDR rather than
13161 LDM to load a single register. On other architectures, the
13162 cost is the same. In 26 bit mode, or for exception handlers,
13163 we have to use LDM to load the PC so that the CPSR is also
13164 restored. */
13165 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13166 if (live_regs_mask == (1U << reg))
13167 break;
13169 if (reg <= LAST_ARM_REGNUM
13170 && (reg != LR_REGNUM
13171 || ! really_return
13172 || ! IS_INTERRUPT (func_type)))
13174 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
13175 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
13177 else
13179 char *p;
13180 int first = 1;
13182 /* Generate the load multiple instruction to restore the
13183 registers. Note we can get here, even if
13184 frame_pointer_needed is true, but only if sp already
13185 points to the base of the saved core registers. */
13186 if (live_regs_mask & (1 << SP_REGNUM))
13188 unsigned HOST_WIDE_INT stack_adjust;
13190 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
13191 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
13193 if (stack_adjust && arm_arch5 && TARGET_ARM)
13194 if (TARGET_UNIFIED_ASM)
13195 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
13196 else
13197 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
13198 else
13200 /* If we can't use ldmib (SA110 bug),
13201 then try to pop r3 instead. */
13202 if (stack_adjust)
13203 live_regs_mask |= 1 << 3;
13205 if (TARGET_UNIFIED_ASM)
13206 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
13207 else
13208 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13211 else
13212 if (TARGET_UNIFIED_ASM)
13213 sprintf (instr, "pop%s\t{", conditional);
13214 else
13215 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
13217 p = instr + strlen (instr);
13219 for (reg = 0; reg <= SP_REGNUM; reg++)
13220 if (live_regs_mask & (1 << reg))
13222 int l = strlen (reg_names[reg]);
13224 if (first)
13225 first = 0;
13226 else
13228 memcpy (p, ", ", 2);
13229 p += 2;
13232 memcpy (p, "%|", 2);
13233 memcpy (p + 2, reg_names[reg], l);
13234 p += l + 2;
13237 if (live_regs_mask & (1 << LR_REGNUM))
13239 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13240 /* If returning from an interrupt, restore the CPSR. */
13241 if (IS_INTERRUPT (func_type))
13242 strcat (p, "^");
13244 else
13245 strcpy (p, "}");
13248 output_asm_insn (instr, & operand);
13250 /* See if we need to generate an extra instruction to
13251 perform the actual function return. */
13252 if (really_return
13253 && func_type != ARM_FT_INTERWORKED
13254 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13256 /* The return has already been handled
13257 by loading the LR into the PC. */
13258 really_return = 0;
13262 if (really_return)
13264 switch ((int) ARM_FUNC_TYPE (func_type))
13266 case ARM_FT_ISR:
13267 case ARM_FT_FIQ:
13268 /* ??? This is wrong for unified assembly syntax. */
13269 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13270 break;
13272 case ARM_FT_INTERWORKED:
13273 sprintf (instr, "bx%s\t%%|lr", conditional);
13274 break;
13276 case ARM_FT_EXCEPTION:
13277 /* ??? This is wrong for unified assembly syntax. */
13278 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13279 break;
13281 default:
13282 /* Use bx if it's available. */
13283 if (arm_arch5 || arm_arch4t)
13284 sprintf (instr, "bx%s\t%%|lr", conditional);
13285 else
13286 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13287 break;
13290 output_asm_insn (instr, & operand);
13293 return "";
13296 /* Write the function name into the code section, directly preceding
13297 the function prologue.
13299 Code will be output similar to this:
13301 .ascii "arm_poke_function_name", 0
13302 .align
13304 .word 0xff000000 + (t1 - t0)
13305 arm_poke_function_name
13306 mov ip, sp
13307 stmfd sp!, {fp, ip, lr, pc}
13308 sub fp, ip, #4
13310 When performing a stack backtrace, code can inspect the value
13311 of 'pc' stored at 'fp' + 0. If the trace function then looks
13312 at location pc - 12 and the top 8 bits are set, then we know
13313 that there is a function name embedded immediately preceding this
13314 location and that its length is ((pc[-3]) & ~0xff000000).
13316 We assume that pc is declared as a pointer to an unsigned long.
13318 It is of no benefit to output the function name if we are assembling
13319 a leaf function. These function types will not contain a stack
13320 backtrace structure, therefore it is not possible to determine the
13321 function name. */
13322 void
13323 arm_poke_function_name (FILE *stream, const char *name)
13325 unsigned long alignlength;
13326 unsigned long length;
13327 rtx x;
13329 length = strlen (name) + 1;
13330 alignlength = ROUND_UP_WORD (length);
13332 ASM_OUTPUT_ASCII (stream, name, length);
13333 ASM_OUTPUT_ALIGN (stream, 2);
13334 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13335 assemble_aligned_integer (UNITS_PER_WORD, x);
13338 /* Place some comments into the assembler stream
13339 describing the current function. */
13340 static void
13341 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13343 unsigned long func_type;
13345 if (TARGET_THUMB1)
13347 thumb1_output_function_prologue (f, frame_size);
13348 return;
13351 /* Sanity check. */
13352 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13354 func_type = arm_current_func_type ();
13356 switch ((int) ARM_FUNC_TYPE (func_type))
13358 default:
13359 case ARM_FT_NORMAL:
13360 break;
13361 case ARM_FT_INTERWORKED:
13362 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13363 break;
13364 case ARM_FT_ISR:
13365 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13366 break;
13367 case ARM_FT_FIQ:
13368 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13369 break;
13370 case ARM_FT_EXCEPTION:
13371 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13372 break;
13375 if (IS_NAKED (func_type))
13376 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13378 if (IS_VOLATILE (func_type))
13379 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13381 if (IS_NESTED (func_type))
13382 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13383 if (IS_STACKALIGN (func_type))
13384 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13386 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13387 crtl->args.size,
13388 crtl->args.pretend_args_size, frame_size);
13390 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13391 frame_pointer_needed,
13392 cfun->machine->uses_anonymous_args);
13394 if (cfun->machine->lr_save_eliminated)
13395 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13397 if (crtl->calls_eh_return)
13398 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
13402 const char *
13403 arm_output_epilogue (rtx sibling)
13405 int reg;
13406 unsigned long saved_regs_mask;
13407 unsigned long func_type;
13408 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13409 frame that is $fp + 4 for a non-variadic function. */
13410 int floats_offset = 0;
13411 rtx operands[3];
13412 FILE * f = asm_out_file;
13413 unsigned int lrm_count = 0;
13414 int really_return = (sibling == NULL);
13415 int start_reg;
13416 arm_stack_offsets *offsets;
13418 /* If we have already generated the return instruction
13419 then it is futile to generate anything else. */
13420 if (use_return_insn (FALSE, sibling) &&
13421 (cfun->machine->return_used_this_function != 0))
13422 return "";
13424 func_type = arm_current_func_type ();
13426 if (IS_NAKED (func_type))
13427 /* Naked functions don't have epilogues. */
13428 return "";
13430 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13432 rtx op;
13434 /* A volatile function should never return. Call abort. */
13435 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13436 assemble_external_libcall (op);
13437 output_asm_insn ("bl\t%a0", &op);
13439 return "";
13442 /* If we are throwing an exception, then we really must be doing a
13443 return, so we can't tail-call. */
13444 gcc_assert (!crtl->calls_eh_return || really_return);
13446 offsets = arm_get_frame_offsets ();
13447 saved_regs_mask = offsets->saved_regs_mask;
13449 if (TARGET_IWMMXT)
13450 lrm_count = bit_count (saved_regs_mask);
13452 floats_offset = offsets->saved_args;
13453 /* Compute how far away the floats will be. */
13454 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13455 if (saved_regs_mask & (1 << reg))
13456 floats_offset += 4;
13458 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13460 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13461 int vfp_offset = offsets->frame;
13463 if (TARGET_FPA_EMU2)
13465 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13466 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13468 floats_offset += 12;
13469 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13470 reg, FP_REGNUM, floats_offset - vfp_offset);
13473 else
13475 start_reg = LAST_FPA_REGNUM;
13477 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13479 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13481 floats_offset += 12;
13483 /* We can't unstack more than four registers at once. */
13484 if (start_reg - reg == 3)
13486 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13487 reg, FP_REGNUM, floats_offset - vfp_offset);
13488 start_reg = reg - 1;
13491 else
13493 if (reg != start_reg)
13494 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13495 reg + 1, start_reg - reg,
13496 FP_REGNUM, floats_offset - vfp_offset);
13497 start_reg = reg - 1;
13501 /* Just in case the last register checked also needs unstacking. */
13502 if (reg != start_reg)
13503 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13504 reg + 1, start_reg - reg,
13505 FP_REGNUM, floats_offset - vfp_offset);
13508 if (TARGET_HARD_FLOAT && TARGET_VFP)
13510 int saved_size;
13512 /* The fldmd insns do not have base+offset addressing
13513 modes, so we use IP to hold the address. */
13514 saved_size = arm_get_vfp_saved_size ();
13516 if (saved_size > 0)
13518 floats_offset += saved_size;
13519 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13520 FP_REGNUM, floats_offset - vfp_offset);
13522 start_reg = FIRST_VFP_REGNUM;
13523 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13525 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13526 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13528 if (start_reg != reg)
13529 vfp_output_fldmd (f, IP_REGNUM,
13530 (start_reg - FIRST_VFP_REGNUM) / 2,
13531 (reg - start_reg) / 2);
13532 start_reg = reg + 2;
13535 if (start_reg != reg)
13536 vfp_output_fldmd (f, IP_REGNUM,
13537 (start_reg - FIRST_VFP_REGNUM) / 2,
13538 (reg - start_reg) / 2);
13541 if (TARGET_IWMMXT)
13543 /* The frame pointer is guaranteed to be non-double-word aligned.
13544 This is because it is set to (old_stack_pointer - 4) and the
13545 old_stack_pointer was double word aligned. Thus the offset to
13546 the iWMMXt registers to be loaded must also be non-double-word
13547 sized, so that the resultant address *is* double-word aligned.
13548 We can ignore floats_offset since that was already included in
13549 the live_regs_mask. */
13550 lrm_count += (lrm_count % 2 ? 2 : 1);
13552 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13553 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13555 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13556 reg, FP_REGNUM, lrm_count * 4);
13557 lrm_count += 2;
13561 /* saved_regs_mask should contain the IP, which at the time of stack
13562 frame generation actually contains the old stack pointer. So a
13563 quick way to unwind the stack is just to pop the IP register directly
13564 into the stack pointer. */
13565 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13566 saved_regs_mask &= ~ (1 << IP_REGNUM);
13567 saved_regs_mask |= (1 << SP_REGNUM);
13569 /* There are two registers left in saved_regs_mask - LR and PC. We
13570 only need to restore the LR register (the return address), but to
13571 save time we can load it directly into the PC, unless we need a
13572 special function exit sequence, or we are not really returning. */
13573 if (really_return
13574 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13575 && !crtl->calls_eh_return)
13576 /* Delete the LR from the register mask, so that the value of LR
13577 saved on the stack is loaded into the PC instead. */
13578 saved_regs_mask &= ~ (1 << LR_REGNUM);
13579 else
13580 saved_regs_mask &= ~ (1 << PC_REGNUM);
13582 /* We must use SP as the base register, because SP is one of the
13583 registers being restored. If an interrupt or page fault
13584 happens in the ldm instruction, the SP might or might not
13585 have been restored. That would be bad, as then SP will no
13586 longer indicate the safe area of stack, and we can get stack
13587 corruption. Using SP as the base register means that it will
13588 be reset correctly to the original value, should an interrupt
13589 occur. If the stack pointer already points at the right
13590 place, then omit the subtraction. */
13591 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
13592 || cfun->calls_alloca)
13593 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
13594 4 * bit_count (saved_regs_mask));
13595 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
13597 if (IS_INTERRUPT (func_type))
13598 /* Interrupt handlers will have pushed the
13599 IP onto the stack, so restore it now. */
13600 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
13602 else
13604 /* This branch is executed for ARM mode (non-apcs frames) and
13605 Thumb-2 mode. Frame layout is essentially the same for those
13606 cases, except that in ARM mode the frame pointer points to the
13607 first saved register, while in Thumb-2 mode the frame pointer points
13608 to the last saved register.
13610 It is possible to make the frame pointer point to the last saved
13611 register in both cases, and remove some conditionals below.
13612 That would mean that the fp setup in the prologue would be just "mov fp, sp"
13613 and the sp restore in the epilogue just "mov sp, fp", whereas
13614 now we have to use add/sub in those cases. However, the value
13615 of that would be marginal, as both mov and add/sub are 32-bit
13616 in ARM mode, and it would require extra conditionals
13617 in arm_expand_prologue to distinguish the ARM-apcs-frame case
13618 (where the frame pointer is required to point at the first register)
13619 from the ARM-non-apcs-frame case. Therefore, such a change is
13620 postponed until a real need arises. */
13621 unsigned HOST_WIDE_INT amount;
13622 int rfe;
13623 /* Restore stack pointer if necessary. */
13624 if (TARGET_ARM && frame_pointer_needed)
13626 operands[0] = stack_pointer_rtx;
13627 operands[1] = hard_frame_pointer_rtx;
13629 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
13630 output_add_immediate (operands);
13632 else
13634 if (frame_pointer_needed)
13636 /* For Thumb-2 restore sp from the frame pointer.
13637 Operand restrictions mean we have to increment FP, then copy
13638 to SP. */
13639 amount = offsets->locals_base - offsets->saved_regs;
13640 operands[0] = hard_frame_pointer_rtx;
13642 else
13644 unsigned long count;
13645 operands[0] = stack_pointer_rtx;
13646 amount = offsets->outgoing_args - offsets->saved_regs;
13647 /* Pop call-clobbered registers if it avoids a
13648 separate stack adjustment. */
13649 count = offsets->saved_regs - offsets->saved_args;
13650 if (optimize_size
13651 && count != 0
13652 && !crtl->calls_eh_return
13653 && bit_count(saved_regs_mask) * 4 == count
13654 && !IS_INTERRUPT (func_type)
13655 && !crtl->tail_call_emit)
13657 unsigned long mask;
13658 mask = (1 << (arm_size_return_regs() / 4)) - 1;
13659 mask ^= 0xf;
13660 mask &= ~saved_regs_mask;
13661 reg = 0;
13662 while (bit_count (mask) * 4 > amount)
13664 while ((mask & (1 << reg)) == 0)
13665 reg++;
13666 mask &= ~(1 << reg);
13668 if (bit_count (mask) * 4 == amount) {
13669 amount = 0;
13670 saved_regs_mask |= mask;
13675 if (amount)
13677 operands[1] = operands[0];
13678 operands[2] = GEN_INT (amount);
13679 output_add_immediate (operands);
13681 if (frame_pointer_needed)
13682 asm_fprintf (f, "\tmov\t%r, %r\n",
13683 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
13686 if (TARGET_FPA_EMU2)
13688 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13689 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13690 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
13691 reg, SP_REGNUM);
13693 else
13695 start_reg = FIRST_FPA_REGNUM;
13697 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13699 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13701 if (reg - start_reg == 3)
13703 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
13704 start_reg, SP_REGNUM);
13705 start_reg = reg + 1;
13708 else
13710 if (reg != start_reg)
13711 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13712 start_reg, reg - start_reg,
13713 SP_REGNUM);
13715 start_reg = reg + 1;
13719 /* Just in case the last register checked also needs unstacking. */
13720 if (reg != start_reg)
13721 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13722 start_reg, reg - start_reg, SP_REGNUM);
13725 if (TARGET_HARD_FLOAT && TARGET_VFP)
13727 start_reg = FIRST_VFP_REGNUM;
13728 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13730 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13731 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13733 if (start_reg != reg)
13734 vfp_output_fldmd (f, SP_REGNUM,
13735 (start_reg - FIRST_VFP_REGNUM) / 2,
13736 (reg - start_reg) / 2);
13737 start_reg = reg + 2;
13740 if (start_reg != reg)
13741 vfp_output_fldmd (f, SP_REGNUM,
13742 (start_reg - FIRST_VFP_REGNUM) / 2,
13743 (reg - start_reg) / 2);
13745 if (TARGET_IWMMXT)
13746 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
13747 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13748 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
13750 /* If we can, restore the LR into the PC. */
13751 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
13752 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
13753 && !IS_STACKALIGN (func_type)
13754 && really_return
13755 && crtl->args.pretend_args_size == 0
13756 && saved_regs_mask & (1 << LR_REGNUM)
13757 && !crtl->calls_eh_return)
13759 saved_regs_mask &= ~ (1 << LR_REGNUM);
13760 saved_regs_mask |= (1 << PC_REGNUM);
13761 rfe = IS_INTERRUPT (func_type);
13763 else
13764 rfe = 0;
13766 /* Load the registers off the stack. If we only have one register
13767 to load, use the LDR instruction - it is faster. For Thumb-2
13768 always use pop and the assembler will pick the best instruction. */
13769 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
13770 && !IS_INTERRUPT(func_type))
13772 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
13774 else if (saved_regs_mask)
13776 if (saved_regs_mask & (1 << SP_REGNUM))
13777 /* Note - write back to the stack register is not enabled
13778 (i.e. "ldmfd sp!..."). We know that the stack pointer is
13779 in the list of registers and if we add writeback the
13780 instruction becomes UNPREDICTABLE. */
13781 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
13782 rfe);
13783 else if (TARGET_ARM)
13784 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
13785 rfe);
13786 else
13787 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
13790 if (crtl->args.pretend_args_size)
13792 /* Unwind the pre-pushed regs. */
13793 operands[0] = operands[1] = stack_pointer_rtx;
13794 operands[2] = GEN_INT (crtl->args.pretend_args_size);
13795 output_add_immediate (operands);
13799 /* We may have already restored PC directly from the stack. */
13800 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
13801 return "";
13803 /* Stack adjustment for exception handler. */
13804 if (crtl->calls_eh_return)
13805 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
13806 ARM_EH_STACKADJ_REGNUM);
13808 /* Generate the return instruction. */
13809 switch ((int) ARM_FUNC_TYPE (func_type))
13811 case ARM_FT_ISR:
13812 case ARM_FT_FIQ:
13813 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
13814 break;
13816 case ARM_FT_EXCEPTION:
13817 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13818 break;
13820 case ARM_FT_INTERWORKED:
13821 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13822 break;
13824 default:
13825 if (IS_STACKALIGN (func_type))
13827 /* See comment in arm_expand_prologue. */
13828 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
13830 if (arm_arch5 || arm_arch4t)
13831 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13832 else
13833 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13834 break;
13837 return "";
13840 static void
13841 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
13842 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
13844 arm_stack_offsets *offsets;
13846 if (TARGET_THUMB1)
13848 int regno;
13850 /* Emit any call-via-reg trampolines that are needed for v4t support
13851 of call_reg and call_value_reg type insns. */
13852 for (regno = 0; regno < LR_REGNUM; regno++)
13854 rtx label = cfun->machine->call_via[regno];
13856 if (label != NULL)
13858 switch_to_section (function_section (current_function_decl));
13859 targetm.asm_out.internal_label (asm_out_file, "L",
13860 CODE_LABEL_NUMBER (label));
13861 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
13865 /* ??? Probably not safe to set this here, since it assumes that a
13866 function will be emitted as assembly immediately after we generate
13867 RTL for it. This does not happen for inline functions. */
13868 cfun->machine->return_used_this_function = 0;
13870 else /* TARGET_32BIT */
13872 /* We need to take into account any stack-frame rounding. */
13873 offsets = arm_get_frame_offsets ();
13875 gcc_assert (!use_return_insn (FALSE, NULL)
13876 || (cfun->machine->return_used_this_function != 0)
13877 || offsets->saved_regs == offsets->outgoing_args
13878 || frame_pointer_needed);
13880 /* Reset the ARM-specific per-function variables. */
13881 after_arm_reorg = 0;
13885 /* Generate and emit an insn that we will recognize as a push_multi.
13886 Unfortunately, since this insn does not reflect very well the actual
13887 semantics of the operation, we need to annotate the insn for the benefit
13888 of DWARF2 frame unwind information. */
13889 static rtx
13890 emit_multi_reg_push (unsigned long mask)
13892 int num_regs = 0;
13893 int num_dwarf_regs;
13894 int i, j;
13895 rtx par;
13896 rtx dwarf;
13897 int dwarf_par_index;
13898 rtx tmp, reg;
13900 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13901 if (mask & (1 << i))
13902 num_regs++;
13904 gcc_assert (num_regs && num_regs <= 16);
13906 /* We don't record the PC in the dwarf frame information. */
13907 num_dwarf_regs = num_regs;
13908 if (mask & (1 << PC_REGNUM))
13909 num_dwarf_regs--;
13911 /* For the body of the insn we are going to generate an UNSPEC in
13912 parallel with several USEs. This allows the insn to be recognized
13913 by the push_multi pattern in the arm.md file. The insn looks
13914 something like this:
13916 (parallel [
13917 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
13918 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
13919 (use (reg:SI 11 fp))
13920 (use (reg:SI 12 ip))
13921 (use (reg:SI 14 lr))
13922 (use (reg:SI 15 pc))
13925 For the frame note however, we try to be more explicit and actually
13926 show each register being stored into the stack frame, plus a (single)
13927 decrement of the stack pointer. We do it this way in order to be
13928 friendly to the stack unwinding code, which only wants to see a single
13929 stack decrement per instruction. The RTL we generate for the note looks
13930 something like this:
13932 (sequence [
13933 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
13934 (set (mem:SI (reg:SI sp)) (reg:SI r4))
13935 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
13936 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
13937 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
13940 This sequence is used both by the code to support stack unwinding for
13941 exception handlers and the code to generate dwarf2 frame debugging. */
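/* As a rough illustration, for the example mask above ({r4, fp, ip, lr, pc}
   on an ARM-state target) the insn built here would normally be assembled
   as a single store-multiple, something like:

   stmfd	sp!, {r4, fp, ip, lr, pc}

   while the attached REG_FRAME_RELATED_EXPR note describes the same effect
   as one 20 byte decrement of SP plus four individual register stores
   (the PC is not recorded, as noted above). */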
13943 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
13944 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
13945 dwarf_par_index = 1;
13947 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13949 if (mask & (1 << i))
13951 reg = gen_rtx_REG (SImode, i);
13953 XVECEXP (par, 0, 0)
13954 = gen_rtx_SET (VOIDmode,
13955 gen_frame_mem (BLKmode,
13956 gen_rtx_PRE_DEC (BLKmode,
13957 stack_pointer_rtx)),
13958 gen_rtx_UNSPEC (BLKmode,
13959 gen_rtvec (1, reg),
13960 UNSPEC_PUSH_MULT));
13962 if (i != PC_REGNUM)
13964 tmp = gen_rtx_SET (VOIDmode,
13965 gen_frame_mem (SImode, stack_pointer_rtx),
13966 reg);
13967 RTX_FRAME_RELATED_P (tmp) = 1;
13968 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
13969 dwarf_par_index++;
13972 break;
13976 for (j = 1, i++; j < num_regs; i++)
13978 if (mask & (1 << i))
13980 reg = gen_rtx_REG (SImode, i);
13982 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
13984 if (i != PC_REGNUM)
13987 tmp = gen_rtx_SET (VOIDmode,
13988 gen_frame_mem (SImode,
13989 plus_constant (stack_pointer_rtx,
13990 4 * j)),
13991 reg);
13992 RTX_FRAME_RELATED_P (tmp) = 1;
13993 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
13996 j++;
14000 par = emit_insn (par);
14002 tmp = gen_rtx_SET (VOIDmode,
14003 stack_pointer_rtx,
14004 plus_constant (stack_pointer_rtx, -4 * num_regs));
14005 RTX_FRAME_RELATED_P (tmp) = 1;
14006 XVECEXP (dwarf, 0, 0) = tmp;
14008 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14010 return par;
14013 /* Calculate the size of the return value that is passed in registers. */
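/* For example, a function returning "int" (SImode) yields 4 and one
   returning "long long" (DImode) yields 8. arm_get_frame_offsets uses
   this value to decide whether r3 is free to be pushed as stack-alignment
   padding (it is free whenever the return value fits in r0-r2). */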
14014 static unsigned
14015 arm_size_return_regs (void)
14017 enum machine_mode mode;
14019 if (crtl->return_rtx != 0)
14020 mode = GET_MODE (crtl->return_rtx);
14021 else
14022 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14024 return GET_MODE_SIZE (mode);
14027 static rtx
14028 emit_sfm (int base_reg, int count)
14030 rtx par;
14031 rtx dwarf;
14032 rtx tmp, reg;
14033 int i;
14035 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14036 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14038 reg = gen_rtx_REG (XFmode, base_reg++);
14040 XVECEXP (par, 0, 0)
14041 = gen_rtx_SET (VOIDmode,
14042 gen_frame_mem (BLKmode,
14043 gen_rtx_PRE_DEC (BLKmode,
14044 stack_pointer_rtx)),
14045 gen_rtx_UNSPEC (BLKmode,
14046 gen_rtvec (1, reg),
14047 UNSPEC_PUSH_MULT));
14048 tmp = gen_rtx_SET (VOIDmode,
14049 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14050 RTX_FRAME_RELATED_P (tmp) = 1;
14051 XVECEXP (dwarf, 0, 1) = tmp;
14053 for (i = 1; i < count; i++)
14055 reg = gen_rtx_REG (XFmode, base_reg++);
14056 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14058 tmp = gen_rtx_SET (VOIDmode,
14059 gen_frame_mem (XFmode,
14060 plus_constant (stack_pointer_rtx,
14061 i * 12)),
14062 reg);
14063 RTX_FRAME_RELATED_P (tmp) = 1;
14064 XVECEXP (dwarf, 0, i + 1) = tmp;
14067 tmp = gen_rtx_SET (VOIDmode,
14068 stack_pointer_rtx,
14069 plus_constant (stack_pointer_rtx, -12 * count));
14071 RTX_FRAME_RELATED_P (tmp) = 1;
14072 XVECEXP (dwarf, 0, 0) = tmp;
14074 par = emit_insn (par);
14075 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14077 return par;
14081 /* Return true if the current function needs to save/restore LR. */
14083 static bool
14084 thumb_force_lr_save (void)
14086 return !cfun->machine->lr_save_eliminated
14087 && (!leaf_function_p ()
14088 || thumb_far_jump_used_p ()
14089 || df_regs_ever_live_p (LR_REGNUM));
14093 /* Compute the distance from register FROM to register TO.
14094 These can be the arg pointer (26), the soft frame pointer (25),
14095 the stack pointer (13) or the hard frame pointer (11).
14096 In thumb mode r7 is used as the soft frame pointer, if needed.
14097 Typical stack layout looks like this:
14099 old stack pointer -> |    |
14100                       ----
14101                      |    | \
14102                      |    |   saved arguments for
14103                      |    |   vararg functions
14104                      |    | /
14105                       --
14106 hard FP & arg pointer -> |    | \
14107                          |    |   stack
14108                          |    |   frame
14109                          |    | /
14110                           --
14111                          |    | \
14112                          |    |   call saved
14113                          |    |   registers
14114 soft frame pointer ->    |    | /
14115                           --
14116                          |    | \
14117                          |    |   local
14118                          |    |   variables
14119 locals base pointer ->   |    | /
14120                           --
14121                          |    | \
14122                          |    |   outgoing
14123                          |    |   arguments
14124 current stack pointer -> |    | /
14125                           --
14127 For a given function some or all of these stack components
14128 may not be needed, giving rise to the possibility of
14129 eliminating some of the registers.
14131 The values returned by this function must reflect the behavior
14132 of arm_expand_prologue() and arm_compute_save_reg_mask().
14134 The sign of the number returned reflects the direction of stack
14135 growth, so the values are positive for all eliminations except
14136 from the soft frame pointer to the hard frame pointer.
14138 SFP may point just inside the local variables block to ensure correct
14139 alignment. */
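/* As a worked example (assuming no pretend args, no static chain slot, a
   CALLER_INTERWORKING_SLOT_SIZE of zero and doubleword stack alignment),
   a non-leaf ARM function that saves {r4, fp, lr}, has 8 bytes of locals
   and no outgoing arguments would typically end up with

   saved_args    = 0
   saved_regs    = 12
   soft_frame    = 16   (12 rounded up to a doubleword boundary)
   locals_base   = 24
   outgoing_args = 24

   so eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM would yield
   24 - (0 + 4) = 20, and FRAME_POINTER_REGNUM into STACK_POINTER_REGNUM
   would yield 24 - 16 = 8. */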
14142 /* Calculate stack offsets. These are used to calculate register elimination
14143 offsets and in prologue/epilogue code. Also calculates which registers
14144 should be saved. */
14146 static arm_stack_offsets *
14147 arm_get_frame_offsets (void)
14149 struct arm_stack_offsets *offsets;
14150 unsigned long func_type;
14151 int leaf;
14152 int saved;
14153 int core_saved;
14154 HOST_WIDE_INT frame_size;
14155 int i;
14157 offsets = &cfun->machine->stack_offsets;
14159 /* We need to know if we are a leaf function. Unfortunately, it
14160 is possible to be called after start_sequence has been called,
14161 which causes get_insns to return the insns for the sequence,
14162 not the function, which will cause leaf_function_p to return
14163 the incorrect result.
14165 However, we only need to know about leaf functions once reload has
14166 completed, and the frame size cannot be changed after that time, so
14167 we can safely use the cached value. */
14169 if (reload_completed)
14170 return offsets;
14172 /* Initially this is the size of the local variables. It will be translated
14173 into an offset once we have determined the size of preceding data. */
14174 frame_size = ROUND_UP_WORD (get_frame_size ());
14176 leaf = leaf_function_p ();
14178 /* Space for variadic functions. */
14179 offsets->saved_args = crtl->args.pretend_args_size;
14181 /* In Thumb mode this is incorrect, but never used. */
14182 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
14183 arm_compute_static_chain_stack_bytes();
14185 if (TARGET_32BIT)
14187 unsigned int regno;
14189 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
14190 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14191 saved = core_saved;
14193 /* We know that SP will be doubleword aligned on entry, and we must
14194 preserve that condition at any subroutine call. We also require the
14195 soft frame pointer to be doubleword aligned. */
14197 if (TARGET_REALLY_IWMMXT)
14199 /* Check for the call-saved iWMMXt registers. */
14200 for (regno = FIRST_IWMMXT_REGNUM;
14201 regno <= LAST_IWMMXT_REGNUM;
14202 regno++)
14203 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14204 saved += 8;
14207 func_type = arm_current_func_type ();
14208 if (! IS_VOLATILE (func_type))
14210 /* Space for saved FPA registers. */
14211 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
14212 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14213 saved += 12;
14215 /* Space for saved VFP registers. */
14216 if (TARGET_HARD_FLOAT && TARGET_VFP)
14217 saved += arm_get_vfp_saved_size ();
14220 else /* TARGET_THUMB1 */
14222 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14223 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14224 saved = core_saved;
14225 if (TARGET_BACKTRACE)
14226 saved += 16;
14229 /* Saved registers include the stack frame. */
14230 offsets->saved_regs = offsets->saved_args + saved +
14231 arm_compute_static_chain_stack_bytes();
14232 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
14233 /* A leaf function does not need any stack alignment if it has nothing
14234 on the stack. */
14235 if (leaf && frame_size == 0)
14237 offsets->outgoing_args = offsets->soft_frame;
14238 offsets->locals_base = offsets->soft_frame;
14239 return offsets;
14242 /* Ensure SFP has the correct alignment. */
14243 if (ARM_DOUBLEWORD_ALIGN
14244 && (offsets->soft_frame & 7))
14246 offsets->soft_frame += 4;
14247 /* Try to align stack by pushing an extra reg. Don't bother doing this
14248 when there is a stack frame as the alignment will be rolled into
14249 the normal stack adjustment. */
14250 if (frame_size + crtl->outgoing_args_size == 0)
14252 int reg = -1;
14254 /* If it is safe to use r3, then do so. This sometimes
14255 generates better code on Thumb-2 by avoiding the need to
14256 use 32-bit push/pop instructions. */
14257 if (!crtl->tail_call_emit
14258 && arm_size_return_regs () <= 12)
14260 reg = 3;
14262 else
14263 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14265 if ((offsets->saved_regs_mask & (1 << i)) == 0)
14267 reg = i;
14268 break;
14272 if (reg != -1)
14274 offsets->saved_regs += 4;
14275 offsets->saved_regs_mask |= (1 << reg);
14280 offsets->locals_base = offsets->soft_frame + frame_size;
14281 offsets->outgoing_args = (offsets->locals_base
14282 + crtl->outgoing_args_size);
14284 if (ARM_DOUBLEWORD_ALIGN)
14286 /* Ensure SP remains doubleword aligned. */
14287 if (offsets->outgoing_args & 7)
14288 offsets->outgoing_args += 4;
14289 gcc_assert (!(offsets->outgoing_args & 7));
14292 return offsets;
14296 /* Calculate the relative offsets for the different stack pointers. Positive
14297 offsets are in the direction of stack growth. */
14299 HOST_WIDE_INT
14300 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
14302 arm_stack_offsets *offsets;
14304 offsets = arm_get_frame_offsets ();
14306 /* OK, now we have enough information to compute the distances.
14307 There must be an entry in these switch tables for each pair
14308 of registers in ELIMINABLE_REGS, even if some of the entries
14309 seem to be redundant or useless. */
14310 switch (from)
14312 case ARG_POINTER_REGNUM:
14313 switch (to)
14315 case THUMB_HARD_FRAME_POINTER_REGNUM:
14316 return 0;
14318 case FRAME_POINTER_REGNUM:
14319 /* This is the reverse of the soft frame pointer
14320 to hard frame pointer elimination below. */
14321 return offsets->soft_frame - offsets->saved_args;
14323 case ARM_HARD_FRAME_POINTER_REGNUM:
14324 /* This is only non-zero in the case where the static chain register
14325 is stored above the frame. */
14326 return offsets->frame - offsets->saved_args - 4;
14328 case STACK_POINTER_REGNUM:
14329 /* If nothing has been pushed on the stack at all
14330 then this will return -4. This *is* correct! */
14331 return offsets->outgoing_args - (offsets->saved_args + 4);
14333 default:
14334 gcc_unreachable ();
14336 gcc_unreachable ();
14338 case FRAME_POINTER_REGNUM:
14339 switch (to)
14341 case THUMB_HARD_FRAME_POINTER_REGNUM:
14342 return 0;
14344 case ARM_HARD_FRAME_POINTER_REGNUM:
14345 /* The hard frame pointer points to the top entry in the
14346 stack frame. The soft frame pointer to the bottom entry
14347 in the stack frame. If there is no stack frame at all,
14348 then they are identical. */
14350 return offsets->frame - offsets->soft_frame;
14352 case STACK_POINTER_REGNUM:
14353 return offsets->outgoing_args - offsets->soft_frame;
14355 default:
14356 gcc_unreachable ();
14358 gcc_unreachable ();
14360 default:
14361 /* You cannot eliminate from the stack pointer.
14362 In theory you could eliminate from the hard frame
14363 pointer to the stack pointer, but this will never
14364 happen, since if a stack frame is not needed the
14365 hard frame pointer will never be used. */
14366 gcc_unreachable ();
14370 /* Given FROM and TO register numbers, say whether this elimination is
14371 allowed. Frame pointer elimination is automatically handled.
14373 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14374 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14375 pointer, we must eliminate FRAME_POINTER_REGNUM into
14376 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14377 ARG_POINTER_REGNUM. */
14379 bool
14380 arm_can_eliminate (const int from, const int to)
14382 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14383 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14384 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
14385 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
14386 true);
14389 /* Emit RTL to save coprocessor registers on function entry. Returns the
14390 number of bytes pushed. */
14392 static int
14393 arm_save_coproc_regs(void)
14395 int saved_size = 0;
14396 unsigned reg;
14397 unsigned start_reg;
14398 rtx insn;
14400 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14401 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14403 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
14404 insn = gen_rtx_MEM (V2SImode, insn);
14405 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14406 RTX_FRAME_RELATED_P (insn) = 1;
14407 saved_size += 8;
14410 /* Save any floating point call-saved registers used by this
14411 function. */
14412 if (TARGET_FPA_EMU2)
14414 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14415 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14417 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
14418 insn = gen_rtx_MEM (XFmode, insn);
14419 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14420 RTX_FRAME_RELATED_P (insn) = 1;
14421 saved_size += 12;
14424 else
14426 start_reg = LAST_FPA_REGNUM;
14428 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14430 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14432 if (start_reg - reg == 3)
14434 insn = emit_sfm (reg, 4);
14435 RTX_FRAME_RELATED_P (insn) = 1;
14436 saved_size += 48;
14437 start_reg = reg - 1;
14440 else
14442 if (start_reg != reg)
14444 insn = emit_sfm (reg + 1, start_reg - reg);
14445 RTX_FRAME_RELATED_P (insn) = 1;
14446 saved_size += (start_reg - reg) * 12;
14448 start_reg = reg - 1;
14452 if (start_reg != reg)
14454 insn = emit_sfm (reg + 1, start_reg - reg);
14455 saved_size += (start_reg - reg) * 12;
14456 RTX_FRAME_RELATED_P (insn) = 1;
14459 if (TARGET_HARD_FLOAT && TARGET_VFP)
14461 start_reg = FIRST_VFP_REGNUM;
14463 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14465 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14466 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14468 if (start_reg != reg)
14469 saved_size += vfp_emit_fstmd (start_reg,
14470 (reg - start_reg) / 2);
14471 start_reg = reg + 2;
14474 if (start_reg != reg)
14475 saved_size += vfp_emit_fstmd (start_reg,
14476 (reg - start_reg) / 2);
14478 return saved_size;
14482 /* Set the Thumb frame pointer from the stack pointer. */
14484 static void
14485 thumb_set_frame_pointer (arm_stack_offsets *offsets)
14487 HOST_WIDE_INT amount;
14488 rtx insn, dwarf;
14490 amount = offsets->outgoing_args - offsets->locals_base;
14491 if (amount < 1024)
14492 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14493 stack_pointer_rtx, GEN_INT (amount)));
14494 else
14496 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
14497 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
14498 expects the first two operands to be the same. */
14499 if (TARGET_THUMB2)
14501 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14502 stack_pointer_rtx,
14503 hard_frame_pointer_rtx));
14505 else
14507 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14508 hard_frame_pointer_rtx,
14509 stack_pointer_rtx));
14511 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14512 plus_constant (stack_pointer_rtx, amount));
14513 RTX_FRAME_RELATED_P (dwarf) = 1;
14514 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14517 RTX_FRAME_RELATED_P (insn) = 1;
14520 /* Generate the prologue instructions for entry into an ARM or Thumb-2
14521 function. */
14522 void
14523 arm_expand_prologue (void)
14525 rtx amount;
14526 rtx insn;
14527 rtx ip_rtx;
14528 unsigned long live_regs_mask;
14529 unsigned long func_type;
14530 int fp_offset = 0;
14531 int saved_pretend_args = 0;
14532 int saved_regs = 0;
14533 unsigned HOST_WIDE_INT args_to_push;
14534 arm_stack_offsets *offsets;
14536 func_type = arm_current_func_type ();
14538 /* Naked functions don't have prologues. */
14539 if (IS_NAKED (func_type))
14540 return;
14542 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
14543 args_to_push = crtl->args.pretend_args_size;
14545 /* Compute which register we will have to save onto the stack. */
14546 offsets = arm_get_frame_offsets ();
14547 live_regs_mask = offsets->saved_regs_mask;
14549 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
14551 if (IS_STACKALIGN (func_type))
14553 rtx dwarf;
14554 rtx r0;
14555 rtx r1;
14556 /* Handle a word-aligned stack pointer. We generate the following:
14558 mov r0, sp
14559 bic r1, r0, #7
14560 mov sp, r1
14561 <save and restore r0 in normal prologue/epilogue>
14562 mov sp, r0
14563 bx lr
14565 The unwinder doesn't need to know about the stack realignment.
14566 Just tell it we saved SP in r0. */
14567 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
14569 r0 = gen_rtx_REG (SImode, 0);
14570 r1 = gen_rtx_REG (SImode, 1);
14571 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
14572 compiler won't choke. */
14573 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
14574 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
14575 insn = gen_movsi (r0, stack_pointer_rtx);
14576 RTX_FRAME_RELATED_P (insn) = 1;
14577 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14578 emit_insn (insn);
14579 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
14580 emit_insn (gen_movsi (stack_pointer_rtx, r1));
14583 /* For APCS frames, if the IP register is clobbered when
14584 creating the frame, save that register in a special
14585 way. */
14586 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14588 if (IS_INTERRUPT (func_type))
14590 /* Interrupt functions must not corrupt any registers.
14591 Creating a frame pointer however, corrupts the IP
14592 register, so we must push it first. */
14593 insn = emit_multi_reg_push (1 << IP_REGNUM);
14595 /* Do not set RTX_FRAME_RELATED_P on this insn.
14596 The dwarf stack unwinding code only wants to see one
14597 stack decrement per function, and this is not it. If
14598 this instruction is labeled as being part of the frame
14599 creation sequence then dwarf2out_frame_debug_expr will
14600 die when it encounters the assignment of IP to FP
14601 later on, since the use of SP here establishes SP as
14602 the CFA register and not IP.
14604 Anyway this instruction is not really part of the stack
14605 frame creation although it is part of the prologue. */
14607 else if (IS_NESTED (func_type))
14609 /* The static chain register is the same as the IP register, which is
14610 used as a scratch register during stack frame creation.
14611 To get around this we need to find somewhere to store IP
14612 whilst the frame is being created. We try the following
14613 places in order:
14615 1. The last argument register.
14616 2. A slot on the stack above the frame. (This only
14617 works if the function is not a varargs function).
14618 3. Register r3, after pushing the argument registers
14619 onto the stack.
14621 Note - we only need to tell the dwarf2 backend about the SP
14622 adjustment in the second variant; the static chain register
14623 doesn't need to be unwound, as it doesn't contain a value
14624 inherited from the caller. */
14626 if (df_regs_ever_live_p (3) == false)
14627 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14628 else if (args_to_push == 0)
14630 rtx dwarf;
14632 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
14633 saved_regs += 4;
14635 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
14636 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
14637 fp_offset = 4;
14639 /* Just tell the dwarf backend that we adjusted SP. */
14640 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14641 plus_constant (stack_pointer_rtx,
14642 -fp_offset));
14643 RTX_FRAME_RELATED_P (insn) = 1;
14644 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14646 else
14648 /* Store the args on the stack. */
14649 if (cfun->machine->uses_anonymous_args)
14650 insn = emit_multi_reg_push
14651 ((0xf0 >> (args_to_push / 4)) & 0xf);
14652 else
14653 insn = emit_insn
14654 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14655 GEN_INT (- args_to_push)));
14657 RTX_FRAME_RELATED_P (insn) = 1;
14659 saved_pretend_args = 1;
14660 fp_offset = args_to_push;
14661 args_to_push = 0;
14663 /* Now reuse r3 to preserve IP. */
14664 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14668 insn = emit_set_insn (ip_rtx,
14669 plus_constant (stack_pointer_rtx, fp_offset));
14670 RTX_FRAME_RELATED_P (insn) = 1;
14673 if (args_to_push)
14675 /* Push the argument registers, or reserve space for them. */
14676 if (cfun->machine->uses_anonymous_args)
14677 insn = emit_multi_reg_push
14678 ((0xf0 >> (args_to_push / 4)) & 0xf);
14679 else
14680 insn = emit_insn
14681 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14682 GEN_INT (- args_to_push)));
14683 RTX_FRAME_RELATED_P (insn) = 1;
14686 /* If this is an interrupt service routine, and the link register
14687 is going to be pushed, and we're not generating the extra
14688 push of IP (needed when a frame is required and the frame layout is APCS),
14689 then subtracting four from LR now will mean that the function return
14690 can be done with a single instruction. */
14691 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
14692 && (live_regs_mask & (1 << LR_REGNUM)) != 0
14693 && !(frame_pointer_needed && TARGET_APCS_FRAME)
14694 && TARGET_ARM)
14696 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
14698 emit_set_insn (lr, plus_constant (lr, -4));
14701 if (live_regs_mask)
14703 saved_regs += bit_count (live_regs_mask) * 4;
14704 if (optimize_size && !frame_pointer_needed
14705 && saved_regs == offsets->saved_regs - offsets->saved_args)
14707 /* If no coprocessor registers are being pushed and we don't have
14708 to worry about a frame pointer then push extra registers to
14709 create the stack frame. This is done in a way that does not
14710 alter the frame layout, so it is independent of the epilogue. */
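/* For instance, with an 8 byte frame and both r0 and r1 dead, r0 and r1
   are added to the push mask so that the single push also creates the
   frame, instead of emitting a separate "sub sp, sp, #8". */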
14711 int n;
14712 int frame;
14713 n = 0;
14714 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
14715 n++;
14716 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
14717 if (frame && n * 4 >= frame)
14719 n = frame / 4;
14720 live_regs_mask |= (1 << n) - 1;
14721 saved_regs += frame;
14724 insn = emit_multi_reg_push (live_regs_mask);
14725 RTX_FRAME_RELATED_P (insn) = 1;
14728 if (! IS_VOLATILE (func_type))
14729 saved_regs += arm_save_coproc_regs ();
14731 if (frame_pointer_needed && TARGET_ARM)
14733 /* Create the new frame pointer. */
14734 if (TARGET_APCS_FRAME)
14736 insn = GEN_INT (-(4 + args_to_push + fp_offset));
14737 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
14738 RTX_FRAME_RELATED_P (insn) = 1;
14740 if (IS_NESTED (func_type))
14742 /* Recover the static chain register. */
14743 if (!df_regs_ever_live_p (3)
14744 || saved_pretend_args)
14745 insn = gen_rtx_REG (SImode, 3);
14746 else /* if (crtl->args.pretend_args_size == 0) */
14748 insn = plus_constant (hard_frame_pointer_rtx, 4);
14749 insn = gen_frame_mem (SImode, insn);
14751 emit_set_insn (ip_rtx, insn);
14752 /* Add a USE to stop propagate_one_insn() from barfing. */
14753 emit_insn (gen_prologue_use (ip_rtx));
14756 else
14758 insn = GEN_INT (saved_regs - 4);
14759 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14760 stack_pointer_rtx, insn));
14761 RTX_FRAME_RELATED_P (insn) = 1;
14765 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
14767 /* This add can produce multiple insns for a large constant, so we
14768 need to get tricky. */
14769 rtx last = get_last_insn ();
14771 amount = GEN_INT (offsets->saved_args + saved_regs
14772 - offsets->outgoing_args);
14774 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14775 amount));
14776 do
14778 last = last ? NEXT_INSN (last) : get_insns ();
14779 RTX_FRAME_RELATED_P (last) = 1;
14781 while (last != insn);
14783 /* If the frame pointer is needed, emit a special barrier that
14784 will prevent the scheduler from moving stores to the frame
14785 before the stack adjustment. */
14786 if (frame_pointer_needed)
14787 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
14788 hard_frame_pointer_rtx));
14792 if (frame_pointer_needed && TARGET_THUMB2)
14793 thumb_set_frame_pointer (offsets);
14795 if (flag_pic && arm_pic_register != INVALID_REGNUM)
14797 unsigned long mask;
14799 mask = live_regs_mask;
14800 mask &= THUMB2_WORK_REGS;
14801 if (!IS_NESTED (func_type))
14802 mask |= (1 << IP_REGNUM);
14803 arm_load_pic_register (mask);
14806 /* If we are profiling, make sure no instructions are scheduled before
14807 the call to mcount. The same applies if the user has requested no
14808 scheduling in the prologue, or if we want non-call exceptions
14809 using the EABI unwinder, to prevent faulting instructions from being
14810 swapped with a stack adjustment. */
14811 if (crtl->profile || !TARGET_SCHED_PROLOG
14812 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
14813 emit_insn (gen_blockage ());
14815 /* If the link register is being kept alive, with the return address in it,
14816 then make sure that it does not get reused by the ce2 pass. */
14817 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
14818 cfun->machine->lr_save_eliminated = 1;
14821 /* Print condition code to STREAM. Helper function for arm_print_operand. */
14822 static void
14823 arm_print_condition (FILE *stream)
14825 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
14827 /* Branch conversion is not implemented for Thumb-2. */
14828 if (TARGET_THUMB)
14830 output_operand_lossage ("predicated Thumb instruction");
14831 return;
14833 if (current_insn_predicate != NULL)
14835 output_operand_lossage
14836 ("predicated instruction in conditional sequence");
14837 return;
14840 fputs (arm_condition_codes[arm_current_cc], stream);
14842 else if (current_insn_predicate)
14844 enum arm_cond_code code;
14846 if (TARGET_THUMB1)
14848 output_operand_lossage ("predicated Thumb instruction");
14849 return;
14852 code = get_arm_condition_code (current_insn_predicate);
14853 fputs (arm_condition_codes[code], stream);
14858 /* If CODE is 'd', then the X is a condition operand and the instruction
14859 should only be executed if the condition is true.
14860 if CODE is 'D', then the X is a condition operand and the instruction
14861 should only be executed if the condition is false: however, if the mode
14862 of the comparison is CCFPEmode, then always execute the instruction -- we
14863 do this because in these circumstances !GE does not necessarily imply LT;
14864 in these cases the instruction pattern will take care to make sure that
14865 an instruction containing %d will follow, thereby undoing the effects of
14866 doing this instruction unconditionally.
14867 If CODE is 'N' then X is a floating point operand that must be negated
14868 before output.
14869 If CODE is 'B' then output a bitwise inverted value of X (a const int).
14870 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
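/* For example, %B applied to (const_int 5) prints -6 (the sign-extended
   bitwise complement), and %M applied to a DImode value held in r4 prints
   "{r4-r5}". */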
14871 void
14872 arm_print_operand (FILE *stream, rtx x, int code)
14874 switch (code)
14876 case '@':
14877 fputs (ASM_COMMENT_START, stream);
14878 return;
14880 case '_':
14881 fputs (user_label_prefix, stream);
14882 return;
14884 case '|':
14885 fputs (REGISTER_PREFIX, stream);
14886 return;
14888 case '?':
14889 arm_print_condition (stream);
14890 return;
14892 case '(':
14893 /* Nothing in unified syntax, otherwise the current condition code. */
14894 if (!TARGET_UNIFIED_ASM)
14895 arm_print_condition (stream);
14896 break;
14898 case ')':
14899 /* The current condition code in unified syntax, otherwise nothing. */
14900 if (TARGET_UNIFIED_ASM)
14901 arm_print_condition (stream);
14902 break;
14904 case '.':
14905 /* The current condition code for a condition code setting instruction.
14906 Preceded by 's' in unified syntax, otherwise followed by 's'. */
14907 if (TARGET_UNIFIED_ASM)
14909 fputc('s', stream);
14910 arm_print_condition (stream);
14912 else
14914 arm_print_condition (stream);
14915 fputc('s', stream);
14917 return;
14919 case '!':
14920 /* If the instruction is conditionally executed then print
14921 the current condition code, otherwise print 's'. */
14922 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
14923 if (current_insn_predicate)
14924 arm_print_condition (stream);
14925 else
14926 fputc('s', stream);
14927 break;
14929 /* %# is a "break" sequence. It doesn't output anything, but is used to
14930 separate e.g. operand numbers from following text, if that text consists
14931 of further digits which we don't want to be part of the operand
14932 number. */
14933 case '#':
14934 return;
14936 case 'N':
14938 REAL_VALUE_TYPE r;
14939 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14940 r = REAL_VALUE_NEGATE (r);
14941 fprintf (stream, "%s", fp_const_from_val (&r));
14943 return;
14945 /* An integer or symbol address without a preceding # sign. */
14946 case 'c':
14947 switch (GET_CODE (x))
14949 case CONST_INT:
14950 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14951 break;
14953 case SYMBOL_REF:
14954 output_addr_const (stream, x);
14955 break;
14957 default:
14958 gcc_unreachable ();
14960 return;
14962 case 'B':
14963 if (GET_CODE (x) == CONST_INT)
14965 HOST_WIDE_INT val;
14966 val = ARM_SIGN_EXTEND (~INTVAL (x));
14967 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
14969 else
14971 putc ('~', stream);
14972 output_addr_const (stream, x);
14974 return;
14976 case 'L':
14977 /* The low 16 bits of an immediate constant. */
14978 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
14979 return;
14981 case 'i':
14982 fprintf (stream, "%s", arithmetic_instr (x, 1));
14983 return;
14985 /* Truncate Cirrus shift counts. */
14986 case 's':
14987 if (GET_CODE (x) == CONST_INT)
14989 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
14990 return;
14992 arm_print_operand (stream, x, 0);
14993 return;
14995 case 'I':
14996 fprintf (stream, "%s", arithmetic_instr (x, 0));
14997 return;
14999 case 'S':
15001 HOST_WIDE_INT val;
15002 const char *shift;
15004 if (!shift_operator (x, SImode))
15006 output_operand_lossage ("invalid shift operand");
15007 break;
15010 shift = shift_op (x, &val);
15012 if (shift)
15014 fprintf (stream, ", %s ", shift);
15015 if (val == -1)
15016 arm_print_operand (stream, XEXP (x, 1), 0);
15017 else
15018 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15021 return;
15023 /* An explanation of the 'Q', 'R' and 'H' register operands:
15025 In a pair of registers containing a DI or DF value the 'Q'
15026 operand returns the register number of the register containing
15027 the least significant part of the value. The 'R' operand returns
15028 the register number of the register containing the most
15029 significant part of the value.
15031 The 'H' operand returns the higher of the two register numbers.
15032 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15033 same as the 'Q' operand, since the most significant part of the
15034 value is held in the lower number register. The reverse is true
15035 on systems where WORDS_BIG_ENDIAN is false.
15037 The purpose of these operands is to distinguish between cases
15038 where the endian-ness of the values is important (for example
15039 when they are added together), and cases where the endian-ness
15040 is irrelevant, but the order of register operations is important.
15041 For example when loading a value from memory into a register
15042 pair, the endian-ness does not matter. Provided that the value
15043 from the lower memory address is put into the lower numbered
15044 register, and the value from the higher address is put into the
15045 higher numbered register, the load will work regardless of whether
15046 the value being loaded is big-wordian or little-wordian. The
15047 order of the two register loads can matter however, if the address
15048 of the memory location is actually held in one of the registers
15049 being overwritten by the load. */
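/* For example, with a DImode value held in r0/r1 on a little-endian
   target (WORDS_BIG_ENDIAN false), %Q prints r0 (the least significant
   half), %R prints r1 (the most significant half) and %H also prints
   r1. */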
15050 case 'Q':
15051 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15053 output_operand_lossage ("invalid operand for code '%c'", code);
15054 return;
15057 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15058 return;
15060 case 'R':
15061 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15063 output_operand_lossage ("invalid operand for code '%c'", code);
15064 return;
15067 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
15068 return;
15070 case 'H':
15071 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15073 output_operand_lossage ("invalid operand for code '%c'", code);
15074 return;
15077 asm_fprintf (stream, "%r", REGNO (x) + 1);
15078 return;
15080 case 'J':
15081 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15083 output_operand_lossage ("invalid operand for code '%c'", code);
15084 return;
15087 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15088 return;
15090 case 'K':
15091 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15093 output_operand_lossage ("invalid operand for code '%c'", code);
15094 return;
15097 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
15098 return;
15100 case 'm':
15101 asm_fprintf (stream, "%r",
15102 GET_CODE (XEXP (x, 0)) == REG
15103 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
15104 return;
15106 case 'M':
15107 asm_fprintf (stream, "{%r-%r}",
15108 REGNO (x),
15109 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
15110 return;
15112 /* Like 'M', but writing doubleword vector registers, for use by Neon
15113 insns. */
15114 case 'h':
15116 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
15117 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
15118 if (numregs == 1)
15119 asm_fprintf (stream, "{d%d}", regno);
15120 else
15121 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
15123 return;
15125 case 'd':
15126 /* CONST_TRUE_RTX means always -- that's the default. */
15127 if (x == const_true_rtx)
15128 return;
15130 if (!COMPARISON_P (x))
15132 output_operand_lossage ("invalid operand for code '%c'", code);
15133 return;
15136 fputs (arm_condition_codes[get_arm_condition_code (x)],
15137 stream);
15138 return;
15140 case 'D':
15141 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
15142 want to do that. */
15143 if (x == const_true_rtx)
15145 output_operand_lossage ("instruction never executed");
15146 return;
15148 if (!COMPARISON_P (x))
15150 output_operand_lossage ("invalid operand for code '%c'", code);
15151 return;
15154 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
15155 (get_arm_condition_code (x))],
15156 stream);
15157 return;
15159 /* Cirrus registers can be accessed in a variety of ways:
15160 single floating point (f)
15161 double floating point (d)
15162 32bit integer (fx)
15163 64bit integer (dx). */
15164 case 'W': /* Cirrus register in F mode. */
15165 case 'X': /* Cirrus register in D mode. */
15166 case 'Y': /* Cirrus register in FX mode. */
15167 case 'Z': /* Cirrus register in DX mode. */
15168 gcc_assert (GET_CODE (x) == REG
15169 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
15171 fprintf (stream, "mv%s%s",
15172 code == 'W' ? "f"
15173 : code == 'X' ? "d"
15174 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
15176 return;
15178 /* Print cirrus register in the mode specified by the register's mode. */
15179 case 'V':
15181 int mode = GET_MODE (x);
15183 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
15185 output_operand_lossage ("invalid operand for code '%c'", code);
15186 return;
15189 fprintf (stream, "mv%s%s",
15190 mode == DFmode ? "d"
15191 : mode == SImode ? "fx"
15192 : mode == DImode ? "dx"
15193 : "f", reg_names[REGNO (x)] + 2);
15195 return;
15198 case 'U':
15199 if (GET_CODE (x) != REG
15200 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
15201 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
15202 /* Bad value for wCG register number. */
15204 output_operand_lossage ("invalid operand for code '%c'", code);
15205 return;
15208 else
15209 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15210 return;
15212 /* Print an iWMMXt control register name. */
15213 case 'w':
15214 if (GET_CODE (x) != CONST_INT
15215 || INTVAL (x) < 0
15216 || INTVAL (x) >= 16)
15217 /* Bad value for wC register number. */
15219 output_operand_lossage ("invalid operand for code '%c'", code);
15220 return;
15223 else
15225 static const char * wc_reg_names [16] =
15227 "wCID", "wCon", "wCSSF", "wCASF",
15228 "wC4", "wC5", "wC6", "wC7",
15229 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15230 "wC12", "wC13", "wC14", "wC15"
15233 fprintf (stream, wc_reg_names [INTVAL (x)]);
15235 return;
15237 /* Print the high single-precision register of a VFP double-precision
15238 register. */
15239 case 'p':
15241 int mode = GET_MODE (x);
15242 int regno;
15244 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
15246 output_operand_lossage ("invalid operand for code '%c'", code);
15247 return;
15250 regno = REGNO (x);
15251 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
15253 output_operand_lossage ("invalid operand for code '%c'", code);
15254 return;
15257 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
15259 return;
15261 /* Print a VFP/Neon double precision or quad precision register name. */
15262 case 'P':
15263 case 'q':
15265 int mode = GET_MODE (x);
15266 int is_quad = (code == 'q');
15267 int regno;
15269 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15271 output_operand_lossage ("invalid operand for code '%c'", code);
15272 return;
15275 if (GET_CODE (x) != REG
15276 || !IS_VFP_REGNUM (REGNO (x)))
15278 output_operand_lossage ("invalid operand for code '%c'", code);
15279 return;
15282 regno = REGNO (x);
15283 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15284 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15286 output_operand_lossage ("invalid operand for code '%c'", code);
15287 return;
15290 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15291 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15293 return;
15295 /* These two codes print the low/high doubleword register of a Neon quad
15296 register, respectively. For pair-structure types, can also print
15297 low/high quadword registers. */
15298 case 'e':
15299 case 'f':
15301 int mode = GET_MODE (x);
15302 int regno;
15304 if ((GET_MODE_SIZE (mode) != 16
15305 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15307 output_operand_lossage ("invalid operand for code '%c'", code);
15308 return;
15311 regno = REGNO (x);
15312 if (!NEON_REGNO_OK_FOR_QUAD (regno))
15314 output_operand_lossage ("invalid operand for code '%c'", code);
15315 return;
15318 if (GET_MODE_SIZE (mode) == 16)
15319 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15320 + (code == 'f' ? 1 : 0));
15321 else
15322 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15323 + (code == 'f' ? 1 : 0));
15325 return;
15327 /* Print a VFPv3 floating-point constant, represented as an integer
15328 index. */
15329 case 'G':
15331 int index = vfp3_const_double_index (x);
15332 gcc_assert (index != -1);
15333 fprintf (stream, "%d", index);
15335 return;
15337 /* Print bits representing opcode features for Neon.
15339 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15340 and polynomials as unsigned.
15342 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15344 Bit 2 is 1 for rounding functions, 0 otherwise. */
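/* For example, an operand with the value 5 (signed, ordinary integer,
   rounding) prints as 's' for %T, 'i' for %F, 's' for %t and "r" for
   %O. */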
15346 /* Identify the type as 's', 'u', 'p' or 'f'. */
15347 case 'T':
15349 HOST_WIDE_INT bits = INTVAL (x);
15350 fputc ("uspf"[bits & 3], stream);
15352 return;
15354 /* Likewise, but signed and unsigned integers are both 'i'. */
15355 case 'F':
15357 HOST_WIDE_INT bits = INTVAL (x);
15358 fputc ("iipf"[bits & 3], stream);
15360 return;
15362 /* As for 'T', but emit 'u' instead of 'p'. */
15363 case 't':
15365 HOST_WIDE_INT bits = INTVAL (x);
15366 fputc ("usuf"[bits & 3], stream);
15368 return;
15370 /* Bit 2: rounding (vs none). */
15371 case 'O':
15373 HOST_WIDE_INT bits = INTVAL (x);
15374 fputs ((bits & 4) != 0 ? "r" : "", stream);
15376 return;
15378 /* Memory operand for vld1/vst1 instruction. */
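/* For example, a plain register address prints as "[r0]" and a
   post-increment address as "[r0]!". */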
15379 case 'A':
15381 rtx addr;
15382 bool postinc = FALSE;
15383 gcc_assert (GET_CODE (x) == MEM);
15384 addr = XEXP (x, 0);
15385 if (GET_CODE (addr) == POST_INC)
15387 postinc = 1;
15388 addr = XEXP (addr, 0);
15390 asm_fprintf (stream, "[%r]", REGNO (addr));
15391 if (postinc)
15392 fputs("!", stream);
15394 return;
15396 /* Translate an S register number into a D register number and element index. */
15397 case 'y':
15399 int mode = GET_MODE (x);
15400 int regno;
15402 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
15404 output_operand_lossage ("invalid operand for code '%c'", code);
15405 return;
15408 regno = REGNO (x);
15409 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15411 output_operand_lossage ("invalid operand for code '%c'", code);
15412 return;
15415 regno = regno - FIRST_VFP_REGNUM;
15416 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
15418 return;
15420 /* Register specifier for vld1.16/vst1.16. Translate the S register
15421 number into a D register number and element index. */
15422 case 'z':
15424 int mode = GET_MODE (x);
15425 int regno;
15427 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15429 output_operand_lossage ("invalid operand for code '%c'", code);
15430 return;
15433 regno = REGNO (x);
15434 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15436 output_operand_lossage ("invalid operand for code '%c'", code);
15437 return;
15440 regno = regno - FIRST_VFP_REGNUM;
15441 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
15443 return;
15445 default:
15446 if (x == 0)
15448 output_operand_lossage ("missing operand");
15449 return;
15452 switch (GET_CODE (x))
15454 case REG:
15455 asm_fprintf (stream, "%r", REGNO (x));
15456 break;
15458 case MEM:
15459 output_memory_reference_mode = GET_MODE (x);
15460 output_address (XEXP (x, 0));
15461 break;
15463 case CONST_DOUBLE:
15464 if (TARGET_NEON)
15466 char fpstr[20];
15467 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
15468 sizeof (fpstr), 0, 1);
15469 fprintf (stream, "#%s", fpstr);
15471 else
15472 fprintf (stream, "#%s", fp_immediate_constant (x));
15473 break;
15475 default:
15476 gcc_assert (GET_CODE (x) != NEG);
15477 fputc ('#', stream);
15478 if (GET_CODE (x) == HIGH)
15480 fputs (":lower16:", stream);
15481 x = XEXP (x, 0);
15484 output_addr_const (stream, x);
15485 break;
15490 /* Target hook for assembling integer objects. The ARM version needs to
15491 handle word-sized values specially. */
15492 static bool
15493 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
15495 enum machine_mode mode;
15497 if (size == UNITS_PER_WORD && aligned_p)
15499 fputs ("\t.word\t", asm_out_file);
15500 output_addr_const (asm_out_file, x);
15502 /* Mark symbols as position independent. We only do this in the
15503 .text segment, not in the .data segment. */
15504 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
15505 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
15507 /* See legitimize_pic_address for an explanation of the
15508 TARGET_VXWORKS_RTP check. */
15509 if (TARGET_VXWORKS_RTP
15510 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
15511 fputs ("(GOT)", asm_out_file);
15512 else
15513 fputs ("(GOTOFF)", asm_out_file);
15515 fputc ('\n', asm_out_file);
15516 return true;
15519 mode = GET_MODE (x);
15521 if (arm_vector_mode_supported_p (mode))
15523 int i, units;
15525 gcc_assert (GET_CODE (x) == CONST_VECTOR);
15527 units = CONST_VECTOR_NUNITS (x);
15528 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15530 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15531 for (i = 0; i < units; i++)
15533 rtx elt = CONST_VECTOR_ELT (x, i);
15534 assemble_integer
15535 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
15537 else
15538 for (i = 0; i < units; i++)
15540 rtx elt = CONST_VECTOR_ELT (x, i);
15541 REAL_VALUE_TYPE rval;
15543 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
15545 assemble_real
15546 (rval, GET_MODE_INNER (mode),
15547 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
15550 return true;
15553 return default_assemble_integer (x, size, aligned_p);
15556 static void
15557 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
15559 section *s;
15561 if (!TARGET_AAPCS_BASED)
15563 (is_ctor ?
15564 default_named_section_asm_out_constructor
15565 : default_named_section_asm_out_destructor) (symbol, priority);
15566 return;
15569 /* Put these in the .init_array section, using a special relocation. */
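/* For instance, a constructor for a hypothetical function "foo" registered
   with priority 101 would be placed in a section named ".init_array.00101"
   and emitted roughly as:

   .word	foo(target1) */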
15570 if (priority != DEFAULT_INIT_PRIORITY)
15572 char buf[18];
15573 sprintf (buf, "%s.%.5u",
15574 is_ctor ? ".init_array" : ".fini_array",
15575 priority);
15576 s = get_section (buf, SECTION_WRITE, NULL_TREE);
15578 else if (is_ctor)
15579 s = ctors_section;
15580 else
15581 s = dtors_section;
15583 switch_to_section (s);
15584 assemble_align (POINTER_SIZE);
15585 fputs ("\t.word\t", asm_out_file);
15586 output_addr_const (asm_out_file, symbol);
15587 fputs ("(target1)\n", asm_out_file);
15590 /* Add a function to the list of static constructors. */
15592 static void
15593 arm_elf_asm_constructor (rtx symbol, int priority)
15595 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
15598 /* Add a function to the list of static destructors. */
15600 static void
15601 arm_elf_asm_destructor (rtx symbol, int priority)
15603 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
15606 /* A finite state machine takes care of noticing whether or not instructions
15607 can be conditionally executed, and thus decrease execution time and code
15608 size by deleting branch instructions. The fsm is controlled by
15609 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
15611 /* The state of the fsm controlling condition codes are:
15612 0: normal, do nothing special
15613 1: make ASM_OUTPUT_OPCODE not output this instruction
15614 2: make ASM_OUTPUT_OPCODE not output this instruction
15615 3: make instructions conditional
15616 4: make instructions conditional
15618 State transitions (state->state by whom under condition):
15619 0 -> 1 final_prescan_insn if the `target' is a label
15620 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
15621 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
15622 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
15623 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
15624 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
15625 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
15626 (the target insn is arm_target_insn).
15628 If the jump clobbers the conditions then we use states 2 and 4.
15630 A similar thing can be done with conditional return insns.
15632 XXX In case the `target' is an unconditional branch, this conditionalising
15633 of the instructions always reduces code size, but not always execution
15634 time. But then, I want to reduce the code size to somewhere near what
15635 /bin/cc produces. */
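/* As a rough example of the transformation, a sequence such as

   	cmp	r0, #0
   	beq	.L1
   	add	r1, r1, #1
   .L1:

   can instead be emitted as

   	cmp	r0, #0
   	addne	r1, r1, #1

   provided the skipped instructions are few enough and are safe to
   conditionalise. */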
15637 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
15638 instructions. When a COND_EXEC instruction is seen the subsequent
15639 instructions are scanned so that multiple conditional instructions can be
15640 combined into a single IT block. arm_condexec_count and arm_condexec_mask
15641 specify the length and true/false mask for the IT block. These will be
15642 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
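/* For example, two consecutive COND_EXEC insns predicated on EQ and NE
   respectively would typically be merged into a single "ite eq" block
   covering both instructions. */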
15644 /* Returns the index of the ARM condition code string in
15645 `arm_condition_codes'. COMPARISON should be an rtx like
15646 `(eq (...) (...))'. */
15647 static enum arm_cond_code
15648 get_arm_condition_code (rtx comparison)
15650 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
15651 enum arm_cond_code code;
15652 enum rtx_code comp_code = GET_CODE (comparison);
15654 if (GET_MODE_CLASS (mode) != MODE_CC)
15655 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
15656 XEXP (comparison, 1));
15658 switch (mode)
15660 case CC_DNEmode: code = ARM_NE; goto dominance;
15661 case CC_DEQmode: code = ARM_EQ; goto dominance;
15662 case CC_DGEmode: code = ARM_GE; goto dominance;
15663 case CC_DGTmode: code = ARM_GT; goto dominance;
15664 case CC_DLEmode: code = ARM_LE; goto dominance;
15665 case CC_DLTmode: code = ARM_LT; goto dominance;
15666 case CC_DGEUmode: code = ARM_CS; goto dominance;
15667 case CC_DGTUmode: code = ARM_HI; goto dominance;
15668 case CC_DLEUmode: code = ARM_LS; goto dominance;
15669 case CC_DLTUmode: code = ARM_CC;
15671 dominance:
15672 gcc_assert (comp_code == EQ || comp_code == NE);
15674 if (comp_code == EQ)
15675 return ARM_INVERSE_CONDITION_CODE (code);
15676 return code;
15678 case CC_NOOVmode:
15679 switch (comp_code)
15681 case NE: return ARM_NE;
15682 case EQ: return ARM_EQ;
15683 case GE: return ARM_PL;
15684 case LT: return ARM_MI;
15685 default: gcc_unreachable ();
15688 case CC_Zmode:
15689 switch (comp_code)
15691 case NE: return ARM_NE;
15692 case EQ: return ARM_EQ;
15693 default: gcc_unreachable ();
15696 case CC_Nmode:
15697 switch (comp_code)
15699 case NE: return ARM_MI;
15700 case EQ: return ARM_PL;
15701 default: gcc_unreachable ();
15704 case CCFPEmode:
15705 case CCFPmode:
15706 /* These encodings assume that AC=1 in the FPA system control
15707 byte. This allows us to handle all cases except UNEQ and
15708 LTGT. */
15709 switch (comp_code)
15711 case GE: return ARM_GE;
15712 case GT: return ARM_GT;
15713 case LE: return ARM_LS;
15714 case LT: return ARM_MI;
15715 case NE: return ARM_NE;
15716 case EQ: return ARM_EQ;
15717 case ORDERED: return ARM_VC;
15718 case UNORDERED: return ARM_VS;
15719 case UNLT: return ARM_LT;
15720 case UNLE: return ARM_LE;
15721 case UNGT: return ARM_HI;
15722 case UNGE: return ARM_PL;
15723 /* UNEQ and LTGT do not have a representation. */
15724 case UNEQ: /* Fall through. */
15725 case LTGT: /* Fall through. */
15726 default: gcc_unreachable ();
15729 case CC_SWPmode:
15730 switch (comp_code)
15732 case NE: return ARM_NE;
15733 case EQ: return ARM_EQ;
15734 case GE: return ARM_LE;
15735 case GT: return ARM_LT;
15736 case LE: return ARM_GE;
15737 case LT: return ARM_GT;
15738 case GEU: return ARM_LS;
15739 case GTU: return ARM_CC;
15740 case LEU: return ARM_CS;
15741 case LTU: return ARM_HI;
15742 default: gcc_unreachable ();
15745 case CC_Cmode:
15746 switch (comp_code)
15748 case LTU: return ARM_CS;
15749 case GEU: return ARM_CC;
15750 default: gcc_unreachable ();
15753 case CCmode:
15754 switch (comp_code)
15756 case NE: return ARM_NE;
15757 case EQ: return ARM_EQ;
15758 case GE: return ARM_GE;
15759 case GT: return ARM_GT;
15760 case LE: return ARM_LE;
15761 case LT: return ARM_LT;
15762 case GEU: return ARM_CS;
15763 case GTU: return ARM_HI;
15764 case LEU: return ARM_LS;
15765 case LTU: return ARM_CC;
15766 default: gcc_unreachable ();
15769 default: gcc_unreachable ();
15773 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
15774 instructions. */
15775 void
15776 thumb2_final_prescan_insn (rtx insn)
15778 rtx first_insn = insn;
15779 rtx body = PATTERN (insn);
15780 rtx predicate;
15781 enum arm_cond_code code;
15782 int n;
15783 int mask;
15785 /* Remove the previous insn from the count of insns to be output. */
15786 if (arm_condexec_count)
15787 arm_condexec_count--;
15789 /* Nothing to do if we are already inside a conditional block. */
15790 if (arm_condexec_count)
15791 return;
15793 if (GET_CODE (body) != COND_EXEC)
15794 return;
15796 /* Conditional jumps are implemented directly. */
15797 if (GET_CODE (insn) == JUMP_INSN)
15798 return;
15800 predicate = COND_EXEC_TEST (body);
15801 arm_current_cc = get_arm_condition_code (predicate);
15803 n = get_attr_ce_count (insn);
15804 arm_condexec_count = 1;
15805 arm_condexec_mask = (1 << n) - 1;
15806 arm_condexec_masklen = n;
15807 /* See if subsequent instructions can be combined into the same block. */
15808 for (;;)
15810 insn = next_nonnote_insn (insn);
15812 /* Jumping into the middle of an IT block is illegal, so a label or
15813 barrier terminates the block. */
15814 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
15815 break;
15817 body = PATTERN (insn);
15818 /* USE and CLOBBER aren't really insns, so just skip them. */
15819 if (GET_CODE (body) == USE
15820 || GET_CODE (body) == CLOBBER)
15821 continue;
15823 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
15824 if (GET_CODE (body) != COND_EXEC)
15825 break;
15826 /* Allow up to 4 conditionally executed instructions in a block. */
15827 n = get_attr_ce_count (insn);
15828 if (arm_condexec_masklen + n > 4)
15829 break;
15831 predicate = COND_EXEC_TEST (body);
15832 code = get_arm_condition_code (predicate);
15833 mask = (1 << n) - 1;
15834 if (arm_current_cc == code)
15835 arm_condexec_mask |= (mask << arm_condexec_masklen);
15836 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
15837 break;
15839 arm_condexec_count++;
15840 arm_condexec_masklen += n;
15842 /* A jump must be the last instruction in a conditional block. */
15843 if (GET_CODE(insn) == JUMP_INSN)
15844 break;
15846 /* Restore recog_data (getting the attributes of other insns can
15847 destroy this array, but final.c assumes that it remains intact
15848 across this call). */
15849 extract_constrain_insn_cached (first_insn);
15852 void
15853 arm_final_prescan_insn (rtx insn)
15855 /* BODY will hold the body of INSN. */
15856 rtx body = PATTERN (insn);
15858 /* This will be 1 if trying to repeat the trick, and things need to be
15859 reversed if it appears to fail. */
15860 int reverse = 0;
15862 /* If we start with a return insn, we only succeed if we find another one. */
15863 int seeking_return = 0;
15865 /* START_INSN will hold the insn from where we start looking. This is the
15866 first insn after the following code_label if REVERSE is true. */
15867 rtx start_insn = insn;
15869 /* If in state 4, check if the target branch is reached, in order to
15870 change back to state 0. */
15871 if (arm_ccfsm_state == 4)
15873 if (insn == arm_target_insn)
15875 arm_target_insn = NULL;
15876 arm_ccfsm_state = 0;
15878 return;
15881 /* If in state 3, it is possible to repeat the trick: this insn must be an
15882 unconditional branch to a label, it must be immediately followed by the
15883 previous target label, that label must be used only once, and the label
15884 this branch jumps to must not be too far away. */
15885 if (arm_ccfsm_state == 3)
15887 if (simplejump_p (insn))
15889 start_insn = next_nonnote_insn (start_insn);
15890 if (GET_CODE (start_insn) == BARRIER)
15892 /* XXX Isn't this always a barrier? */
15893 start_insn = next_nonnote_insn (start_insn);
15895 if (GET_CODE (start_insn) == CODE_LABEL
15896 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
15897 && LABEL_NUSES (start_insn) == 1)
15898 reverse = TRUE;
15899 else
15900 return;
15902 else if (GET_CODE (body) == RETURN)
15904 start_insn = next_nonnote_insn (start_insn);
15905 if (GET_CODE (start_insn) == BARRIER)
15906 start_insn = next_nonnote_insn (start_insn);
15907 if (GET_CODE (start_insn) == CODE_LABEL
15908 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
15909 && LABEL_NUSES (start_insn) == 1)
15911 reverse = TRUE;
15912 seeking_return = 1;
15914 else
15915 return;
15917 else
15918 return;
15921 gcc_assert (!arm_ccfsm_state || reverse);
15922 if (GET_CODE (insn) != JUMP_INSN)
15923 return;
15925 /* This jump might be paralleled with a clobber of the condition codes;
15926 the jump should always come first.  */
15927 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
15928 body = XVECEXP (body, 0, 0);
15930 if (reverse
15931 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
15932 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
15934 int insns_skipped;
15935 int fail = FALSE, succeed = FALSE;
15936 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
15937 int then_not_else = TRUE;
15938 rtx this_insn = start_insn, label = 0;
15940 /* Register the insn jumped to. */
15941 if (reverse)
15943 if (!seeking_return)
15944 label = XEXP (SET_SRC (body), 0);
15946 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
15947 label = XEXP (XEXP (SET_SRC (body), 1), 0);
15948 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
15950 label = XEXP (XEXP (SET_SRC (body), 2), 0);
15951 then_not_else = FALSE;
15953 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
15954 seeking_return = 1;
15955 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
15957 seeking_return = 1;
15958 then_not_else = FALSE;
15960 else
15961 gcc_unreachable ();
15963 /* See how many insns this branch skips, and what kind of insns. If all
15964 insns are okay, and the label or unconditional branch to the same
15965 label is not too far away, succeed. */
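/* Editorial illustration (not from the original source): the typical shape
   of the code this FSM targets is a short conditional branch around one or
   two insns, e.g.

        cmp     r0, #0
        beq     .L2
        add     r1, r1, #1
      .L2:

   When the scan below succeeds, the branch is suppressed and the skipped
   insn is emitted with the inverse condition instead:

        cmp     r0, #0
        addne   r1, r1, #1
 */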
15966 for (insns_skipped = 0;
15967 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
15969 rtx scanbody;
15971 this_insn = next_nonnote_insn (this_insn);
15972 if (!this_insn)
15973 break;
15975 switch (GET_CODE (this_insn))
15977 case CODE_LABEL:
15978 /* Succeed if it is the target label, otherwise fail since
15979 control falls in from somewhere else. */
15980 if (this_insn == label)
15982 arm_ccfsm_state = 1;
15983 succeed = TRUE;
15985 else
15986 fail = TRUE;
15987 break;
15989 case BARRIER:
15990 /* Succeed if the following insn is the target label.
15991 Otherwise fail.
15992 If return insns are used then the last insn in a function
15993 will be a barrier. */
15994 this_insn = next_nonnote_insn (this_insn);
15995 if (this_insn && this_insn == label)
15997 arm_ccfsm_state = 1;
15998 succeed = TRUE;
16000 else
16001 fail = TRUE;
16002 break;
16004 case CALL_INSN:
16005 /* The AAPCS says that conditional calls should not be
16006 used since they make interworking inefficient (the
16007 linker can't transform BL<cond> into BLX). That's
16008 only a problem if the machine has BLX. */
16009 if (arm_arch5)
16011 fail = TRUE;
16012 break;
16015 /* Succeed if the following insn is the target label, or
16016 if the following two insns are a barrier and the
16017 target label. */
16018 this_insn = next_nonnote_insn (this_insn);
16019 if (this_insn && GET_CODE (this_insn) == BARRIER)
16020 this_insn = next_nonnote_insn (this_insn);
16022 if (this_insn && this_insn == label
16023 && insns_skipped < max_insns_skipped)
16025 arm_ccfsm_state = 1;
16026 succeed = TRUE;
16028 else
16029 fail = TRUE;
16030 break;
16032 case JUMP_INSN:
16033 /* If this is an unconditional branch to the same label, succeed.
16034 If it is to another label, do nothing. If it is conditional,
16035 fail. */
16036 /* XXX Probably, the tests for SET and the PC are
16037 unnecessary. */
16039 scanbody = PATTERN (this_insn);
16040 if (GET_CODE (scanbody) == SET
16041 && GET_CODE (SET_DEST (scanbody)) == PC)
16043 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
16044 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
16046 arm_ccfsm_state = 2;
16047 succeed = TRUE;
16049 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
16050 fail = TRUE;
16052 /* Fail if a conditional return is undesirable (e.g. on a
16053 StrongARM), but still allow this if optimizing for size. */
16054 else if (GET_CODE (scanbody) == RETURN
16055 && !use_return_insn (TRUE, NULL)
16056 && !optimize_size)
16057 fail = TRUE;
16058 else if (GET_CODE (scanbody) == RETURN
16059 && seeking_return)
16061 arm_ccfsm_state = 2;
16062 succeed = TRUE;
16064 else if (GET_CODE (scanbody) == PARALLEL)
16066 switch (get_attr_conds (this_insn))
16068 case CONDS_NOCOND:
16069 break;
16070 default:
16071 fail = TRUE;
16072 break;
16075 else
16076 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
16078 break;
16080 case INSN:
16081 /* Instructions using or affecting the condition codes make it
16082 fail. */
16083 scanbody = PATTERN (this_insn);
16084 if (!(GET_CODE (scanbody) == SET
16085 || GET_CODE (scanbody) == PARALLEL)
16086 || get_attr_conds (this_insn) != CONDS_NOCOND)
16087 fail = TRUE;
16089 /* A conditional Cirrus instruction must be followed by
16090 a non-Cirrus instruction. However, since we
16091 conditionalize instructions in this function, and since
16092 by the time we get here we can no longer add instructions
16093 (nops), because shorten_branches() has already been
16094 called, we simply disable conditionalizing Cirrus
16095 instructions to be safe. */
16096 if (GET_CODE (scanbody) != USE
16097 && GET_CODE (scanbody) != CLOBBER
16098 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
16099 fail = TRUE;
16100 break;
16102 default:
16103 break;
16106 if (succeed)
16108 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
16109 arm_target_label = CODE_LABEL_NUMBER (label);
16110 else
16112 gcc_assert (seeking_return || arm_ccfsm_state == 2);
16114 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
16116 this_insn = next_nonnote_insn (this_insn);
16117 gcc_assert (!this_insn
16118 || (GET_CODE (this_insn) != BARRIER
16119 && GET_CODE (this_insn) != CODE_LABEL));
16121 if (!this_insn)
16123 /* Oh, dear! We ran off the end... give up. */
16124 extract_constrain_insn_cached (insn);
16125 arm_ccfsm_state = 0;
16126 arm_target_insn = NULL;
16127 return;
16129 arm_target_insn = this_insn;
16132 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
16133 what it was. */
16134 if (!reverse)
16135 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
16137 if (reverse || then_not_else)
16138 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
16141 /* Restore recog_data (getting the attributes of other insns can
16142 destroy this array, but final.c assumes that it remains intact
16143 across this call). */
16144 extract_constrain_insn_cached (insn);
16148 /* Output IT instructions. */
16149 void
16150 thumb2_asm_output_opcode (FILE * stream)
16152 char buff[5];
16153 int n;
16155 if (arm_condexec_mask)
16157 for (n = 0; n < arm_condexec_masklen; n++)
16158 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
16159 buff[n] = 0;
16160 asm_fprintf(stream, "i%s\t%s\n\t", buff,
16161 arm_condition_codes[arm_current_cc]);
16162 arm_condexec_mask = 0;
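/* Editorial example (assumes the EQ/EQ/NE block sketched after
   thumb2_final_prescan_insn above): with arm_condexec_mask == 0x3 and
   arm_condexec_masklen == 3, buff becomes "tte", so this function prints
   "itte\teq" ahead of the first opcode; the leading 'i' plus buff[0] spell
   the "it" mnemonic itself.  */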
16166 /* Returns true if REGNO is a valid register
16167 for holding a quantity of type MODE. */
16168 int
16169 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
16171 if (GET_MODE_CLASS (mode) == MODE_CC)
16172 return (regno == CC_REGNUM
16173 || (TARGET_HARD_FLOAT && TARGET_VFP
16174 && regno == VFPCC_REGNUM));
16176 if (TARGET_THUMB1)
16177 /* For the Thumb we only allow values bigger than SImode in
16178 registers 0 - 6, so that there is always a second low
16179 register available to hold the upper part of the value.
16180 We probably ought to ensure that the register is the
16181 start of an even numbered register pair. */
16182 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
16184 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
16185 && IS_CIRRUS_REGNUM (regno))
16186 /* We have outlawed SI values in Cirrus registers because they
16187 reside in the lower 32 bits, but SF values reside in the
16188 upper 32 bits. This causes gcc all sorts of grief. We can't
16189 even split the registers into pairs because Cirrus SI values
16190 get sign extended to 64 bits -- aldyh. */
16191 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
16193 if (TARGET_HARD_FLOAT && TARGET_VFP
16194 && IS_VFP_REGNUM (regno))
16196 if (mode == SFmode || mode == SImode)
16197 return VFP_REGNO_OK_FOR_SINGLE (regno);
16199 if (mode == DFmode)
16200 return VFP_REGNO_OK_FOR_DOUBLE (regno);
16202 /* VFP registers can hold HFmode values, but there is no point in
16203 putting them there unless we have hardware conversion insns. */
16204 if (mode == HFmode)
16205 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
16207 if (TARGET_NEON)
16208 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
16209 || (VALID_NEON_QREG_MODE (mode)
16210 && NEON_REGNO_OK_FOR_QUAD (regno))
16211 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
16212 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
16213 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
16214 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
16215 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
16217 return FALSE;
16220 if (TARGET_REALLY_IWMMXT)
16222 if (IS_IWMMXT_GR_REGNUM (regno))
16223 return mode == SImode;
16225 if (IS_IWMMXT_REGNUM (regno))
16226 return VALID_IWMMXT_REG_MODE (mode);
16229 /* We allow almost any value to be stored in the general registers.
16230 Restrict doubleword quantities to even register pairs so that we can
16231 use ldrd. Do not allow very large Neon structure opaque modes in
16232 general registers; they would use too many. */
16233 if (regno <= LAST_ARM_REGNUM)
16234 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
16235 && ARM_NUM_REGS (mode) <= 4;
16237 if (regno == FRAME_POINTER_REGNUM
16238 || regno == ARG_POINTER_REGNUM)
16239 /* We only allow integers in the fake hard registers. */
16240 return GET_MODE_CLASS (mode) == MODE_INT;
16242 /* The only registers left are the FPA registers
16243 which we only allow to hold FP values. */
16244 return (TARGET_HARD_FLOAT && TARGET_FPA
16245 && GET_MODE_CLASS (mode) == MODE_FLOAT
16246 && regno >= FIRST_FPA_REGNUM
16247 && regno <= LAST_FPA_REGNUM);
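/* Editorial example (not part of the original source): on a TARGET_LDRD
   core the LAST_ARM_REGNUM clause above accepts a DImode value in r0 (an
   even register, so {r0,r1} can be accessed with ldrd/strd) but rejects it
   in r1, since the pair would then be misaligned for ldrd.  */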
16250 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
16251 not used in ARM mode. */
16253 enum reg_class
16254 arm_regno_class (int regno)
16256 if (TARGET_THUMB1)
16258 if (regno == STACK_POINTER_REGNUM)
16259 return STACK_REG;
16260 if (regno == CC_REGNUM)
16261 return CC_REG;
16262 if (regno < 8)
16263 return LO_REGS;
16264 return HI_REGS;
16267 if (TARGET_THUMB2 && regno < 8)
16268 return LO_REGS;
16270 if ( regno <= LAST_ARM_REGNUM
16271 || regno == FRAME_POINTER_REGNUM
16272 || regno == ARG_POINTER_REGNUM)
16273 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16275 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16276 return TARGET_THUMB2 ? CC_REG : NO_REGS;
16278 if (IS_CIRRUS_REGNUM (regno))
16279 return CIRRUS_REGS;
16281 if (IS_VFP_REGNUM (regno))
16283 if (regno <= D7_VFP_REGNUM)
16284 return VFP_D0_D7_REGS;
16285 else if (regno <= LAST_LO_VFP_REGNUM)
16286 return VFP_LO_REGS;
16287 else
16288 return VFP_HI_REGS;
16291 if (IS_IWMMXT_REGNUM (regno))
16292 return IWMMXT_REGS;
16294 if (IS_IWMMXT_GR_REGNUM (regno))
16295 return IWMMXT_GR_REGS;
16297 return FPA_REGS;
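/* Editorial note on the mapping above: for Thumb-2, r0-r7 land in LO_REGS
   and the remaining core registers (r8 and up) in HI_REGS, whereas in ARM
   mode all core registers fall into GENERAL_REGS.  */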
16300 /* Handle a special case when computing the offset
16301 of an argument from the frame pointer. */
16302 int
16303 arm_debugger_arg_offset (int value, rtx addr)
16305 rtx insn;
16307 /* We are only interested if dbxout_parms() failed to compute the offset. */
16308 if (value != 0)
16309 return 0;
16311 /* We can only cope with the case where the address is held in a register. */
16312 if (GET_CODE (addr) != REG)
16313 return 0;
16315 /* If we are using the frame pointer to point at the argument, then
16316 an offset of 0 is correct. */
16317 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
16318 return 0;
16320 /* If we are using the stack pointer to point at the
16321 argument, then an offset of 0 is correct. */
16322 /* ??? Check this is consistent with thumb2 frame layout. */
16323 if ((TARGET_THUMB || !frame_pointer_needed)
16324 && REGNO (addr) == SP_REGNUM)
16325 return 0;
16327 /* Oh dear. The argument is pointed to by a register rather
16328 than being held in a register, or being stored at a known
16329 offset from the frame pointer. Since GDB only understands
16330 those two kinds of argument we must translate the address
16331 held in the register into an offset from the frame pointer.
16332 We do this by searching through the insns for the function
16333 looking to see where this register gets its value. If the
16334 register is initialized from the frame pointer plus an offset
16335 then we are in luck and we can continue, otherwise we give up.
16337 This code is exercised by producing debugging information
16338 for a function with arguments like this:
16340 double func (double a, double b, int c, double d) {return d;}
16342 Without this code the stab for parameter 'd' will be set to
16343 an offset of 0 from the frame pointer, rather than 8. */
16345 /* The if() statement says:
16347 If the insn is a normal instruction
16348 and if the insn is setting the value in a register
16349 and if the register being set is the register holding the address of the argument
16350 and if the address is computed by an addition
16351 that involves adding to a register
16352 which is the frame pointer
16353 a constant integer
16355 then... */
16357 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16359 if ( GET_CODE (insn) == INSN
16360 && GET_CODE (PATTERN (insn)) == SET
16361 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
16362 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
16363 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
16364 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
16365 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
16368 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
16370 break;
16374 if (value == 0)
16376 debug_rtx (addr);
16377 warning (0, "unable to compute real location of stacked parameter");
16378 value = 8; /* XXX magic hack */
16381 return value;
16384 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
16385 do \
16387 if ((MASK) & insn_flags) \
16388 add_builtin_function ((NAME), (TYPE), (CODE), \
16389 BUILT_IN_MD, NULL, NULL_TREE); \
16391 while (0)
16393 struct builtin_description
16395 const unsigned int mask;
16396 const enum insn_code icode;
16397 const char * const name;
16398 const enum arm_builtins code;
16399 const enum rtx_code comparison;
16400 const unsigned int flag;
16403 static const struct builtin_description bdesc_2arg[] =
16405 #define IWMMXT_BUILTIN(code, string, builtin) \
16406 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
16407 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
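/* Editorial illustration of the expansion (not part of the table): the
   first entry below, IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB), expands to

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },
 */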
16409 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
16410 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
16411 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
16412 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
16413 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
16414 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
16415 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
16416 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
16417 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
16418 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
16419 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
16420 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
16421 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
16422 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
16423 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
16424 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
16425 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
16426 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
16427 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
16428 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
16429 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
16430 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
16431 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
16432 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
16433 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
16434 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
16435 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
16436 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
16437 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
16438 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
16439 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
16440 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
16441 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
16442 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
16443 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
16444 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
16445 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
16446 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
16447 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
16448 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
16449 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
16450 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
16451 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
16452 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
16453 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
16454 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
16455 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
16456 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
16457 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
16458 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
16459 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
16460 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
16461 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
16462 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
16463 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
16464 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
16465 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
16466 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
16468 #define IWMMXT_BUILTIN2(code, builtin) \
16469 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16471 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
16472 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
16473 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
16474 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
16475 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
16476 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
16477 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
16478 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
16479 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
16480 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
16481 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
16482 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
16483 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
16484 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
16485 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
16486 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
16487 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
16488 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
16489 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
16490 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
16491 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
16492 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
16493 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
16494 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
16495 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
16496 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
16497 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
16498 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
16499 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
16500 IWMMXT_BUILTIN2 (rordi3, WRORDI)
16501 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
16502 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
16505 static const struct builtin_description bdesc_1arg[] =
16507 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
16508 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
16509 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
16510 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
16511 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
16512 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
16513 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
16514 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
16515 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
16516 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
16517 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
16518 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
16519 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
16520 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
16521 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
16522 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
16523 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
16524 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
16527 /* Set up all the iWMMXt builtins. This is
16528 not called if TARGET_IWMMXT is zero. */
16530 static void
16531 arm_init_iwmmxt_builtins (void)
16533 const struct builtin_description * d;
16534 size_t i;
16535 tree endlink = void_list_node;
16537 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16538 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16539 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
16541 tree int_ftype_int
16542 = build_function_type (integer_type_node,
16543 tree_cons (NULL_TREE, integer_type_node, endlink));
16544 tree v8qi_ftype_v8qi_v8qi_int
16545 = build_function_type (V8QI_type_node,
16546 tree_cons (NULL_TREE, V8QI_type_node,
16547 tree_cons (NULL_TREE, V8QI_type_node,
16548 tree_cons (NULL_TREE,
16549 integer_type_node,
16550 endlink))));
16551 tree v4hi_ftype_v4hi_int
16552 = build_function_type (V4HI_type_node,
16553 tree_cons (NULL_TREE, V4HI_type_node,
16554 tree_cons (NULL_TREE, integer_type_node,
16555 endlink)));
16556 tree v2si_ftype_v2si_int
16557 = build_function_type (V2SI_type_node,
16558 tree_cons (NULL_TREE, V2SI_type_node,
16559 tree_cons (NULL_TREE, integer_type_node,
16560 endlink)));
16561 tree v2si_ftype_di_di
16562 = build_function_type (V2SI_type_node,
16563 tree_cons (NULL_TREE, long_long_integer_type_node,
16564 tree_cons (NULL_TREE, long_long_integer_type_node,
16565 endlink)));
16566 tree di_ftype_di_int
16567 = build_function_type (long_long_integer_type_node,
16568 tree_cons (NULL_TREE, long_long_integer_type_node,
16569 tree_cons (NULL_TREE, integer_type_node,
16570 endlink)));
16571 tree di_ftype_di_int_int
16572 = build_function_type (long_long_integer_type_node,
16573 tree_cons (NULL_TREE, long_long_integer_type_node,
16574 tree_cons (NULL_TREE, integer_type_node,
16575 tree_cons (NULL_TREE,
16576 integer_type_node,
16577 endlink))));
16578 tree int_ftype_v8qi
16579 = build_function_type (integer_type_node,
16580 tree_cons (NULL_TREE, V8QI_type_node,
16581 endlink));
16582 tree int_ftype_v4hi
16583 = build_function_type (integer_type_node,
16584 tree_cons (NULL_TREE, V4HI_type_node,
16585 endlink));
16586 tree int_ftype_v2si
16587 = build_function_type (integer_type_node,
16588 tree_cons (NULL_TREE, V2SI_type_node,
16589 endlink));
16590 tree int_ftype_v8qi_int
16591 = build_function_type (integer_type_node,
16592 tree_cons (NULL_TREE, V8QI_type_node,
16593 tree_cons (NULL_TREE, integer_type_node,
16594 endlink)));
16595 tree int_ftype_v4hi_int
16596 = build_function_type (integer_type_node,
16597 tree_cons (NULL_TREE, V4HI_type_node,
16598 tree_cons (NULL_TREE, integer_type_node,
16599 endlink)));
16600 tree int_ftype_v2si_int
16601 = build_function_type (integer_type_node,
16602 tree_cons (NULL_TREE, V2SI_type_node,
16603 tree_cons (NULL_TREE, integer_type_node,
16604 endlink)));
16605 tree v8qi_ftype_v8qi_int_int
16606 = build_function_type (V8QI_type_node,
16607 tree_cons (NULL_TREE, V8QI_type_node,
16608 tree_cons (NULL_TREE, integer_type_node,
16609 tree_cons (NULL_TREE,
16610 integer_type_node,
16611 endlink))));
16612 tree v4hi_ftype_v4hi_int_int
16613 = build_function_type (V4HI_type_node,
16614 tree_cons (NULL_TREE, V4HI_type_node,
16615 tree_cons (NULL_TREE, integer_type_node,
16616 tree_cons (NULL_TREE,
16617 integer_type_node,
16618 endlink))));
16619 tree v2si_ftype_v2si_int_int
16620 = build_function_type (V2SI_type_node,
16621 tree_cons (NULL_TREE, V2SI_type_node,
16622 tree_cons (NULL_TREE, integer_type_node,
16623 tree_cons (NULL_TREE,
16624 integer_type_node,
16625 endlink))));
16626 /* Miscellaneous. */
16627 tree v8qi_ftype_v4hi_v4hi
16628 = build_function_type (V8QI_type_node,
16629 tree_cons (NULL_TREE, V4HI_type_node,
16630 tree_cons (NULL_TREE, V4HI_type_node,
16631 endlink)));
16632 tree v4hi_ftype_v2si_v2si
16633 = build_function_type (V4HI_type_node,
16634 tree_cons (NULL_TREE, V2SI_type_node,
16635 tree_cons (NULL_TREE, V2SI_type_node,
16636 endlink)));
16637 tree v2si_ftype_v4hi_v4hi
16638 = build_function_type (V2SI_type_node,
16639 tree_cons (NULL_TREE, V4HI_type_node,
16640 tree_cons (NULL_TREE, V4HI_type_node,
16641 endlink)));
16642 tree v2si_ftype_v8qi_v8qi
16643 = build_function_type (V2SI_type_node,
16644 tree_cons (NULL_TREE, V8QI_type_node,
16645 tree_cons (NULL_TREE, V8QI_type_node,
16646 endlink)));
16647 tree v4hi_ftype_v4hi_di
16648 = build_function_type (V4HI_type_node,
16649 tree_cons (NULL_TREE, V4HI_type_node,
16650 tree_cons (NULL_TREE,
16651 long_long_integer_type_node,
16652 endlink)));
16653 tree v2si_ftype_v2si_di
16654 = build_function_type (V2SI_type_node,
16655 tree_cons (NULL_TREE, V2SI_type_node,
16656 tree_cons (NULL_TREE,
16657 long_long_integer_type_node,
16658 endlink)));
16659 tree void_ftype_int_int
16660 = build_function_type (void_type_node,
16661 tree_cons (NULL_TREE, integer_type_node,
16662 tree_cons (NULL_TREE, integer_type_node,
16663 endlink)));
16664 tree di_ftype_void
16665 = build_function_type (long_long_unsigned_type_node, endlink);
16666 tree di_ftype_v8qi
16667 = build_function_type (long_long_integer_type_node,
16668 tree_cons (NULL_TREE, V8QI_type_node,
16669 endlink));
16670 tree di_ftype_v4hi
16671 = build_function_type (long_long_integer_type_node,
16672 tree_cons (NULL_TREE, V4HI_type_node,
16673 endlink));
16674 tree di_ftype_v2si
16675 = build_function_type (long_long_integer_type_node,
16676 tree_cons (NULL_TREE, V2SI_type_node,
16677 endlink));
16678 tree v2si_ftype_v4hi
16679 = build_function_type (V2SI_type_node,
16680 tree_cons (NULL_TREE, V4HI_type_node,
16681 endlink));
16682 tree v4hi_ftype_v8qi
16683 = build_function_type (V4HI_type_node,
16684 tree_cons (NULL_TREE, V8QI_type_node,
16685 endlink));
16687 tree di_ftype_di_v4hi_v4hi
16688 = build_function_type (long_long_unsigned_type_node,
16689 tree_cons (NULL_TREE,
16690 long_long_unsigned_type_node,
16691 tree_cons (NULL_TREE, V4HI_type_node,
16692 tree_cons (NULL_TREE,
16693 V4HI_type_node,
16694 endlink))));
16696 tree di_ftype_v4hi_v4hi
16697 = build_function_type (long_long_unsigned_type_node,
16698 tree_cons (NULL_TREE, V4HI_type_node,
16699 tree_cons (NULL_TREE, V4HI_type_node,
16700 endlink)));
16702 /* Normal vector binops. */
16703 tree v8qi_ftype_v8qi_v8qi
16704 = build_function_type (V8QI_type_node,
16705 tree_cons (NULL_TREE, V8QI_type_node,
16706 tree_cons (NULL_TREE, V8QI_type_node,
16707 endlink)));
16708 tree v4hi_ftype_v4hi_v4hi
16709 = build_function_type (V4HI_type_node,
16710 tree_cons (NULL_TREE, V4HI_type_node,
16711 tree_cons (NULL_TREE, V4HI_type_node,
16712 endlink)));
16713 tree v2si_ftype_v2si_v2si
16714 = build_function_type (V2SI_type_node,
16715 tree_cons (NULL_TREE, V2SI_type_node,
16716 tree_cons (NULL_TREE, V2SI_type_node,
16717 endlink)));
16718 tree di_ftype_di_di
16719 = build_function_type (long_long_unsigned_type_node,
16720 tree_cons (NULL_TREE, long_long_unsigned_type_node,
16721 tree_cons (NULL_TREE,
16722 long_long_unsigned_type_node,
16723 endlink)));
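/* Editorial note: each of the *_ftype_* trees above is simply a function
   prototype expressed as a tree; for instance v4hi_ftype_v4hi_v4hi
   corresponds to "V4HI f (V4HI, V4HI)", the signature shared by the
   two-operand V4HI builtins such as __builtin_arm_waddh registered in the
   loop below.  */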
16725 /* Add all builtins that are more or less simple operations on two
16726 operands. */
16727 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16729 /* Use one of the operands; the target can have a different mode for
16730 mask-generating compares. */
16731 enum machine_mode mode;
16732 tree type;
16734 if (d->name == 0)
16735 continue;
16737 mode = insn_data[d->icode].operand[1].mode;
16739 switch (mode)
16741 case V8QImode:
16742 type = v8qi_ftype_v8qi_v8qi;
16743 break;
16744 case V4HImode:
16745 type = v4hi_ftype_v4hi_v4hi;
16746 break;
16747 case V2SImode:
16748 type = v2si_ftype_v2si_v2si;
16749 break;
16750 case DImode:
16751 type = di_ftype_di_di;
16752 break;
16754 default:
16755 gcc_unreachable ();
16758 def_mbuiltin (d->mask, d->name, type, d->code);
16761 /* Add the remaining MMX insns with somewhat more complicated types. */
16762 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
16763 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
16764 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
16766 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
16767 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
16768 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
16769 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
16770 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
16771 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
16773 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
16774 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
16775 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
16776 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
16777 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
16778 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
16780 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
16781 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
16782 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
16783 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
16784 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
16785 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
16787 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
16788 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
16789 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
16790 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
16791 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
16792 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
16794 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
16796 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
16797 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
16798 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
16799 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
16801 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
16802 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
16803 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
16804 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
16805 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
16806 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
16807 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
16808 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
16809 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
16811 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
16812 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
16813 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
16815 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
16816 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
16817 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
16819 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
16820 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
16821 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
16822 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
16823 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
16824 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
16826 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
16827 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
16828 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
16829 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
16830 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
16831 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
16832 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
16833 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
16834 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
16835 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
16836 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
16837 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
16839 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
16840 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
16841 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
16842 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
16844 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
16845 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
16846 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
16847 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
16848 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
16849 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
16850 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
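/* Minimal usage sketch (editorial, not part of the compiler; assumes a
   target built with iWMMXt enabled, and the function name add_bytes is
   purely illustrative).  The builtins registered above are normally reached
   through arm's mmintrin.h wrappers, but may also be invoked directly on
   64-bit vector operands; __builtin_arm_waddb, for example, expands through
   CODE_FOR_addv8qi3:

     typedef signed char v8qi __attribute__ ((vector_size (8)));

     v8qi add_bytes (v8qi a, v8qi b)
     {
       return __builtin_arm_waddb (a, b);
     }
 */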
16853 static void
16854 arm_init_tls_builtins (void)
16856 tree ftype, decl;
16858 ftype = build_function_type (ptr_type_node, void_list_node);
16859 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
16860 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
16861 NULL, NULL_TREE);
16862 TREE_NOTHROW (decl) = 1;
16863 TREE_READONLY (decl) = 1;
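/* Editorial usage note (the variable name tp is illustrative): the builtin
   registered above gives C code direct access to the TLS thread pointer,

     void *tp = __builtin_thread_pointer ();

   which expands to a read of the thread register (an mrc from CP15 on cores
   with the hardware thread-ID register, or a call to __aeabi_read_tp
   otherwise).  */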
16866 enum neon_builtin_type_bits {
16867 T_V8QI = 0x0001,
16868 T_V4HI = 0x0002,
16869 T_V2SI = 0x0004,
16870 T_V2SF = 0x0008,
16871 T_DI = 0x0010,
16872 T_V16QI = 0x0020,
16873 T_V8HI = 0x0040,
16874 T_V4SI = 0x0080,
16875 T_V4SF = 0x0100,
16876 T_V2DI = 0x0200,
16877 T_TI = 0x0400,
16878 T_EI = 0x0800,
16879 T_OI = 0x1000
16882 #define v8qi_UP T_V8QI
16883 #define v4hi_UP T_V4HI
16884 #define v2si_UP T_V2SI
16885 #define v2sf_UP T_V2SF
16886 #define di_UP T_DI
16887 #define v16qi_UP T_V16QI
16888 #define v8hi_UP T_V8HI
16889 #define v4si_UP T_V4SI
16890 #define v4sf_UP T_V4SF
16891 #define v2di_UP T_V2DI
16892 #define ti_UP T_TI
16893 #define ei_UP T_EI
16894 #define oi_UP T_OI
16896 #define UP(X) X##_UP
16898 #define T_MAX 13
16900 typedef enum {
16901 NEON_BINOP,
16902 NEON_TERNOP,
16903 NEON_UNOP,
16904 NEON_GETLANE,
16905 NEON_SETLANE,
16906 NEON_CREATE,
16907 NEON_DUP,
16908 NEON_DUPLANE,
16909 NEON_COMBINE,
16910 NEON_SPLIT,
16911 NEON_LANEMUL,
16912 NEON_LANEMULL,
16913 NEON_LANEMULH,
16914 NEON_LANEMAC,
16915 NEON_SCALARMUL,
16916 NEON_SCALARMULL,
16917 NEON_SCALARMULH,
16918 NEON_SCALARMAC,
16919 NEON_CONVERT,
16920 NEON_FIXCONV,
16921 NEON_SELECT,
16922 NEON_RESULTPAIR,
16923 NEON_REINTERP,
16924 NEON_VTBL,
16925 NEON_VTBX,
16926 NEON_LOAD1,
16927 NEON_LOAD1LANE,
16928 NEON_STORE1,
16929 NEON_STORE1LANE,
16930 NEON_LOADSTRUCT,
16931 NEON_LOADSTRUCTLANE,
16932 NEON_STORESTRUCT,
16933 NEON_STORESTRUCTLANE,
16934 NEON_LOGICBINOP,
16935 NEON_SHIFTINSERT,
16936 NEON_SHIFTIMM,
16937 NEON_SHIFTACC
16938 } neon_itype;
16940 typedef struct {
16941 const char *name;
16942 const neon_itype itype;
16943 const int bits;
16944 const enum insn_code codes[T_MAX];
16945 const unsigned int num_vars;
16946 unsigned int base_fcode;
16947 } neon_builtin_datum;
16949 #define CF(N,X) CODE_FOR_neon_##N##X
16951 #define VAR1(T, N, A) \
16952 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
16953 #define VAR2(T, N, A, B) \
16954 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
16955 #define VAR3(T, N, A, B, C) \
16956 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
16957 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
16958 #define VAR4(T, N, A, B, C, D) \
16959 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
16960 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
16961 #define VAR5(T, N, A, B, C, D, E) \
16962 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
16963 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
16964 #define VAR6(T, N, A, B, C, D, E, F) \
16965 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
16966 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
16967 #define VAR7(T, N, A, B, C, D, E, F, G) \
16968 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
16969 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16970 CF (N, G) }, 7, 0
16971 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
16972 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16973 | UP (H), \
16974 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16975 CF (N, G), CF (N, H) }, 8, 0
16976 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
16977 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16978 | UP (H) | UP (I), \
16979 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16980 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
16981 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
16982 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16983 | UP (H) | UP (I) | UP (J), \
16984 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16985 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
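/* Editorial illustration of the expansion (not part of the table): the
   entry { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) } below expands to

     { "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
       { CODE_FOR_neon_vaddlv8qi, CODE_FOR_neon_vaddlv4hi,
         CODE_FOR_neon_vaddlv2si },
       3, 0 },
 */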
16987 /* The mode entries in the following table correspond to the "key" type of the
16988 instruction variant, i.e. equivalent to that which would be specified after
16989 the assembler mnemonic, which usually refers to the last vector operand.
16990 (Signed/unsigned/polynomial types are not differentiated between though, and
16991 are all mapped onto the same mode for a given element size.) The modes
16992 listed per instruction should be the same as those defined for that
16993 instruction's pattern in neon.md.
16994 WARNING: Variants should be listed in the same increasing order as
16995 neon_builtin_type_bits. */
16997 static neon_builtin_datum neon_builtin_data[] =
16999 { VAR10 (BINOP, vadd,
17000 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17001 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
17002 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
17003 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17004 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17005 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
17006 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17007 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17008 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
17009 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17010 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
17011 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
17012 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
17013 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
17014 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
17015 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
17016 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
17017 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
17018 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
17019 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
17020 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
17021 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
17022 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17023 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17024 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17025 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
17026 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
17027 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
17028 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17029 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17030 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17031 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
17032 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17033 { VAR10 (BINOP, vsub,
17034 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17035 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
17036 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
17037 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17038 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17039 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
17040 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17041 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17042 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17043 { VAR2 (BINOP, vcage, v2sf, v4sf) },
17044 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
17045 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17046 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17047 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
17048 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17049 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
17050 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17051 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17052 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
17053 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17054 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17055 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
17056 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
17057 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
17058 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
17059 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17060 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17061 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17062 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17063 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17064 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17065 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17066 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17067 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
17068 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
17069 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
17070 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17071 /* FIXME: vget_lane supports more variants than this! */
17072 { VAR10 (GETLANE, vget_lane,
17073 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17074 { VAR10 (SETLANE, vset_lane,
17075 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17076 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
17077 { VAR10 (DUP, vdup_n,
17078 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17079 { VAR10 (DUPLANE, vdup_lane,
17080 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17081 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
17082 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
17083 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
17084 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
17085 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
17086 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
17087 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
17088 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17089 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17090 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
17091 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
17092 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17093 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
17094 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
17095 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17096 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17097 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
17098 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
17099 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17100 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
17101 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
17102 { VAR10 (BINOP, vext,
17103 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17104 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17105 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
17106 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
17107 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
17108 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
17109 { VAR10 (SELECT, vbsl,
17110 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17111 { VAR1 (VTBL, vtbl1, v8qi) },
17112 { VAR1 (VTBL, vtbl2, v8qi) },
17113 { VAR1 (VTBL, vtbl3, v8qi) },
17114 { VAR1 (VTBL, vtbl4, v8qi) },
17115 { VAR1 (VTBX, vtbx1, v8qi) },
17116 { VAR1 (VTBX, vtbx2, v8qi) },
17117 { VAR1 (VTBX, vtbx3, v8qi) },
17118 { VAR1 (VTBX, vtbx4, v8qi) },
17119 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17120 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17121 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17122 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
17123 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
17124 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
17125 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
17126 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
17127 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
17128 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
17129 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
17130 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
17131 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
17132 { VAR10 (LOAD1, vld1,
17133 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17134 { VAR10 (LOAD1LANE, vld1_lane,
17135 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17136 { VAR10 (LOAD1, vld1_dup,
17137 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17138 { VAR10 (STORE1, vst1,
17139 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17140 { VAR10 (STORE1LANE, vst1_lane,
17141 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17142 { VAR9 (LOADSTRUCT,
17143 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17144 { VAR7 (LOADSTRUCTLANE, vld2_lane,
17145 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17146 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
17147 { VAR9 (STORESTRUCT, vst2,
17148 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17149 { VAR7 (STORESTRUCTLANE, vst2_lane,
17150 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17151 { VAR9 (LOADSTRUCT,
17152 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17153 { VAR7 (LOADSTRUCTLANE, vld3_lane,
17154 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17155 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
17156 { VAR9 (STORESTRUCT, vst3,
17157 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17158 { VAR7 (STORESTRUCTLANE, vst3_lane,
17159 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17160 { VAR9 (LOADSTRUCT, vld4,
17161 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17162 { VAR7 (LOADSTRUCTLANE, vld4_lane,
17163 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17164 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
17165 { VAR9 (STORESTRUCT, vst4,
17166 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17167 { VAR7 (STORESTRUCTLANE, vst4_lane,
17168 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17169 { VAR10 (LOGICBINOP, vand,
17170 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17171 { VAR10 (LOGICBINOP, vorr,
17172 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17173 { VAR10 (BINOP, veor,
17174 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17175 { VAR10 (LOGICBINOP, vbic,
17176 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17177 { VAR10 (LOGICBINOP, vorn,
17178 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
17181 #undef CF
17182 #undef VAR1
17183 #undef VAR2
17184 #undef VAR3
17185 #undef VAR4
17186 #undef VAR5
17187 #undef VAR6
17188 #undef VAR7
17189 #undef VAR8
17190 #undef VAR9
17191 #undef VAR10
17193 static void
17194 arm_init_neon_builtins (void)
17196 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
17198 tree neon_intQI_type_node;
17199 tree neon_intHI_type_node;
17200 tree neon_polyQI_type_node;
17201 tree neon_polyHI_type_node;
17202 tree neon_intSI_type_node;
17203 tree neon_intDI_type_node;
17204 tree neon_float_type_node;
17206 tree intQI_pointer_node;
17207 tree intHI_pointer_node;
17208 tree intSI_pointer_node;
17209 tree intDI_pointer_node;
17210 tree float_pointer_node;
17212 tree const_intQI_node;
17213 tree const_intHI_node;
17214 tree const_intSI_node;
17215 tree const_intDI_node;
17216 tree const_float_node;
17218 tree const_intQI_pointer_node;
17219 tree const_intHI_pointer_node;
17220 tree const_intSI_pointer_node;
17221 tree const_intDI_pointer_node;
17222 tree const_float_pointer_node;
17224 tree V8QI_type_node;
17225 tree V4HI_type_node;
17226 tree V2SI_type_node;
17227 tree V2SF_type_node;
17228 tree V16QI_type_node;
17229 tree V8HI_type_node;
17230 tree V4SI_type_node;
17231 tree V4SF_type_node;
17232 tree V2DI_type_node;
17234 tree intUQI_type_node;
17235 tree intUHI_type_node;
17236 tree intUSI_type_node;
17237 tree intUDI_type_node;
17239 tree intEI_type_node;
17240 tree intOI_type_node;
17241 tree intCI_type_node;
17242 tree intXI_type_node;
17244 tree V8QI_pointer_node;
17245 tree V4HI_pointer_node;
17246 tree V2SI_pointer_node;
17247 tree V2SF_pointer_node;
17248 tree V16QI_pointer_node;
17249 tree V8HI_pointer_node;
17250 tree V4SI_pointer_node;
17251 tree V4SF_pointer_node;
17252 tree V2DI_pointer_node;
17254 tree void_ftype_pv8qi_v8qi_v8qi;
17255 tree void_ftype_pv4hi_v4hi_v4hi;
17256 tree void_ftype_pv2si_v2si_v2si;
17257 tree void_ftype_pv2sf_v2sf_v2sf;
17258 tree void_ftype_pdi_di_di;
17259 tree void_ftype_pv16qi_v16qi_v16qi;
17260 tree void_ftype_pv8hi_v8hi_v8hi;
17261 tree void_ftype_pv4si_v4si_v4si;
17262 tree void_ftype_pv4sf_v4sf_v4sf;
17263 tree void_ftype_pv2di_v2di_v2di;
17265 tree reinterp_ftype_dreg[5][5];
17266 tree reinterp_ftype_qreg[5][5];
17267 tree dreg_types[5], qreg_types[5];
17269 /* Create distinguished type nodes for NEON vector element types,
17270 and pointers to values of such types, so we can detect them later. */
17271 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17272 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17273 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17274 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17275 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
17276 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
17277 neon_float_type_node = make_node (REAL_TYPE);
17278 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
17279 layout_type (neon_float_type_node);
17281 /* Define typedefs which exactly correspond to the modes we are basing vector
17282 types on. If you change these names you'll need to change
17283 the table used by arm_mangle_type too. */
17284 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
17285 "__builtin_neon_qi");
17286 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
17287 "__builtin_neon_hi");
17288 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
17289 "__builtin_neon_si");
17290 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
17291 "__builtin_neon_sf");
17292 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
17293 "__builtin_neon_di");
17294 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
17295 "__builtin_neon_poly8");
17296 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
17297 "__builtin_neon_poly16");
17299 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
17300 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
17301 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
17302 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
17303 float_pointer_node = build_pointer_type (neon_float_type_node);
17305 /* Next create constant-qualified versions of the above types. */
17306 const_intQI_node = build_qualified_type (neon_intQI_type_node,
17307 TYPE_QUAL_CONST);
17308 const_intHI_node = build_qualified_type (neon_intHI_type_node,
17309 TYPE_QUAL_CONST);
17310 const_intSI_node = build_qualified_type (neon_intSI_type_node,
17311 TYPE_QUAL_CONST);
17312 const_intDI_node = build_qualified_type (neon_intDI_type_node,
17313 TYPE_QUAL_CONST);
17314 const_float_node = build_qualified_type (neon_float_type_node,
17315 TYPE_QUAL_CONST);
17317 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
17318 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
17319 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
17320 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
17321 const_float_pointer_node = build_pointer_type (const_float_node);
17323 /* Now create vector types based on our NEON element types. */
17324 /* 64-bit vectors. */
17325 V8QI_type_node =
17326 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
17327 V4HI_type_node =
17328 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
17329 V2SI_type_node =
17330 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
17331 V2SF_type_node =
17332 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
17333 /* 128-bit vectors. */
17334 V16QI_type_node =
17335 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
17336 V8HI_type_node =
17337 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
17338 V4SI_type_node =
17339 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
17340 V4SF_type_node =
17341 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
17342 V2DI_type_node =
17343 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
17345 /* Unsigned integer types for various mode sizes. */
17346 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
17347 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
17348 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
17349 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
17351 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
17352 "__builtin_neon_uqi");
17353 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
17354 "__builtin_neon_uhi");
17355 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
17356 "__builtin_neon_usi");
17357 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
17358 "__builtin_neon_udi");
17360 /* Opaque integer types for structures of vectors. */
17361 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
17362 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
17363 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
17364 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
17366 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
17367 "__builtin_neon_ti");
17368 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
17369 "__builtin_neon_ei");
17370 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
17371 "__builtin_neon_oi");
17372 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
17373 "__builtin_neon_ci");
17374 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
17375 "__builtin_neon_xi");
17377 /* Pointers to vector types. */
17378 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
17379 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
17380 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
17381 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
17382 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
17383 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
17384 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
17385 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
17386 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
17388 /* Operations which return results as pairs. */
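/* For example, the vtrn, vzip and vuzp builtins are of this kind: they
produce two vectors and write both results through the pointer passed as
the first argument instead of returning a value.  */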
17389 void_ftype_pv8qi_v8qi_v8qi =
17390 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
17391 V8QI_type_node, NULL);
17392 void_ftype_pv4hi_v4hi_v4hi =
17393 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
17394 V4HI_type_node, NULL);
17395 void_ftype_pv2si_v2si_v2si =
17396 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
17397 V2SI_type_node, NULL);
17398 void_ftype_pv2sf_v2sf_v2sf =
17399 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
17400 V2SF_type_node, NULL);
17401 void_ftype_pdi_di_di =
17402 build_function_type_list (void_type_node, intDI_pointer_node,
17403 neon_intDI_type_node, neon_intDI_type_node, NULL);
17404 void_ftype_pv16qi_v16qi_v16qi =
17405 build_function_type_list (void_type_node, V16QI_pointer_node,
17406 V16QI_type_node, V16QI_type_node, NULL);
17407 void_ftype_pv8hi_v8hi_v8hi =
17408 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
17409 V8HI_type_node, NULL);
17410 void_ftype_pv4si_v4si_v4si =
17411 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
17412 V4SI_type_node, NULL);
17413 void_ftype_pv4sf_v4sf_v4sf =
17414 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
17415 V4SF_type_node, NULL);
17416 void_ftype_pv2di_v2di_v2di =
17417 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
17418 V2DI_type_node, NULL);
17420 dreg_types[0] = V8QI_type_node;
17421 dreg_types[1] = V4HI_type_node;
17422 dreg_types[2] = V2SI_type_node;
17423 dreg_types[3] = V2SF_type_node;
17424 dreg_types[4] = neon_intDI_type_node;
17426 qreg_types[0] = V16QI_type_node;
17427 qreg_types[1] = V8HI_type_node;
17428 qreg_types[2] = V4SI_type_node;
17429 qreg_types[3] = V4SF_type_node;
17430 qreg_types[4] = V2DI_type_node;
17432 for (i = 0; i < 5; i++)
17434 int j;
17435 for (j = 0; j < 5; j++)
17437 reinterp_ftype_dreg[i][j]
17438 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
17439 reinterp_ftype_qreg[i][j]
17440 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
17444 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
17446 neon_builtin_datum *d = &neon_builtin_data[i];
17447 unsigned int j, codeidx = 0;
17449 d->base_fcode = fcode;
17451 for (j = 0; j < T_MAX; j++)
17453 const char* const modenames[] = {
17454 "v8qi", "v4hi", "v2si", "v2sf", "di",
17455 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
17457 char namebuf[60];
17458 tree ftype = NULL;
17459 enum insn_code icode;
17460 int is_load = 0, is_store = 0;
17462 if ((d->bits & (1 << j)) == 0)
17463 continue;
17465 icode = d->codes[codeidx++];
17467 switch (d->itype)
17469 case NEON_LOAD1:
17470 case NEON_LOAD1LANE:
17471 case NEON_LOADSTRUCT:
17472 case NEON_LOADSTRUCTLANE:
17473 is_load = 1;
17474 /* Fall through. */
17475 case NEON_STORE1:
17476 case NEON_STORE1LANE:
17477 case NEON_STORESTRUCT:
17478 case NEON_STORESTRUCTLANE:
17479 if (!is_load)
17480 is_store = 1;
17481 /* Fall through. */
17482 case NEON_UNOP:
17483 case NEON_BINOP:
17484 case NEON_LOGICBINOP:
17485 case NEON_SHIFTINSERT:
17486 case NEON_TERNOP:
17487 case NEON_GETLANE:
17488 case NEON_SETLANE:
17489 case NEON_CREATE:
17490 case NEON_DUP:
17491 case NEON_DUPLANE:
17492 case NEON_SHIFTIMM:
17493 case NEON_SHIFTACC:
17494 case NEON_COMBINE:
17495 case NEON_SPLIT:
17496 case NEON_CONVERT:
17497 case NEON_FIXCONV:
17498 case NEON_LANEMUL:
17499 case NEON_LANEMULL:
17500 case NEON_LANEMULH:
17501 case NEON_LANEMAC:
17502 case NEON_SCALARMUL:
17503 case NEON_SCALARMULL:
17504 case NEON_SCALARMULH:
17505 case NEON_SCALARMAC:
17506 case NEON_SELECT:
17507 case NEON_VTBL:
17508 case NEON_VTBX:
17510 int k;
17511 tree return_type = void_type_node, args = void_list_node;
17513 /* Build a function type directly from the insn_data for this
17514 builtin. The build_function_type() function takes care of
17515 removing duplicates for us. */
17516 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
17518 tree eltype;
17520 if (is_load && k == 1)
17522 /* Neon load patterns always have the memory operand
17523 (a SImode pointer) in the operand 1 position. We
17524 want a const pointer to the element type in that
17525 position. */
17526 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17528 switch (1 << j)
17530 case T_V8QI:
17531 case T_V16QI:
17532 eltype = const_intQI_pointer_node;
17533 break;
17535 case T_V4HI:
17536 case T_V8HI:
17537 eltype = const_intHI_pointer_node;
17538 break;
17540 case T_V2SI:
17541 case T_V4SI:
17542 eltype = const_intSI_pointer_node;
17543 break;
17545 case T_V2SF:
17546 case T_V4SF:
17547 eltype = const_float_pointer_node;
17548 break;
17550 case T_DI:
17551 case T_V2DI:
17552 eltype = const_intDI_pointer_node;
17553 break;
17555 default: gcc_unreachable ();
17558 else if (is_store && k == 0)
17560 /* Similarly, Neon store patterns use operand 0 as
17561 the memory location to store to (a SImode pointer).
17562 Use a pointer to the element type of the store in
17563 that position. */
17564 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17566 switch (1 << j)
17568 case T_V8QI:
17569 case T_V16QI:
17570 eltype = intQI_pointer_node;
17571 break;
17573 case T_V4HI:
17574 case T_V8HI:
17575 eltype = intHI_pointer_node;
17576 break;
17578 case T_V2SI:
17579 case T_V4SI:
17580 eltype = intSI_pointer_node;
17581 break;
17583 case T_V2SF:
17584 case T_V4SF:
17585 eltype = float_pointer_node;
17586 break;
17588 case T_DI:
17589 case T_V2DI:
17590 eltype = intDI_pointer_node;
17591 break;
17593 default: gcc_unreachable ();
17596 else
17598 switch (insn_data[icode].operand[k].mode)
17600 case VOIDmode: eltype = void_type_node; break;
17601 /* Scalars. */
17602 case QImode: eltype = neon_intQI_type_node; break;
17603 case HImode: eltype = neon_intHI_type_node; break;
17604 case SImode: eltype = neon_intSI_type_node; break;
17605 case SFmode: eltype = neon_float_type_node; break;
17606 case DImode: eltype = neon_intDI_type_node; break;
17607 case TImode: eltype = intTI_type_node; break;
17608 case EImode: eltype = intEI_type_node; break;
17609 case OImode: eltype = intOI_type_node; break;
17610 case CImode: eltype = intCI_type_node; break;
17611 case XImode: eltype = intXI_type_node; break;
17612 /* 64-bit vectors. */
17613 case V8QImode: eltype = V8QI_type_node; break;
17614 case V4HImode: eltype = V4HI_type_node; break;
17615 case V2SImode: eltype = V2SI_type_node; break;
17616 case V2SFmode: eltype = V2SF_type_node; break;
17617 /* 128-bit vectors. */
17618 case V16QImode: eltype = V16QI_type_node; break;
17619 case V8HImode: eltype = V8HI_type_node; break;
17620 case V4SImode: eltype = V4SI_type_node; break;
17621 case V4SFmode: eltype = V4SF_type_node; break;
17622 case V2DImode: eltype = V2DI_type_node; break;
17623 default: gcc_unreachable ();
17627 if (k == 0 && !is_store)
17628 return_type = eltype;
17629 else
17630 args = tree_cons (NULL_TREE, eltype, args);
17633 ftype = build_function_type (return_type, args);
17635 break;
17637 case NEON_RESULTPAIR:
17639 switch (insn_data[icode].operand[1].mode)
17641 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
17642 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
17643 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
17644 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
17645 case DImode: ftype = void_ftype_pdi_di_di; break;
17646 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
17647 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
17648 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
17649 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
17650 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
17651 default: gcc_unreachable ();
17654 break;
17656 case NEON_REINTERP:
17658 /* We iterate over 5 doubleword types, then 5 quadword
17659 types. */
17660 int rhs = j % 5;
17661 switch (insn_data[icode].operand[0].mode)
17663 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
17664 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
17665 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
17666 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
17667 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
17668 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
17669 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
17670 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
17671 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
17672 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
17673 default: gcc_unreachable ();
17676 break;
17678 default:
17679 gcc_unreachable ();
17682 gcc_assert (ftype != NULL);
17684 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
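/* For example, assuming the table holds a binop entry named "vadd" with
the T_V8QI bit set, the name built here is "__builtin_neon_vaddv8qi".  */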
17686 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
17687 NULL_TREE);
17692 static void
17693 arm_init_fp16_builtins (void)
17695 tree fp16_type = make_node (REAL_TYPE);
17696 TYPE_PRECISION (fp16_type) = 16;
17697 layout_type (fp16_type);
17698 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
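/* Note that __fp16 is a storage-only format: arm_promoted_type below
promotes it to float for arithmetic, and arm_convert_to_type routes
conversions between __fp16 and double through float.  */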
17701 static void
17702 arm_init_builtins (void)
17704 arm_init_tls_builtins ();
17706 if (TARGET_REALLY_IWMMXT)
17707 arm_init_iwmmxt_builtins ();
17709 if (TARGET_NEON)
17710 arm_init_neon_builtins ();
17712 if (arm_fp16_format)
17713 arm_init_fp16_builtins ();
17716 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
17718 static const char *
17719 arm_invalid_parameter_type (const_tree t)
17721 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17722 return N_("function parameters cannot have __fp16 type");
17723 return NULL;
17726 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
17728 static const char *
17729 arm_invalid_return_type (const_tree t)
17731 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17732 return N_("functions cannot return __fp16 type");
17733 return NULL;
17736 /* Implement TARGET_PROMOTED_TYPE. */
17738 static tree
17739 arm_promoted_type (const_tree t)
17741 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17742 return float_type_node;
17743 return NULL_TREE;
17746 /* Implement TARGET_CONVERT_TO_TYPE.
17747 Specifically, this hook implements the peculiarity of the ARM
17748 half-precision floating-point C semantics that requires conversions between
17749 __fp16 to or from double to do an intermediate conversion to float. */
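/* For example, (double) x with x of type __fp16 is expanded as
(double) (float) x, whereas conversions between __fp16 and float are
left to the normal path.  */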
17751 static tree
17752 arm_convert_to_type (tree type, tree expr)
17754 tree fromtype = TREE_TYPE (expr);
17755 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
17756 return NULL_TREE;
17757 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
17758 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
17759 return convert (type, convert (float_type_node, expr));
17760 return NULL_TREE;
17763 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
17764 This simply adds HFmode as a supported mode; even though we don't
17765 implement arithmetic on this type directly, it's supported by
17766 optabs conversions, much the way the double-word arithmetic is
17767 special-cased in the default hook. */
17769 static bool
17770 arm_scalar_mode_supported_p (enum machine_mode mode)
17772 if (mode == HFmode)
17773 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
17774 else
17775 return default_scalar_mode_supported_p (mode);
17778 /* Errors in the source file can cause expand_expr to return const0_rtx
17779 where we expect a vector. To avoid crashing, use one of the vector
17780 clear instructions. */
17782 static rtx
17783 safe_vector_operand (rtx x, enum machine_mode mode)
17785 if (x != const0_rtx)
17786 return x;
17787 x = gen_reg_rtx (mode);
17789 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
17790 : gen_rtx_SUBREG (DImode, x, 0)));
17791 return x;
17794 /* Subroutine of arm_expand_builtin to take care of binop insns. */
17796 static rtx
17797 arm_expand_binop_builtin (enum insn_code icode,
17798 tree exp, rtx target)
17800 rtx pat;
17801 tree arg0 = CALL_EXPR_ARG (exp, 0);
17802 tree arg1 = CALL_EXPR_ARG (exp, 1);
17803 rtx op0 = expand_normal (arg0);
17804 rtx op1 = expand_normal (arg1);
17805 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17806 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17807 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17809 if (VECTOR_MODE_P (mode0))
17810 op0 = safe_vector_operand (op0, mode0);
17811 if (VECTOR_MODE_P (mode1))
17812 op1 = safe_vector_operand (op1, mode1);
17814 if (! target
17815 || GET_MODE (target) != tmode
17816 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17817 target = gen_reg_rtx (tmode);
17819 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
17821 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17822 op0 = copy_to_mode_reg (mode0, op0);
17823 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17824 op1 = copy_to_mode_reg (mode1, op1);
17826 pat = GEN_FCN (icode) (target, op0, op1);
17827 if (! pat)
17828 return 0;
17829 emit_insn (pat);
17830 return target;
17833 /* Subroutine of arm_expand_builtin to take care of unop insns. */
17835 static rtx
17836 arm_expand_unop_builtin (enum insn_code icode,
17837 tree exp, rtx target, int do_load)
17839 rtx pat;
17840 tree arg0 = CALL_EXPR_ARG (exp, 0);
17841 rtx op0 = expand_normal (arg0);
17842 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17843 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17845 if (! target
17846 || GET_MODE (target) != tmode
17847 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17848 target = gen_reg_rtx (tmode);
17849 if (do_load)
17850 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17851 else
17853 if (VECTOR_MODE_P (mode0))
17854 op0 = safe_vector_operand (op0, mode0);
17856 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17857 op0 = copy_to_mode_reg (mode0, op0);
17860 pat = GEN_FCN (icode) (target, op0);
17861 if (! pat)
17862 return 0;
17863 emit_insn (pat);
17864 return target;
17867 static int
17868 neon_builtin_compare (const void *a, const void *b)
17870 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
17871 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
17872 unsigned int soughtcode = key->base_fcode;
17874 if (soughtcode >= memb->base_fcode
17875 && soughtcode < memb->base_fcode + memb->num_vars)
17876 return 0;
17877 else if (soughtcode < memb->base_fcode)
17878 return -1;
17879 else
17880 return 1;
17883 static enum insn_code
17884 locate_neon_builtin_icode (int fcode, neon_itype *itype)
17886 neon_builtin_datum key, *found;
17887 int idx;
17889 key.base_fcode = fcode;
17890 found = (neon_builtin_datum *)
17891 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
17892 sizeof (neon_builtin_data[0]), neon_builtin_compare);
17893 gcc_assert (found);
17894 idx = fcode - (int) found->base_fcode;
17895 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
17897 if (itype)
17898 *itype = found->itype;
17900 return found->codes[idx];
17903 typedef enum {
17904 NEON_ARG_COPY_TO_REG,
17905 NEON_ARG_CONSTANT,
17906 NEON_ARG_STOP
17907 } builtin_arg;
17909 #define NEON_MAX_BUILTIN_ARGS 5
17911 /* Expand a Neon builtin. */
17912 static rtx
17913 arm_expand_neon_args (rtx target, int icode, int have_retval,
17914 tree exp, ...)
17916 va_list ap;
17917 rtx pat;
17918 tree arg[NEON_MAX_BUILTIN_ARGS];
17919 rtx op[NEON_MAX_BUILTIN_ARGS];
17920 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17921 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
17922 int argc = 0;
17924 if (have_retval
17925 && (!target
17926 || GET_MODE (target) != tmode
17927 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
17928 target = gen_reg_rtx (tmode);
17930 va_start (ap, exp);
17932 for (;;)
17934 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
17936 if (thisarg == NEON_ARG_STOP)
17937 break;
17938 else
17940 arg[argc] = CALL_EXPR_ARG (exp, argc);
17941 op[argc] = expand_normal (arg[argc]);
17942 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
17944 switch (thisarg)
17946 case NEON_ARG_COPY_TO_REG:
17947 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
17948 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
17949 (op[argc], mode[argc]))
17950 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
17951 break;
17953 case NEON_ARG_CONSTANT:
17954 /* FIXME: This error message is somewhat unhelpful. */
17955 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
17956 (op[argc], mode[argc]))
17957 error ("argument must be a constant");
17958 break;
17960 case NEON_ARG_STOP:
17961 gcc_unreachable ();
17964 argc++;
17968 va_end (ap);
17970 if (have_retval)
17971 switch (argc)
17973 case 1:
17974 pat = GEN_FCN (icode) (target, op[0]);
17975 break;
17977 case 2:
17978 pat = GEN_FCN (icode) (target, op[0], op[1]);
17979 break;
17981 case 3:
17982 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
17983 break;
17985 case 4:
17986 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
17987 break;
17989 case 5:
17990 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
17991 break;
17993 default:
17994 gcc_unreachable ();
17996 else
17997 switch (argc)
17999 case 1:
18000 pat = GEN_FCN (icode) (op[0]);
18001 break;
18003 case 2:
18004 pat = GEN_FCN (icode) (op[0], op[1]);
18005 break;
18007 case 3:
18008 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
18009 break;
18011 case 4:
18012 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
18013 break;
18015 case 5:
18016 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
18017 break;
18019 default:
18020 gcc_unreachable ();
18023 if (!pat)
18024 return 0;
18026 emit_insn (pat);
18028 return target;
18031 /* Expand a Neon builtin. These are "special" because they don't have symbolic
18032 constants defined per-instruction or per instruction-variant. Instead, the
18033 required info is looked up in the table neon_builtin_data. */
18034 static rtx
18035 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
18037 neon_itype itype;
18038 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
18040 switch (itype)
18042 case NEON_UNOP:
18043 case NEON_CONVERT:
18044 case NEON_DUPLANE:
18045 return arm_expand_neon_args (target, icode, 1, exp,
18046 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18048 case NEON_BINOP:
18049 case NEON_SETLANE:
18050 case NEON_SCALARMUL:
18051 case NEON_SCALARMULL:
18052 case NEON_SCALARMULH:
18053 case NEON_SHIFTINSERT:
18054 case NEON_LOGICBINOP:
18055 return arm_expand_neon_args (target, icode, 1, exp,
18056 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18057 NEON_ARG_STOP);
18059 case NEON_TERNOP:
18060 return arm_expand_neon_args (target, icode, 1, exp,
18061 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18062 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18064 case NEON_GETLANE:
18065 case NEON_FIXCONV:
18066 case NEON_SHIFTIMM:
18067 return arm_expand_neon_args (target, icode, 1, exp,
18068 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
18069 NEON_ARG_STOP);
18071 case NEON_CREATE:
18072 return arm_expand_neon_args (target, icode, 1, exp,
18073 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18075 case NEON_DUP:
18076 case NEON_SPLIT:
18077 case NEON_REINTERP:
18078 return arm_expand_neon_args (target, icode, 1, exp,
18079 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18081 case NEON_COMBINE:
18082 case NEON_VTBL:
18083 return arm_expand_neon_args (target, icode, 1, exp,
18084 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18086 case NEON_RESULTPAIR:
18087 return arm_expand_neon_args (target, icode, 0, exp,
18088 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18089 NEON_ARG_STOP);
18091 case NEON_LANEMUL:
18092 case NEON_LANEMULL:
18093 case NEON_LANEMULH:
18094 return arm_expand_neon_args (target, icode, 1, exp,
18095 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18096 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18098 case NEON_LANEMAC:
18099 return arm_expand_neon_args (target, icode, 1, exp,
18100 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18101 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18103 case NEON_SHIFTACC:
18104 return arm_expand_neon_args (target, icode, 1, exp,
18105 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18106 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18108 case NEON_SCALARMAC:
18109 return arm_expand_neon_args (target, icode, 1, exp,
18110 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18111 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18113 case NEON_SELECT:
18114 case NEON_VTBX:
18115 return arm_expand_neon_args (target, icode, 1, exp,
18116 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18117 NEON_ARG_STOP);
18119 case NEON_LOAD1:
18120 case NEON_LOADSTRUCT:
18121 return arm_expand_neon_args (target, icode, 1, exp,
18122 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18124 case NEON_LOAD1LANE:
18125 case NEON_LOADSTRUCTLANE:
18126 return arm_expand_neon_args (target, icode, 1, exp,
18127 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18128 NEON_ARG_STOP);
18130 case NEON_STORE1:
18131 case NEON_STORESTRUCT:
18132 return arm_expand_neon_args (target, icode, 0, exp,
18133 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18135 case NEON_STORE1LANE:
18136 case NEON_STORESTRUCTLANE:
18137 return arm_expand_neon_args (target, icode, 0, exp,
18138 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18139 NEON_ARG_STOP);
18142 gcc_unreachable ();
18145 /* Emit code to reinterpret one Neon type as another, without altering bits. */
18146 void
18147 neon_reinterpret (rtx dest, rtx src)
18149 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
18152 /* Emit code to place a Neon pair result in memory locations (with equal
18153 registers). */
18154 void
18155 neon_emit_pair_result_insn (enum machine_mode mode,
18156 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
18157 rtx op1, rtx op2)
18159 rtx mem = gen_rtx_MEM (mode, destaddr);
18160 rtx tmp1 = gen_reg_rtx (mode);
18161 rtx tmp2 = gen_reg_rtx (mode);
18163 emit_insn (intfn (tmp1, op1, tmp2, op2));
18165 emit_move_insn (mem, tmp1);
18166 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
18167 emit_move_insn (mem, tmp2);
18170 /* Set up operands for a register copy from src to dest, taking care not to
18171 clobber registers in the process.
18172 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
18173 be called with a large N, so that should be OK. */
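/* For example, with count == 2, dest = {d0, d1} and src = {d1, d2}, the
copy d0 := d1 is scheduled before d1 := d2, so that d1 is read before it
is overwritten.  */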
18175 void
18176 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
18178 unsigned int copied = 0, opctr = 0;
18179 unsigned int done = (1 << count) - 1;
18180 unsigned int i, j;
18182 while (copied != done)
18184 for (i = 0; i < count; i++)
18186 int good = 1;
18188 for (j = 0; good && j < count; j++)
18189 if (i != j && (copied & (1 << j)) == 0
18190 && reg_overlap_mentioned_p (src[j], dest[i]))
18191 good = 0;
18193 if (good)
18195 operands[opctr++] = dest[i];
18196 operands[opctr++] = src[i];
18197 copied |= 1 << i;
18202 gcc_assert (opctr == count * 2);
18205 /* Expand an expression EXP that calls a built-in function,
18206 with result going to TARGET if that's convenient
18207 (and in mode MODE if that's convenient).
18208 SUBTARGET may be used as the target for computing one of EXP's operands.
18209 IGNORE is nonzero if the value is to be ignored. */
18211 static rtx
18212 arm_expand_builtin (tree exp,
18213 rtx target,
18214 rtx subtarget ATTRIBUTE_UNUSED,
18215 enum machine_mode mode ATTRIBUTE_UNUSED,
18216 int ignore ATTRIBUTE_UNUSED)
18218 const struct builtin_description * d;
18219 enum insn_code icode;
18220 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18221 tree arg0;
18222 tree arg1;
18223 tree arg2;
18224 rtx op0;
18225 rtx op1;
18226 rtx op2;
18227 rtx pat;
18228 int fcode = DECL_FUNCTION_CODE (fndecl);
18229 size_t i;
18230 enum machine_mode tmode;
18231 enum machine_mode mode0;
18232 enum machine_mode mode1;
18233 enum machine_mode mode2;
18235 if (fcode >= ARM_BUILTIN_NEON_BASE)
18236 return arm_expand_neon_builtin (fcode, exp, target);
18238 switch (fcode)
18240 case ARM_BUILTIN_TEXTRMSB:
18241 case ARM_BUILTIN_TEXTRMUB:
18242 case ARM_BUILTIN_TEXTRMSH:
18243 case ARM_BUILTIN_TEXTRMUH:
18244 case ARM_BUILTIN_TEXTRMSW:
18245 case ARM_BUILTIN_TEXTRMUW:
18246 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
18247 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
18248 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
18249 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
18250 : CODE_FOR_iwmmxt_textrmw);
18252 arg0 = CALL_EXPR_ARG (exp, 0);
18253 arg1 = CALL_EXPR_ARG (exp, 1);
18254 op0 = expand_normal (arg0);
18255 op1 = expand_normal (arg1);
18256 tmode = insn_data[icode].operand[0].mode;
18257 mode0 = insn_data[icode].operand[1].mode;
18258 mode1 = insn_data[icode].operand[2].mode;
18260 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18261 op0 = copy_to_mode_reg (mode0, op0);
18262 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18264 /* @@@ better error message */
18265 error ("selector must be an immediate");
18266 return gen_reg_rtx (tmode);
18268 if (target == 0
18269 || GET_MODE (target) != tmode
18270 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18271 target = gen_reg_rtx (tmode);
18272 pat = GEN_FCN (icode) (target, op0, op1);
18273 if (! pat)
18274 return 0;
18275 emit_insn (pat);
18276 return target;
18278 case ARM_BUILTIN_TINSRB:
18279 case ARM_BUILTIN_TINSRH:
18280 case ARM_BUILTIN_TINSRW:
18281 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
18282 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
18283 : CODE_FOR_iwmmxt_tinsrw);
18284 arg0 = CALL_EXPR_ARG (exp, 0);
18285 arg1 = CALL_EXPR_ARG (exp, 1);
18286 arg2 = CALL_EXPR_ARG (exp, 2);
18287 op0 = expand_normal (arg0);
18288 op1 = expand_normal (arg1);
18289 op2 = expand_normal (arg2);
18290 tmode = insn_data[icode].operand[0].mode;
18291 mode0 = insn_data[icode].operand[1].mode;
18292 mode1 = insn_data[icode].operand[2].mode;
18293 mode2 = insn_data[icode].operand[3].mode;
18295 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18296 op0 = copy_to_mode_reg (mode0, op0);
18297 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18298 op1 = copy_to_mode_reg (mode1, op1);
18299 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18301 /* @@@ better error message */
18302 error ("selector must be an immediate");
18303 return const0_rtx;
18305 if (target == 0
18306 || GET_MODE (target) != tmode
18307 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18308 target = gen_reg_rtx (tmode);
18309 pat = GEN_FCN (icode) (target, op0, op1, op2);
18310 if (! pat)
18311 return 0;
18312 emit_insn (pat);
18313 return target;
18315 case ARM_BUILTIN_SETWCX:
18316 arg0 = CALL_EXPR_ARG (exp, 0);
18317 arg1 = CALL_EXPR_ARG (exp, 1);
18318 op0 = force_reg (SImode, expand_normal (arg0));
18319 op1 = expand_normal (arg1);
18320 emit_insn (gen_iwmmxt_tmcr (op1, op0));
18321 return 0;
18323 case ARM_BUILTIN_GETWCX:
18324 arg0 = CALL_EXPR_ARG (exp, 0);
18325 op0 = expand_normal (arg0);
18326 target = gen_reg_rtx (SImode);
18327 emit_insn (gen_iwmmxt_tmrc (target, op0));
18328 return target;
18330 case ARM_BUILTIN_WSHUFH:
18331 icode = CODE_FOR_iwmmxt_wshufh;
18332 arg0 = CALL_EXPR_ARG (exp, 0);
18333 arg1 = CALL_EXPR_ARG (exp, 1);
18334 op0 = expand_normal (arg0);
18335 op1 = expand_normal (arg1);
18336 tmode = insn_data[icode].operand[0].mode;
18337 mode1 = insn_data[icode].operand[1].mode;
18338 mode2 = insn_data[icode].operand[2].mode;
18340 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18341 op0 = copy_to_mode_reg (mode1, op0);
18342 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18344 /* @@@ better error message */
18345 error ("mask must be an immediate");
18346 return const0_rtx;
18348 if (target == 0
18349 || GET_MODE (target) != tmode
18350 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18351 target = gen_reg_rtx (tmode);
18352 pat = GEN_FCN (icode) (target, op0, op1);
18353 if (! pat)
18354 return 0;
18355 emit_insn (pat);
18356 return target;
18358 case ARM_BUILTIN_WSADB:
18359 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
18360 case ARM_BUILTIN_WSADH:
18361 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
18362 case ARM_BUILTIN_WSADBZ:
18363 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
18364 case ARM_BUILTIN_WSADHZ:
18365 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
18367 /* Several three-argument builtins. */
18368 case ARM_BUILTIN_WMACS:
18369 case ARM_BUILTIN_WMACU:
18370 case ARM_BUILTIN_WALIGN:
18371 case ARM_BUILTIN_TMIA:
18372 case ARM_BUILTIN_TMIAPH:
18373 case ARM_BUILTIN_TMIATT:
18374 case ARM_BUILTIN_TMIATB:
18375 case ARM_BUILTIN_TMIABT:
18376 case ARM_BUILTIN_TMIABB:
18377 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
18378 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
18379 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
18380 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
18381 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
18382 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
18383 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
18384 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
18385 : CODE_FOR_iwmmxt_walign);
18386 arg0 = CALL_EXPR_ARG (exp, 0);
18387 arg1 = CALL_EXPR_ARG (exp, 1);
18388 arg2 = CALL_EXPR_ARG (exp, 2);
18389 op0 = expand_normal (arg0);
18390 op1 = expand_normal (arg1);
18391 op2 = expand_normal (arg2);
18392 tmode = insn_data[icode].operand[0].mode;
18393 mode0 = insn_data[icode].operand[1].mode;
18394 mode1 = insn_data[icode].operand[2].mode;
18395 mode2 = insn_data[icode].operand[3].mode;
18397 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18398 op0 = copy_to_mode_reg (mode0, op0);
18399 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18400 op1 = copy_to_mode_reg (mode1, op1);
18401 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18402 op2 = copy_to_mode_reg (mode2, op2);
18403 if (target == 0
18404 || GET_MODE (target) != tmode
18405 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18406 target = gen_reg_rtx (tmode);
18407 pat = GEN_FCN (icode) (target, op0, op1, op2);
18408 if (! pat)
18409 return 0;
18410 emit_insn (pat);
18411 return target;
18413 case ARM_BUILTIN_WZERO:
18414 target = gen_reg_rtx (DImode);
18415 emit_insn (gen_iwmmxt_clrdi (target));
18416 return target;
18418 case ARM_BUILTIN_THREAD_POINTER:
18419 return arm_load_tp (target);
18421 default:
18422 break;
18425 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18426 if (d->code == (const enum arm_builtins) fcode)
18427 return arm_expand_binop_builtin (d->icode, exp, target);
18429 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18430 if (d->code == (const enum arm_builtins) fcode)
18431 return arm_expand_unop_builtin (d->icode, exp, target, 0);
18433 /* @@@ Should really do something sensible here. */
18434 return NULL_RTX;
18437 /* Return the number (counting from 0) of
18438 the least significant set bit in MASK. */
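/* For example, number_of_first_bit_set (0x14) is 2.  */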
18440 inline static int
18441 number_of_first_bit_set (unsigned mask)
18443 int bit;
18445 for (bit = 0;
18446 (mask & (1 << bit)) == 0;
18447 ++bit)
18448 continue;
18450 return bit;
18453 /* Emit code to push or pop registers to or from the stack. F is the
18454 assembly file. MASK is the registers to push or pop. PUSH is
18455 nonzero if we should push, and zero if we should pop. For debugging
18456 output, if pushing, adjust CFA_OFFSET by the amount of space added
18457 to the stack. REAL_REGS should have the same number of bits set as
18458 MASK, and will be used instead (in the same order) to describe which
18459 registers were saved - this is used to mark the save slots when we
18460 push high registers after moving them to low registers. */
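/* A typical call from the prologue emits, for instance,
"push {r4, r5, r6, lr}", preceded by a matching ".save {...}" directive
when EABI unwind tables are enabled; the epilogue path emits the
corresponding "pop".  */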
18461 static void
18462 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
18463 unsigned long real_regs)
18465 int regno;
18466 int lo_mask = mask & 0xFF;
18467 int pushed_words = 0;
18469 gcc_assert (mask);
18471 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
18473 /* Special case. Do not generate a POP PC statement here, do it in
18474 thumb_exit ().  */
18475 thumb_exit (f, -1);
18476 return;
18479 if (ARM_EABI_UNWIND_TABLES && push)
18481 fprintf (f, "\t.save\t{");
18482 for (regno = 0; regno < 15; regno++)
18484 if (real_regs & (1 << regno))
18486 if (real_regs & ((1 << regno) -1))
18487 fprintf (f, ", ");
18488 asm_fprintf (f, "%r", regno);
18491 fprintf (f, "}\n");
18494 fprintf (f, "\t%s\t{", push ? "push" : "pop");
18496 /* Look at the low registers first. */
18497 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
18499 if (lo_mask & 1)
18501 asm_fprintf (f, "%r", regno);
18503 if ((lo_mask & ~1) != 0)
18504 fprintf (f, ", ");
18506 pushed_words++;
18510 if (push && (mask & (1 << LR_REGNUM)))
18512 /* Catch pushing the LR. */
18513 if (mask & 0xFF)
18514 fprintf (f, ", ");
18516 asm_fprintf (f, "%r", LR_REGNUM);
18518 pushed_words++;
18520 else if (!push && (mask & (1 << PC_REGNUM)))
18522 /* Catch popping the PC. */
18523 if (TARGET_INTERWORK || TARGET_BACKTRACE
18524 || crtl->calls_eh_return)
18526 /* The PC is never popped directly; instead
18527 it is popped into r3 and then BX is used. */
18528 fprintf (f, "}\n");
18530 thumb_exit (f, -1);
18532 return;
18534 else
18536 if (mask & 0xFF)
18537 fprintf (f, ", ");
18539 asm_fprintf (f, "%r", PC_REGNUM);
18543 fprintf (f, "}\n");
18545 if (push && pushed_words && dwarf2out_do_frame ())
18547 char *l = dwarf2out_cfi_label (false);
18548 int pushed_mask = real_regs;
18550 *cfa_offset += pushed_words * 4;
18551 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
18553 pushed_words = 0;
18554 pushed_mask = real_regs;
18555 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
18557 if (pushed_mask & 1)
18558 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
18563 /* Generate code to return from a thumb function.
18564 If 'reg_containing_return_addr' is -1, then the return address is
18565 actually on the stack, at the stack pointer. */
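/* For example, a function that kept its return address in LR and needs
no pops finishes with a single "bx lr"; when the return address is still
on the stack it is first popped into a free low register and that
register is then used for the BX.  */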
18566 static void
18567 thumb_exit (FILE *f, int reg_containing_return_addr)
18569 unsigned regs_available_for_popping;
18570 unsigned regs_to_pop;
18571 int pops_needed;
18572 unsigned available;
18573 unsigned required;
18574 int mode;
18575 int size;
18576 int restore_a4 = FALSE;
18578 /* Compute the registers we need to pop. */
18579 regs_to_pop = 0;
18580 pops_needed = 0;
18582 if (reg_containing_return_addr == -1)
18584 regs_to_pop |= 1 << LR_REGNUM;
18585 ++pops_needed;
18588 if (TARGET_BACKTRACE)
18590 /* Restore the (ARM) frame pointer and stack pointer. */
18591 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
18592 pops_needed += 2;
18595 /* If there is nothing to pop then just emit the BX instruction and
18596 return. */
18597 if (pops_needed == 0)
18599 if (crtl->calls_eh_return)
18600 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18602 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18603 return;
18605 /* Otherwise if we are not supporting interworking and we have not created
18606 a backtrace structure and the function was not entered in ARM mode then
18607 just pop the return address straight into the PC. */
18608 else if (!TARGET_INTERWORK
18609 && !TARGET_BACKTRACE
18610 && !is_called_in_ARM_mode (current_function_decl)
18611 && !crtl->calls_eh_return)
18613 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
18614 return;
18617 /* Find out how many of the (return) argument registers we can corrupt. */
18618 regs_available_for_popping = 0;
18620 /* If returning via __builtin_eh_return, the bottom three registers
18621 all contain information needed for the return. */
18622 if (crtl->calls_eh_return)
18623 size = 12;
18624 else
18626 /* We can deduce the registers used from the function's
18627 return value. This is more reliable than examining
18628 df_regs_ever_live_p () because that will be set if the register is
18629 ever used in the function, not just if the register is used
18630 to hold a return value. */
18632 if (crtl->return_rtx != 0)
18633 mode = GET_MODE (crtl->return_rtx);
18634 else
18635 mode = DECL_MODE (DECL_RESULT (current_function_decl));
18637 size = GET_MODE_SIZE (mode);
18639 if (size == 0)
18641 /* In a void function we can use any argument register.
18642 In a function that returns a structure on the stack
18643 we can use the second and third argument registers. */
18644 if (mode == VOIDmode)
18645 regs_available_for_popping =
18646 (1 << ARG_REGISTER (1))
18647 | (1 << ARG_REGISTER (2))
18648 | (1 << ARG_REGISTER (3));
18649 else
18650 regs_available_for_popping =
18651 (1 << ARG_REGISTER (2))
18652 | (1 << ARG_REGISTER (3));
18654 else if (size <= 4)
18655 regs_available_for_popping =
18656 (1 << ARG_REGISTER (2))
18657 | (1 << ARG_REGISTER (3));
18658 else if (size <= 8)
18659 regs_available_for_popping =
18660 (1 << ARG_REGISTER (3));
18663 /* Match registers to be popped with registers into which we pop them. */
18664 for (available = regs_available_for_popping,
18665 required = regs_to_pop;
18666 required != 0 && available != 0;
18667 available &= ~(available & - available),
18668 required &= ~(required & - required))
18669 -- pops_needed;
18671 /* If we have any popping registers left over, remove them. */
18672 if (available > 0)
18673 regs_available_for_popping &= ~available;
18675 /* Otherwise if we need another popping register we can use
18676 the fourth argument register. */
18677 else if (pops_needed)
18679 /* If we have not found any free argument registers and
18680 reg a4 contains the return address, we must move it. */
18681 if (regs_available_for_popping == 0
18682 && reg_containing_return_addr == LAST_ARG_REGNUM)
18684 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18685 reg_containing_return_addr = LR_REGNUM;
18687 else if (size > 12)
18689 /* Register a4 is being used to hold part of the return value,
18690 but we have dire need of a free, low register. */
18691 restore_a4 = TRUE;
18693 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
18696 if (reg_containing_return_addr != LAST_ARG_REGNUM)
18698 /* The fourth argument register is available. */
18699 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
18701 --pops_needed;
18705 /* Pop as many registers as we can. */
18706 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18707 regs_available_for_popping);
18709 /* Process the registers we popped. */
18710 if (reg_containing_return_addr == -1)
18712 /* The return address was popped into the lowest numbered register. */
18713 regs_to_pop &= ~(1 << LR_REGNUM);
18715 reg_containing_return_addr =
18716 number_of_first_bit_set (regs_available_for_popping);
18718 /* Remove this register from the mask of available registers, so that
18719 the return address will not be corrupted by further pops. */
18720 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
18723 /* If we popped other registers then handle them here. */
18724 if (regs_available_for_popping)
18726 int frame_pointer;
18728 /* Work out which register currently contains the frame pointer. */
18729 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
18731 /* Move it into the correct place. */
18732 asm_fprintf (f, "\tmov\t%r, %r\n",
18733 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
18735 /* (Temporarily) remove it from the mask of popped registers. */
18736 regs_available_for_popping &= ~(1 << frame_pointer);
18737 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
18739 if (regs_available_for_popping)
18741 int stack_pointer;
18743 /* We popped the stack pointer as well,
18744 find the register that contains it. */
18745 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
18747 /* Move it into the stack register. */
18748 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
18750 /* At this point we have popped all necessary registers, so
18751 do not worry about restoring regs_available_for_popping
18752 to its correct value:
18754 assert (pops_needed == 0)
18755 assert (regs_available_for_popping == (1 << frame_pointer))
18756 assert (regs_to_pop == (1 << STACK_POINTER)) */
18758 else
18760 /* Since we have just moved the popped value into the frame
18761 pointer, the popping register is available for reuse, and
18762 we know that we still have the stack pointer left to pop. */
18763 regs_available_for_popping |= (1 << frame_pointer);
18767 /* If we still have registers left on the stack, but we no longer have
18768 any registers into which we can pop them, then we must move the return
18769 address into the link register and make available the register that
18770 contained it. */
18771 if (regs_available_for_popping == 0 && pops_needed > 0)
18773 regs_available_for_popping |= 1 << reg_containing_return_addr;
18775 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
18776 reg_containing_return_addr);
18778 reg_containing_return_addr = LR_REGNUM;
18781 /* If we have registers left on the stack then pop some more.
18782 We know that at most we will want to pop FP and SP. */
18783 if (pops_needed > 0)
18785 int popped_into;
18786 int move_to;
18788 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18789 regs_available_for_popping);
18791 /* We have popped either FP or SP.
18792 Move whichever one it is into the correct register. */
18793 popped_into = number_of_first_bit_set (regs_available_for_popping);
18794 move_to = number_of_first_bit_set (regs_to_pop);
18796 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
18798 regs_to_pop &= ~(1 << move_to);
18800 --pops_needed;
18803 /* If we still have not popped everything then we must have only
18804 had one register available to us and we are now popping the SP. */
18805 if (pops_needed > 0)
18807 int popped_into;
18809 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18810 regs_available_for_popping);
18812 popped_into = number_of_first_bit_set (regs_available_for_popping);
18814 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
18815 /*
18816 assert (regs_to_pop == (1 << STACK_POINTER))
18817 assert (pops_needed == 1)
18818 */
18821 /* If necessary restore the a4 register. */
18822 if (restore_a4)
18824 if (reg_containing_return_addr != LR_REGNUM)
18826 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18827 reg_containing_return_addr = LR_REGNUM;
18830 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
18833 if (crtl->calls_eh_return)
18834 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18836 /* Return to caller. */
18837 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18841 void
18842 thumb1_final_prescan_insn (rtx insn)
18844 if (flag_print_asm_name)
18845 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
18846 INSN_ADDRESSES (INSN_UID (insn)));
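/* Return nonzero if VAL, truncated to 32 bits, is a nonzero value whose
set bits all fall within eight contiguous bit positions (an eight-bit
constant shifted left by at most 24 bits); e.g. 0x000ff000 (0xff << 12)
qualifies, 0x00100001 does not.  */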
18849 int
18850 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
18852 unsigned HOST_WIDE_INT mask = 0xff;
18853 int i;
18855 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
18856 if (val == 0) /* XXX */
18857 return 0;
18859 for (i = 0; i < 25; i++)
18860 if ((val & (mask << i)) == val)
18861 return 1;
18863 return 0;
18866 /* Returns nonzero if the current function contains,
18867 or might contain, a far jump. */
18868 static int
18869 thumb_far_jump_used_p (void)
18871 rtx insn;
18873 /* This test is only important for leaf functions. */
18874 /* assert (!leaf_function_p ()); */
18876 /* If we have already decided that far jumps may be used,
18877 do not bother checking again, and always return true even if
18878 it turns out that they are not being used. Once we have made
18879 the decision that far jumps are present (and that hence the link
18880 register will be pushed onto the stack) we cannot go back on it. */
18881 if (cfun->machine->far_jump_used)
18882 return 1;
18884 /* If this function is not being called from the prologue/epilogue
18885 generation code then it must be being called from the
18886 INITIAL_ELIMINATION_OFFSET macro. */
18887 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
18889 /* In this case we know that we are being asked about the elimination
18890 of the arg pointer register. If that register is not being used,
18891 then there are no arguments on the stack, and we do not have to
18892 worry that a far jump might force the prologue to push the link
18893 register, changing the stack offsets. In this case we can just
18894 return false, since the presence of far jumps in the function will
18895 not affect stack offsets.
18897 If the arg pointer is live (or if it was live, but has now been
18898 eliminated and so set to dead) then we do have to test to see if
18899 the function might contain a far jump. This test can lead to some
18900 false negatives, since before reload is completed the length of
18901 branch instructions is not known, so gcc defaults to returning their
18902 longest length, which in turn sets the far jump attribute to true.
18904 A false negative will not result in bad code being generated, but it
18905 will result in a needless push and pop of the link register. We
18906 hope that this does not occur too often.
18908 If we need doubleword stack alignment this could affect the other
18909 elimination offsets so we can't risk getting it wrong. */
18910 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
18911 cfun->machine->arg_pointer_live = 1;
18912 else if (!cfun->machine->arg_pointer_live)
18913 return 0;
18916 /* Check to see if the function contains a branch
18917 insn with the far jump attribute set. */
18918 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18920 if (GET_CODE (insn) == JUMP_INSN
18921 /* Ignore tablejump patterns. */
18922 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18923 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
18924 && get_attr_far_jump (insn) == FAR_JUMP_YES
18927 /* Record the fact that we have decided that
18928 the function does use far jumps. */
18929 cfun->machine->far_jump_used = 1;
18930 return 1;
18934 return 0;
18937 /* Return nonzero if FUNC must be entered in ARM mode. */
18938 int
18939 is_called_in_ARM_mode (tree func)
18941 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
18943 /* Ignore the problem about functions whose address is taken. */
18944 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
18945 return TRUE;
18947 #ifdef ARM_PE
18948 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
18949 #else
18950 return FALSE;
18951 #endif
18954 /* The bits which aren't usefully expanded as rtl. */
18955 const char *
18956 thumb_unexpanded_epilogue (void)
18958 arm_stack_offsets *offsets;
18959 int regno;
18960 unsigned long live_regs_mask = 0;
18961 int high_regs_pushed = 0;
18962 int had_to_push_lr;
18963 int size;
18965 if (cfun->machine->return_used_this_function != 0)
18966 return "";
18968 if (IS_NAKED (arm_current_func_type ()))
18969 return "";
18971 offsets = arm_get_frame_offsets ();
18972 live_regs_mask = offsets->saved_regs_mask;
18973 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
18975 /* We can deduce the registers used from the function's return value.
18976 This is more reliable than examining df_regs_ever_live_p () because that
18977 will be set if the register is ever used in the function, not just if
18978 the register is used to hold a return value. */
18979 size = arm_size_return_regs ();
18981 /* The prolog may have pushed some high registers to use as
18982 work registers. e.g. the testsuite file:
18983 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
18984 compiles to produce:
18985 push {r4, r5, r6, r7, lr}
18986 mov r7, r9
18987 mov r6, r8
18988 push {r6, r7}
18989 as part of the prolog. We have to undo that pushing here. */
18991 if (high_regs_pushed)
18993 unsigned long mask = live_regs_mask & 0xff;
18994 int next_hi_reg;
18996 /* The available low registers depend on the size of the value we are
18997 returning. */
18998 if (size <= 12)
18999 mask |= 1 << 3;
19000 if (size <= 8)
19001 mask |= 1 << 2;
19003 if (mask == 0)
19004 /* Oh dear! We have no low registers into which we can pop
19005 high registers! */
19006 internal_error
19007 ("no low registers available for popping high registers");
19009 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
19010 if (live_regs_mask & (1 << next_hi_reg))
19011 break;
19013 while (high_regs_pushed)
19015 /* Find lo register(s) into which the high register(s) can
19016 be popped. */
19017 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19019 if (mask & (1 << regno))
19020 high_regs_pushed--;
19021 if (high_regs_pushed == 0)
19022 break;
19025 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
19027 /* Pop the values into the low register(s). */
19028 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
19030 /* Move the value(s) into the high registers. */
19031 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19033 if (mask & (1 << regno))
19035 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
19036 regno);
19038 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
19039 if (live_regs_mask & (1 << next_hi_reg))
19040 break;
19044 live_regs_mask &= ~0x0f00;
19047 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
19048 live_regs_mask &= 0xff;
19050 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
19052 /* Pop the return address into the PC. */
19053 if (had_to_push_lr)
19054 live_regs_mask |= 1 << PC_REGNUM;
19056 /* Either no argument registers were pushed or a backtrace
19057 structure was created which includes an adjusted stack
19058 pointer, so just pop everything. */
19059 if (live_regs_mask)
19060 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19061 live_regs_mask);
19063 /* We have either just popped the return address into the
19064 PC or it was kept in LR for the entire function. */
19065 if (!had_to_push_lr)
19066 thumb_exit (asm_out_file, LR_REGNUM);
19068 else
19070 /* Pop everything but the return address. */
19071 if (live_regs_mask)
19072 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19073 live_regs_mask);
19075 if (had_to_push_lr)
19077 if (size > 12)
19079 /* We have no free low regs, so save one. */
19080 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
19081 LAST_ARG_REGNUM);
19084 /* Get the return address into a temporary register. */
19085 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
19086 1 << LAST_ARG_REGNUM);
19088 if (size > 12)
19090 /* Move the return address to lr. */
19091 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
19092 LAST_ARG_REGNUM);
19093 /* Restore the low register. */
19094 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
19095 IP_REGNUM);
19096 regno = LR_REGNUM;
19098 else
19099 regno = LAST_ARG_REGNUM;
19101 else
19102 regno = LR_REGNUM;
19104 /* Remove the argument registers that were pushed onto the stack. */
19105 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
19106 SP_REGNUM, SP_REGNUM,
19107 crtl->args.pretend_args_size);
19109 thumb_exit (asm_out_file, regno);
19112 return "";
19115 /* Functions to save and restore machine-specific function data. */
19116 static struct machine_function *
19117 arm_init_machine_status (void)
19119 struct machine_function *machine;
19120 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
19122 #if ARM_FT_UNKNOWN != 0
19123 machine->func_type = ARM_FT_UNKNOWN;
19124 #endif
19125 return machine;
19128 /* Return an RTX indicating where the return address to the
19129 calling function can be found. */
19130 rtx
19131 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
19133 if (count != 0)
19134 return NULL_RTX;
19136 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
19139 /* Do anything needed before RTL is emitted for each function. */
19140 void
19141 arm_init_expanders (void)
19143 /* Arrange to initialize and mark the machine per-function status. */
19144 init_machine_status = arm_init_machine_status;
19146 /* This is to stop the combine pass optimizing away the alignment
19147 adjustment of va_arg. */
19148 /* ??? It is claimed that this should not be necessary. */
19149 if (cfun)
19150 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
19154 /* Like arm_compute_initial_elimination_offset. Simpler because there
19155 isn't an ABI specified frame pointer for Thumb. Instead, we set it
19156 to point at the base of the local variables after static stack
19157 space for a function has been allocated. */
19159 HOST_WIDE_INT
19160 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19162 arm_stack_offsets *offsets;
19164 offsets = arm_get_frame_offsets ();
19166 switch (from)
19168 case ARG_POINTER_REGNUM:
19169 switch (to)
19171 case STACK_POINTER_REGNUM:
19172 return offsets->outgoing_args - offsets->saved_args;
19174 case FRAME_POINTER_REGNUM:
19175 return offsets->soft_frame - offsets->saved_args;
19177 case ARM_HARD_FRAME_POINTER_REGNUM:
19178 return offsets->saved_regs - offsets->saved_args;
19180 case THUMB_HARD_FRAME_POINTER_REGNUM:
19181 return offsets->locals_base - offsets->saved_args;
19183 default:
19184 gcc_unreachable ();
19186 break;
19188 case FRAME_POINTER_REGNUM:
19189 switch (to)
19191 case STACK_POINTER_REGNUM:
19192 return offsets->outgoing_args - offsets->soft_frame;
19194 case ARM_HARD_FRAME_POINTER_REGNUM:
19195 return offsets->saved_regs - offsets->soft_frame;
19197 case THUMB_HARD_FRAME_POINTER_REGNUM:
19198 return offsets->locals_base - offsets->soft_frame;
19200 default:
19201 gcc_unreachable ();
19203 break;
19205 default:
19206 gcc_unreachable ();
19210 /* Generate the rest of a function's prologue. */
19211 void
19212 thumb1_expand_prologue (void)
19214 rtx insn, dwarf;
19216 HOST_WIDE_INT amount;
19217 arm_stack_offsets *offsets;
19218 unsigned long func_type;
19219 int regno;
19220 unsigned long live_regs_mask;
19222 func_type = arm_current_func_type ();
19224 /* Naked functions don't have prologues. */
19225 if (IS_NAKED (func_type))
19226 return;
19228 if (IS_INTERRUPT (func_type))
19230 error ("interrupt Service Routines cannot be coded in Thumb mode");
19231 return;
19234 offsets = arm_get_frame_offsets ();
19235 live_regs_mask = offsets->saved_regs_mask;
19236 /* Load the pic register before setting the frame pointer,
19237 so we can use r7 as a temporary work register. */
19238 if (flag_pic && arm_pic_register != INVALID_REGNUM)
19239 arm_load_pic_register (live_regs_mask);
19241 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19242 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
19243 stack_pointer_rtx);
19245 amount = offsets->outgoing_args - offsets->saved_regs;
19246 if (amount)
19248 if (amount < 512)
19250 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19251 GEN_INT (- amount)));
19252 RTX_FRAME_RELATED_P (insn) = 1;
19254 else
19256 rtx reg;
19258 /* The stack decrement is too big for an immediate value in a single
19259 insn. In theory we could issue multiple subtracts, but after
19260 three of them it becomes more space efficient to place the full
19261 value in the constant pool and load into a register. (Also the
19262 ARM debugger really likes to see only one stack decrement per
19263 function). So instead we look for a scratch register into which
19264 we can load the decrement, and then we subtract this from the
19265 stack pointer. Unfortunately on the thumb the only available
19266 scratch registers are the argument registers, and we cannot use
19267 these as they may hold arguments to the function. Instead we
19268 attempt to locate a call preserved register which is used by this
19269 function. If we can find one, then we know that it will have
19270 been pushed at the start of the prologue and so we can corrupt
19271 it now. */
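/* Illustrative example, not from the original sources: with amount == 1024
   and r4 live in this function, the code below emits roughly

	ldr	r4, .LCn	@ .LCn: .word -1024
	add	sp, sp, r4

   where .LCn stands for whatever constant-pool entry the movsi expansion
   happens to create.  */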
19272 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
19273 if (live_regs_mask & (1 << regno))
19274 break;
19276 gcc_assert(regno <= LAST_LO_REGNUM);
19278 reg = gen_rtx_REG (SImode, regno);
19280 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
19282 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19283 stack_pointer_rtx, reg));
19284 RTX_FRAME_RELATED_P (insn) = 1;
19285 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19286 plus_constant (stack_pointer_rtx,
19287 -amount));
19288 RTX_FRAME_RELATED_P (dwarf) = 1;
19289 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19293 if (frame_pointer_needed)
19294 thumb_set_frame_pointer (offsets);
19296 /* If we are profiling, make sure no instructions are scheduled before
19297 the call to mcount. Similarly if the user has requested no
19298 scheduling in the prolog. Similarly if we want non-call exceptions
19299 using the EABI unwinder, to prevent faulting instructions from being
19300 swapped with a stack adjustment. */
19301 if (crtl->profile || !TARGET_SCHED_PROLOG
19302 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
19303 emit_insn (gen_blockage ());
19305 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
19306 if (live_regs_mask & 0xff)
19307 cfun->machine->lr_save_eliminated = 0;
19311 void
19312 thumb1_expand_epilogue (void)
19314 HOST_WIDE_INT amount;
19315 arm_stack_offsets *offsets;
19316 int regno;
19318 /* Naked functions don't have epilogues. */
19319 if (IS_NAKED (arm_current_func_type ()))
19320 return;
19322 offsets = arm_get_frame_offsets ();
19323 amount = offsets->outgoing_args - offsets->saved_regs;
19325 if (frame_pointer_needed)
19327 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
19328 amount = offsets->locals_base - offsets->saved_regs;
19331 gcc_assert (amount >= 0);
19332 if (amount)
19334 if (amount < 512)
19335 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19336 GEN_INT (amount)));
19337 else
19339 /* r3 is always free in the epilogue. */
19340 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
19342 emit_insn (gen_movsi (reg, GEN_INT (amount)));
19343 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
19347 /* Emit a USE (stack_pointer_rtx), so that
19348 the stack adjustment will not be deleted. */
19349 emit_insn (gen_prologue_use (stack_pointer_rtx));
19351 if (crtl->profile || !TARGET_SCHED_PROLOG)
19352 emit_insn (gen_blockage ());
19354 /* Emit a clobber for each insn that will be restored in the epilogue,
19355 so that flow2 will get register lifetimes correct. */
19356 for (regno = 0; regno < 13; regno++)
19357 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
19358 emit_clobber (gen_rtx_REG (SImode, regno));
19360 if (! df_regs_ever_live_p (LR_REGNUM))
19361 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
19364 static void
19365 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
19367 arm_stack_offsets *offsets;
19368 unsigned long live_regs_mask = 0;
19369 unsigned long l_mask;
19370 unsigned high_regs_pushed = 0;
19371 int cfa_offset = 0;
19372 int regno;
19374 if (IS_NAKED (arm_current_func_type ()))
19375 return;
19377 if (is_called_in_ARM_mode (current_function_decl))
19379 const char * name;
19381 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
19382 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
19383 == SYMBOL_REF);
19384 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
19386 /* Generate code sequence to switch us into Thumb mode. */
19387 /* The .code 32 directive has already been emitted by
19388 ASM_DECLARE_FUNCTION_NAME. */
19389 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
19390 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
19392 /* Generate a label, so that the debugger will notice the
19393 change in instruction sets. This label is also used by
19394 the assembler to bypass the ARM code when this function
19395 is called from a Thumb encoded function elsewhere in the
19396 same file. Hence the definition of STUB_NAME here must
19397 agree with the definition in gas/config/tc-arm.c. */
19399 #define STUB_NAME ".real_start_of"
19401 fprintf (f, "\t.code\t16\n");
19402 #ifdef ARM_PE
19403 if (arm_dllexport_name_p (name))
19404 name = arm_strip_name_encoding (name);
19405 #endif
19406 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
19407 fprintf (f, "\t.thumb_func\n");
19408 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
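/* Illustrative example, not from the original sources: for a function "foo"
   that is called in ARM mode, the stub emitted above reads roughly

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_of_foo
	.thumb_func
   .real_start_of_foo:

   with %U expanding to the target's user-label prefix, if any.  */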
19411 if (crtl->args.pretend_args_size)
19413 /* Output unwind directive for the stack adjustment. */
19414 if (ARM_EABI_UNWIND_TABLES)
19415 fprintf (f, "\t.pad #%d\n",
19416 crtl->args.pretend_args_size);
19418 if (cfun->machine->uses_anonymous_args)
19420 int num_pushes;
19422 fprintf (f, "\tpush\t{");
19424 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
19426 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
19427 regno <= LAST_ARG_REGNUM;
19428 regno++)
19429 asm_fprintf (f, "%r%s", regno,
19430 regno == LAST_ARG_REGNUM ? "" : ", ");
19432 fprintf (f, "}\n");
19434 else
19435 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
19436 SP_REGNUM, SP_REGNUM,
19437 crtl->args.pretend_args_size);
19439 /* We don't need to record the stores for unwinding (would it
19440 help the debugger any if we did?), but record the change in
19441 the stack pointer. */
19442 if (dwarf2out_do_frame ())
19444 char *l = dwarf2out_cfi_label (false);
19446 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
19447 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19451 /* Get the registers we are going to push. */
19452 offsets = arm_get_frame_offsets ();
19453 live_regs_mask = offsets->saved_regs_mask;
19454 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19455 l_mask = live_regs_mask & 0x40ff;
19456 /* Then count how many other high registers will need to be pushed. */
19457 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19459 if (TARGET_BACKTRACE)
19461 unsigned offset;
19462 unsigned work_register;
19464 /* We have been asked to create a stack backtrace structure.
19465 The code looks like this:
19467 0 .align 2
19468 0 func:
19469 0 sub SP, #16 Reserve space for 4 registers.
19470 2 push {R7} Push low registers.
19471 4 add R7, SP, #20 Get the stack pointer before the push.
19472 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
19473 8 mov R7, PC Get hold of the start of this code plus 12.
19474 10 str R7, [SP, #16] Store it.
19475 12 mov R7, FP Get hold of the current frame pointer.
19476 14 str R7, [SP, #4] Store it.
19477 16 mov R7, LR Get hold of the current return address.
19478 18 str R7, [SP, #12] Store it.
19479 20 add R7, SP, #16 Point at the start of the backtrace structure.
19480 22 mov FP, R7 Put this value into the frame pointer. */
19482 work_register = thumb_find_work_register (live_regs_mask);
19484 if (ARM_EABI_UNWIND_TABLES)
19485 asm_fprintf (f, "\t.pad #16\n");
19487 asm_fprintf
19488 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
19489 SP_REGNUM, SP_REGNUM);
19491 if (dwarf2out_do_frame ())
19493 char *l = dwarf2out_cfi_label (false);
19495 cfa_offset = cfa_offset + 16;
19496 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19499 if (l_mask)
19501 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19502 offset = bit_count (l_mask) * UNITS_PER_WORD;
19504 else
19505 offset = 0;
19507 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19508 offset + 16 + crtl->args.pretend_args_size);
19510 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19511 offset + 4);
19513 /* Make sure that the instruction fetching the PC is in the right place
19514 to calculate "start of backtrace creation code + 12". */
19515 if (l_mask)
19517 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19518 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19519 offset + 12);
19520 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19521 ARM_HARD_FRAME_POINTER_REGNUM);
19522 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19523 offset);
19525 else
19527 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19528 ARM_HARD_FRAME_POINTER_REGNUM);
19529 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19530 offset);
19531 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19532 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19533 offset + 12);
19536 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
19537 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19538 offset + 8);
19539 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19540 offset + 12);
19541 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
19542 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
19544 /* Optimization: If we are not pushing any low registers but we are going
19545 to push some high registers then delay our first push. This will just
19546 be a push of LR and we can combine it with the push of the first high
19547 register. */
19548 else if ((l_mask & 0xff) != 0
19549 || (high_regs_pushed == 0 && l_mask))
19550 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19552 if (high_regs_pushed)
19554 unsigned pushable_regs;
19555 unsigned next_hi_reg;
19557 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
19558 if (live_regs_mask & (1 << next_hi_reg))
19559 break;
19561 pushable_regs = l_mask & 0xff;
19563 if (pushable_regs == 0)
19564 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
19566 while (high_regs_pushed > 0)
19568 unsigned long real_regs_mask = 0;
19570 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
19572 if (pushable_regs & (1 << regno))
19574 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
19576 high_regs_pushed --;
19577 real_regs_mask |= (1 << next_hi_reg);
19579 if (high_regs_pushed)
19581 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
19582 next_hi_reg --)
19583 if (live_regs_mask & (1 << next_hi_reg))
19584 break;
19586 else
19588 pushable_regs &= ~((1 << regno) - 1);
19589 break;
19594 /* If we had to find a work register and we have not yet
19595 saved the LR then add it to the list of regs to push. */
19596 if (l_mask == (1 << LR_REGNUM))
19598 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
19599 1, &cfa_offset,
19600 real_regs_mask | (1 << LR_REGNUM));
19601 l_mask = 0;
19603 else
19604 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
19609 /* Handle the case of a double word load into a low register from
19610 a computed memory address. The computed address may involve a
19611 register which is overwritten by the load. */
19612 const char *
19613 thumb_load_double_from_address (rtx *operands)
19615 rtx addr;
19616 rtx base;
19617 rtx offset;
19618 rtx arg1;
19619 rtx arg2;
19621 gcc_assert (GET_CODE (operands[0]) == REG);
19622 gcc_assert (GET_CODE (operands[1]) == MEM);
19624 /* Get the memory address. */
19625 addr = XEXP (operands[1], 0);
19627 /* Work out how the memory address is computed. */
19628 switch (GET_CODE (addr))
19630 case REG:
19631 operands[2] = adjust_address (operands[1], SImode, 4);
19633 if (REGNO (operands[0]) == REGNO (addr))
19635 output_asm_insn ("ldr\t%H0, %2", operands);
19636 output_asm_insn ("ldr\t%0, %1", operands);
19638 else
19640 output_asm_insn ("ldr\t%0, %1", operands);
19641 output_asm_insn ("ldr\t%H0, %2", operands);
19643 break;
19645 case CONST:
19646 /* Compute <address> + 4 for the high order load. */
19647 operands[2] = adjust_address (operands[1], SImode, 4);
19649 output_asm_insn ("ldr\t%0, %1", operands);
19650 output_asm_insn ("ldr\t%H0, %2", operands);
19651 break;
19653 case PLUS:
19654 arg1 = XEXP (addr, 0);
19655 arg2 = XEXP (addr, 1);
19657 if (CONSTANT_P (arg1))
19658 base = arg2, offset = arg1;
19659 else
19660 base = arg1, offset = arg2;
19662 gcc_assert (GET_CODE (base) == REG);
19664 /* Catch the case of <address> = <reg> + <reg> */
19665 if (GET_CODE (offset) == REG)
19667 int reg_offset = REGNO (offset);
19668 int reg_base = REGNO (base);
19669 int reg_dest = REGNO (operands[0]);
19671 /* Add the base and offset registers together into the
19672 higher destination register. */
19673 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
19674 reg_dest + 1, reg_base, reg_offset);
19676 /* Load the lower destination register from the address in
19677 the higher destination register. */
19678 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
19679 reg_dest, reg_dest + 1);
19681 /* Load the higher destination register from its own address
19682 plus 4. */
19683 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
19684 reg_dest + 1, reg_dest + 1);
19686 else
19688 /* Compute <address> + 4 for the high order load. */
19689 operands[2] = adjust_address (operands[1], SImode, 4);
19691 /* If the computed address is held in the low order register
19692 then load the high order register first, otherwise always
19693 load the low order register first. */
19694 if (REGNO (operands[0]) == REGNO (base))
19696 output_asm_insn ("ldr\t%H0, %2", operands);
19697 output_asm_insn ("ldr\t%0, %1", operands);
19699 else
19701 output_asm_insn ("ldr\t%0, %1", operands);
19702 output_asm_insn ("ldr\t%H0, %2", operands);
19705 break;
19707 case LABEL_REF:
19708 /* With no registers to worry about we can just load the value
19709 directly. */
19710 operands[2] = adjust_address (operands[1], SImode, 4);
19712 output_asm_insn ("ldr\t%H0, %2", operands);
19713 output_asm_insn ("ldr\t%0, %1", operands);
19714 break;
19716 default:
19717 gcc_unreachable ();
19720 return "";
19723 const char *
19724 thumb_output_move_mem_multiple (int n, rtx *operands)
19726 rtx tmp;
19728 switch (n)
19730 case 2:
19731 if (REGNO (operands[4]) > REGNO (operands[5]))
19733 tmp = operands[4];
19734 operands[4] = operands[5];
19735 operands[5] = tmp;
19737 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
19738 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
19739 break;
19741 case 3:
19742 if (REGNO (operands[4]) > REGNO (operands[5]))
19744 tmp = operands[4];
19745 operands[4] = operands[5];
19746 operands[5] = tmp;
19748 if (REGNO (operands[5]) > REGNO (operands[6]))
19750 tmp = operands[5];
19751 operands[5] = operands[6];
19752 operands[6] = tmp;
19754 if (REGNO (operands[4]) > REGNO (operands[5]))
19756 tmp = operands[4];
19757 operands[4] = operands[5];
19758 operands[5] = tmp;
19761 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
19762 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
19763 break;
19765 default:
19766 gcc_unreachable ();
19769 return "";
19772 /* Output a call-via instruction for thumb state. */
19773 const char *
19774 thumb_call_via_reg (rtx reg)
19776 int regno = REGNO (reg);
19777 rtx *labelp;
19779 gcc_assert (regno < LR_REGNUM);
19781 /* If we are in the normal text section we can use a single instance
19782 per compilation unit. If we are doing function sections, then we need
19783 an entry per section, since we can't rely on reachability. */
19784 if (in_section == text_section)
19786 thumb_call_reg_needed = 1;
19788 if (thumb_call_via_label[regno] == NULL)
19789 thumb_call_via_label[regno] = gen_label_rtx ();
19790 labelp = thumb_call_via_label + regno;
19792 else
19794 if (cfun->machine->call_via[regno] == NULL)
19795 cfun->machine->call_via[regno] = gen_label_rtx ();
19796 labelp = cfun->machine->call_via + regno;
19799 output_asm_insn ("bl\t%a0", labelp);
19800 return "";
19803 /* Routines for generating rtl. */
19804 void
19805 thumb_expand_movmemqi (rtx *operands)
19807 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
19808 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
19809 HOST_WIDE_INT len = INTVAL (operands[2]);
19810 HOST_WIDE_INT offset = 0;
19812 while (len >= 12)
19814 emit_insn (gen_movmem12b (out, in, out, in));
19815 len -= 12;
19818 if (len >= 8)
19820 emit_insn (gen_movmem8b (out, in, out, in));
19821 len -= 8;
19824 if (len >= 4)
19826 rtx reg = gen_reg_rtx (SImode);
19827 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
19828 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
19829 len -= 4;
19830 offset += 4;
19833 if (len >= 2)
19835 rtx reg = gen_reg_rtx (HImode);
19836 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
19837 plus_constant (in, offset))));
19838 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
19839 reg));
19840 len -= 2;
19841 offset += 2;
19844 if (len)
19846 rtx reg = gen_reg_rtx (QImode);
19847 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
19848 plus_constant (in, offset))));
19849 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
19850 reg));
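/* Illustrative example, not from the original sources: a 27-byte copy is
   expanded above as two 12-byte block moves (movmem12b), followed by a
   halfword copy at offset 0 and a byte copy at offset 2 of the updated
   pointers, i.e. 12 + 12 + 2 + 1 = 27 bytes in total.  */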
19854 void
19855 thumb_reload_out_hi (rtx *operands)
19857 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
19860 /* Handle reading a half-word from memory during reload. */
19861 void
19862 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
19864 gcc_unreachable ();
19867 /* Return the length of a function name prefix
19868 that starts with the character 'c'. */
19869 static int
19870 arm_get_strip_length (int c)
19872 switch (c)
19874 ARM_NAME_ENCODING_LENGTHS
19875 default: return 0;
19879 /* Return a pointer to a function's name with any
19880 and all prefix encodings stripped from it. */
19881 const char *
19882 arm_strip_name_encoding (const char *name)
19884 int skip;
19886 while ((skip = arm_get_strip_length (* name)))
19887 name += skip;
19889 return name;
19892 /* If there is a '*' anywhere in the name's prefix, then
19893 emit the stripped name verbatim, otherwise prepend an
19894 underscore if leading underscores are being used. */
19895 void
19896 arm_asm_output_labelref (FILE *stream, const char *name)
19898 int skip;
19899 int verbatim = 0;
19901 while ((skip = arm_get_strip_length (* name)))
19903 verbatim |= (*name == '*');
19904 name += skip;
19907 if (verbatim)
19908 fputs (name, stream);
19909 else
19910 asm_fprintf (stream, "%U%s", name);
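/* Illustrative example, not from the original sources, assuming '*' is one
   of the prefixes listed in ARM_NAME_ENCODING_LENGTHS: a name such as
   "*bar" is stripped and emitted verbatim as "bar", whereas a plain "bar"
   goes through %U and so picks up the user-label prefix (e.g. "_bar" on
   targets that prepend underscores).  */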
19913 static void
19914 arm_file_start (void)
19916 int val;
19918 if (TARGET_UNIFIED_ASM)
19919 asm_fprintf (asm_out_file, "\t.syntax unified\n");
19921 if (TARGET_BPABI)
19923 const char *fpu_name;
19924 if (arm_select[0].string)
19925 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
19926 else if (arm_select[1].string)
19927 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
19928 else
19929 asm_fprintf (asm_out_file, "\t.cpu %s\n",
19930 all_cores[arm_default_cpu].name);
19932 if (TARGET_SOFT_FLOAT)
19934 if (TARGET_VFP)
19935 fpu_name = "softvfp";
19936 else
19937 fpu_name = "softfpa";
19939 else
19941 fpu_name = arm_fpu_desc->name;
19942 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
19944 if (TARGET_HARD_FLOAT)
19945 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
19946 if (TARGET_HARD_FLOAT_ABI)
19947 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
19950 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
19952 /* Some of these attributes only apply when the corresponding features
19953 are used. However we don't have any easy way of figuring this out.
19954 Conservatively record the setting that would have been used. */
19956 /* Tag_ABI_FP_rounding. */
19957 if (flag_rounding_math)
19958 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
19959 if (!flag_unsafe_math_optimizations)
19961 /* Tag_ABI_FP_denormal. */
19962 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
19963 /* Tag_ABI_FP_exceptions. */
19964 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
19966 /* Tag_ABI_FP_user_exceptions. */
19967 if (flag_signaling_nans)
19968 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
19969 /* Tag_ABI_FP_number_model. */
19970 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
19971 flag_finite_math_only ? 1 : 3);
19973 /* Tag_ABI_align8_needed. */
19974 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
19975 /* Tag_ABI_align8_preserved. */
19976 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
19977 /* Tag_ABI_enum_size. */
19978 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
19979 flag_short_enums ? 1 : 2);
19981 /* Tag_ABI_optimization_goals. */
19982 if (optimize_size)
19983 val = 4;
19984 else if (optimize >= 2)
19985 val = 2;
19986 else if (optimize)
19987 val = 1;
19988 else
19989 val = 6;
19990 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
19992 /* Tag_ABI_FP_16bit_format. */
19993 if (arm_fp16_format)
19994 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
19995 (int)arm_fp16_format);
19997 if (arm_lang_output_object_attributes_hook)
19998 arm_lang_output_object_attributes_hook();
20000 default_file_start();
20003 static void
20004 arm_file_end (void)
20006 int regno;
20008 if (NEED_INDICATE_EXEC_STACK)
20009 /* Add .note.GNU-stack. */
20010 file_end_indicate_exec_stack ();
20012 if (! thumb_call_reg_needed)
20013 return;
20015 switch_to_section (text_section);
20016 asm_fprintf (asm_out_file, "\t.code 16\n");
20017 ASM_OUTPUT_ALIGN (asm_out_file, 1);
20019 for (regno = 0; regno < LR_REGNUM; regno++)
20021 rtx label = thumb_call_via_label[regno];
20023 if (label != 0)
20025 targetm.asm_out.internal_label (asm_out_file, "L",
20026 CODE_LABEL_NUMBER (label));
20027 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20032 #ifndef ARM_PE
20033 /* Symbols in the text segment can be accessed without indirecting via the
20034 constant pool; it may take an extra binary operation, but this is still
20035 faster than indirecting via memory. Don't do this when not optimizing,
20036 since we won't be calculating all of the offsets necessary to do this
20037 simplification. */
20039 static void
20040 arm_encode_section_info (tree decl, rtx rtl, int first)
20042 if (optimize > 0 && TREE_CONSTANT (decl))
20043 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
20045 default_encode_section_info (decl, rtl, first);
20047 #endif /* !ARM_PE */
20049 static void
20050 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
20052 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
20053 && !strcmp (prefix, "L"))
20055 arm_ccfsm_state = 0;
20056 arm_target_insn = NULL;
20058 default_internal_label (stream, prefix, labelno);
20061 /* Output code to add DELTA to the first argument, and then jump
20062 to FUNCTION. Used for C++ multiple inheritance. */
20063 static void
20064 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
20065 HOST_WIDE_INT delta,
20066 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
20067 tree function)
20069 static int thunk_label = 0;
20070 char label[256];
20071 char labelpc[256];
20072 int mi_delta = delta;
20073 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
20074 int shift = 0;
20075 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
20076 ? 1 : 0);
20077 if (mi_delta < 0)
20078 mi_delta = - mi_delta;
20080 if (TARGET_THUMB1)
20082 int labelno = thunk_label++;
20083 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
20084 /* Thunks are entered in ARM mode when available. */
20085 if (TARGET_THUMB1_ONLY)
20087 /* push r3 so we can use it as a temporary. */
20088 /* TODO: Omit this save if r3 is not used. */
20089 fputs ("\tpush {r3}\n", file);
20090 fputs ("\tldr\tr3, ", file);
20092 else
20094 fputs ("\tldr\tr12, ", file);
20096 assemble_name (file, label);
20097 fputc ('\n', file);
20098 if (flag_pic)
20100 /* If we are generating PIC, the ldr instruction below loads
20101 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
20102 the address of the add + 8, so we have:
20104 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
20105 = target + 1.
20107 Note that we have "+ 1" because some versions of GNU ld
20108 don't set the low bit of the result for R_ARM_REL32
20109 relocations against thumb function symbols.
20110 On ARMv6M this is +4, not +8. */
20111 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
20112 assemble_name (file, labelpc);
20113 fputs (":\n", file);
20114 if (TARGET_THUMB1_ONLY)
20116 /* This is 2 insns after the start of the thunk, so we know it
20117 is 4-byte aligned. */
20118 fputs ("\tadd\tr3, pc, r3\n", file);
20119 fputs ("\tmov r12, r3\n", file);
20121 else
20122 fputs ("\tadd\tr12, pc, r12\n", file);
20124 else if (TARGET_THUMB1_ONLY)
20125 fputs ("\tmov r12, r3\n", file);
20127 if (TARGET_THUMB1_ONLY)
20129 if (mi_delta > 255)
20131 fputs ("\tldr\tr3, ", file);
20132 assemble_name (file, label);
20133 fputs ("+4\n", file);
20134 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
20135 mi_op, this_regno, this_regno);
20137 else if (mi_delta != 0)
20139 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20140 mi_op, this_regno, this_regno,
20141 mi_delta);
20144 else
20146 /* TODO: Use movw/movt for large constants when available. */
20147 while (mi_delta != 0)
20149 if ((mi_delta & (3 << shift)) == 0)
20150 shift += 2;
20151 else
20153 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20154 mi_op, this_regno, this_regno,
20155 mi_delta & (0xff << shift));
20156 mi_delta &= ~(0xff << shift);
20157 shift += 8;
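/* Illustrative example, not from the original sources: for mi_delta ==
   0x12345 and this_regno == 0 the loop above emits

	add	r0, r0, #0x45
	add	r0, r0, #0x2300
	add	r0, r0, #0x10000

   consuming eight bits of the delta per instruction; each operand is a
   valid ARM rotated-immediate.  */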
20161 if (TARGET_THUMB1)
20163 if (TARGET_THUMB1_ONLY)
20164 fputs ("\tpop\t{r3}\n", file);
20166 fprintf (file, "\tbx\tr12\n");
20167 ASM_OUTPUT_ALIGN (file, 2);
20168 assemble_name (file, label);
20169 fputs (":\n", file);
20170 if (flag_pic)
20172 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
20173 rtx tem = XEXP (DECL_RTL (function), 0);
20174 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
20175 tem = gen_rtx_MINUS (GET_MODE (tem),
20176 tem,
20177 gen_rtx_SYMBOL_REF (Pmode,
20178 ggc_strdup (labelpc)));
20179 assemble_integer (tem, 4, BITS_PER_WORD, 1);
20181 else
20182 /* Output ".word .LTHUNKn". */
20183 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
20185 if (TARGET_THUMB1_ONLY && mi_delta > 255)
20186 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
20188 else
20190 fputs ("\tb\t", file);
20191 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
20192 if (NEED_PLT_RELOC)
20193 fputs ("(PLT)", file);
20194 fputc ('\n', file);
20199 arm_emit_vector_const (FILE *file, rtx x)
20201 int i;
20202 const char * pattern;
20204 gcc_assert (GET_CODE (x) == CONST_VECTOR);
20206 switch (GET_MODE (x))
20208 case V2SImode: pattern = "%08x"; break;
20209 case V4HImode: pattern = "%04x"; break;
20210 case V8QImode: pattern = "%02x"; break;
20211 default: gcc_unreachable ();
20214 fprintf (file, "0x");
20215 for (i = CONST_VECTOR_NUNITS (x); i--;)
20217 rtx element;
20219 element = CONST_VECTOR_ELT (x, i);
20220 fprintf (file, pattern, INTVAL (element));
20223 return 1;
20226 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20227 HFmode constant pool entries are actually loaded with ldr. */
20228 void
20229 arm_emit_fp16_const (rtx c)
20231 REAL_VALUE_TYPE r;
20232 long bits;
20234 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
20235 bits = real_to_target (NULL, &r, HFmode);
20236 if (WORDS_BIG_ENDIAN)
20237 assemble_zeros (2);
20238 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
20239 if (!WORDS_BIG_ENDIAN)
20240 assemble_zeros (2);
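/* Illustrative example, not from the original sources: the HFmode constant
   1.0 has the IEEE half-precision bit pattern 0x3c00, so on a little-endian
   target the pool entry built above is roughly

	.short	0x3c00
	.short	0

   while a WORDS_BIG_ENDIAN target emits the two bytes of zero padding
   first.  */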
20243 const char *
20244 arm_output_load_gr (rtx *operands)
20246 rtx reg;
20247 rtx offset;
20248 rtx wcgr;
20249 rtx sum;
20251 if (GET_CODE (operands [1]) != MEM
20252 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
20253 || GET_CODE (reg = XEXP (sum, 0)) != REG
20254 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
20255 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
20256 return "wldrw%?\t%0, %1";
20258 /* Fix up an out-of-range load of a GR register. */
20259 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
20260 wcgr = operands[0];
20261 operands[0] = reg;
20262 output_asm_insn ("ldr%?\t%0, %1", operands);
20264 operands[0] = wcgr;
20265 operands[1] = reg;
20266 output_asm_insn ("tmcr%?\t%0, %1", operands);
20267 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
20269 return "";
20272 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
20274 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
20275 named arg and all anonymous args onto the stack.
20276 XXX I know the prologue shouldn't be pushing registers, but it is faster
20277 that way. */
20279 static void
20280 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
20281 enum machine_mode mode,
20282 tree type,
20283 int *pretend_size,
20284 int second_time ATTRIBUTE_UNUSED)
20286 int nregs;
20288 cfun->machine->uses_anonymous_args = 1;
20289 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
20291 nregs = pcum->aapcs_ncrn;
20292 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
20293 nregs++;
20295 else
20296 nregs = pcum->nregs;
20298 if (nregs < NUM_ARG_REGS)
20299 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
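/* Illustrative example, not from the original sources: for a variadic
   function such as "int f (int a, ...)" only r0 is consumed by the named
   argument, so nregs == 1 and *pretend_size becomes 3 * UNITS_PER_WORD,
   making the prologue push r1-r3 so that the anonymous arguments sit
   contiguously with any stack-passed ones.  */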
20302 /* Return nonzero if the CONSUMER instruction (a store) does not need
20303 PRODUCER's value to calculate the address. */
20306 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
20308 rtx value = PATTERN (producer);
20309 rtx addr = PATTERN (consumer);
20311 if (GET_CODE (value) == COND_EXEC)
20312 value = COND_EXEC_CODE (value);
20313 if (GET_CODE (value) == PARALLEL)
20314 value = XVECEXP (value, 0, 0);
20315 value = XEXP (value, 0);
20316 if (GET_CODE (addr) == COND_EXEC)
20317 addr = COND_EXEC_CODE (addr);
20318 if (GET_CODE (addr) == PARALLEL)
20319 addr = XVECEXP (addr, 0, 0);
20320 addr = XEXP (addr, 0);
20322 return !reg_overlap_mentioned_p (value, addr);
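/* Illustrative example, not from the original sources: if PRODUCER is
   "r1 = r2 + r3" and CONSUMER is the store "[r5] = r1", this returns
   nonzero (r1 is only the stored value); for the store "[r1, #4] = r4"
   it returns 0, because r1 is needed to form the address.  */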
20325 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20326 have an early register shift value or amount dependency on the
20327 result of PRODUCER. */
20330 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
20332 rtx value = PATTERN (producer);
20333 rtx op = PATTERN (consumer);
20334 rtx early_op;
20336 if (GET_CODE (value) == COND_EXEC)
20337 value = COND_EXEC_CODE (value);
20338 if (GET_CODE (value) == PARALLEL)
20339 value = XVECEXP (value, 0, 0);
20340 value = XEXP (value, 0);
20341 if (GET_CODE (op) == COND_EXEC)
20342 op = COND_EXEC_CODE (op);
20343 if (GET_CODE (op) == PARALLEL)
20344 op = XVECEXP (op, 0, 0);
20345 op = XEXP (op, 1);
20347 early_op = XEXP (op, 0);
20348 /* This is either an actual independent shift, or a shift applied to
20349 the first operand of another operation. We want the whole shift
20350 operation. */
20351 if (GET_CODE (early_op) == REG)
20352 early_op = op;
20354 return !reg_overlap_mentioned_p (value, early_op);
20357 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20358 have an early register shift value dependency on the result of
20359 PRODUCER. */
20362 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
20364 rtx value = PATTERN (producer);
20365 rtx op = PATTERN (consumer);
20366 rtx early_op;
20368 if (GET_CODE (value) == COND_EXEC)
20369 value = COND_EXEC_CODE (value);
20370 if (GET_CODE (value) == PARALLEL)
20371 value = XVECEXP (value, 0, 0);
20372 value = XEXP (value, 0);
20373 if (GET_CODE (op) == COND_EXEC)
20374 op = COND_EXEC_CODE (op);
20375 if (GET_CODE (op) == PARALLEL)
20376 op = XVECEXP (op, 0, 0);
20377 op = XEXP (op, 1);
20379 early_op = XEXP (op, 0);
20381 /* This is either an actual independent shift, or a shift applied to
20382 the first operand of another operation. We want the value being
20383 shifted, in either case. */
20384 if (GET_CODE (early_op) != REG)
20385 early_op = XEXP (early_op, 0);
20387 return !reg_overlap_mentioned_p (value, early_op);
20390 /* Return nonzero if the CONSUMER (a mul or mac op) does not
20391 have an early register mult dependency on the result of
20392 PRODUCER. */
20395 arm_no_early_mul_dep (rtx producer, rtx consumer)
20397 rtx value = PATTERN (producer);
20398 rtx op = PATTERN (consumer);
20400 if (GET_CODE (value) == COND_EXEC)
20401 value = COND_EXEC_CODE (value);
20402 if (GET_CODE (value) == PARALLEL)
20403 value = XVECEXP (value, 0, 0);
20404 value = XEXP (value, 0);
20405 if (GET_CODE (op) == COND_EXEC)
20406 op = COND_EXEC_CODE (op);
20407 if (GET_CODE (op) == PARALLEL)
20408 op = XVECEXP (op, 0, 0);
20409 op = XEXP (op, 1);
20411 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
20413 if (GET_CODE (XEXP (op, 0)) == MULT)
20414 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
20415 else
20416 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
20419 return 0;
20422 /* We can't rely on the caller doing the proper promotion when
20423 using APCS or ATPCS. */
20425 static bool
20426 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
20428 return !TARGET_AAPCS_BASED;
20431 static enum machine_mode
20432 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
20433 enum machine_mode mode,
20434 int *punsignedp ATTRIBUTE_UNUSED,
20435 const_tree fntype ATTRIBUTE_UNUSED,
20436 int for_return ATTRIBUTE_UNUSED)
20438 if (GET_MODE_CLASS (mode) == MODE_INT
20439 && GET_MODE_SIZE (mode) < 4)
20440 return SImode;
20442 return mode;
20445 /* AAPCS based ABIs use short enums by default. */
20447 static bool
20448 arm_default_short_enums (void)
20450 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
20454 /* AAPCS requires that anonymous bitfields affect structure alignment. */
20456 static bool
20457 arm_align_anon_bitfield (void)
20459 return TARGET_AAPCS_BASED;
20463 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
20465 static tree
20466 arm_cxx_guard_type (void)
20468 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
20471 /* Return non-zero if the consumer (a multiply-accumulate instruction)
20472 has an accumulator dependency on the result of the producer (a
20473 multiplication instruction) and no other dependency on that result. */
20475 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
20477 rtx mul = PATTERN (producer);
20478 rtx mac = PATTERN (consumer);
20479 rtx mul_result;
20480 rtx mac_op0, mac_op1, mac_acc;
20482 if (GET_CODE (mul) == COND_EXEC)
20483 mul = COND_EXEC_CODE (mul);
20484 if (GET_CODE (mac) == COND_EXEC)
20485 mac = COND_EXEC_CODE (mac);
20487 /* Check that mul is of the form (set (...) (mult ...))
20488 and mla is of the form (set (...) (plus (mult ...) (...))). */
20489 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
20490 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
20491 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
20492 return 0;
20494 mul_result = XEXP (mul, 0);
20495 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
20496 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
20497 mac_acc = XEXP (XEXP (mac, 1), 1);
20499 return (reg_overlap_mentioned_p (mul_result, mac_acc)
20500 && !reg_overlap_mentioned_p (mul_result, mac_op0)
20501 && !reg_overlap_mentioned_p (mul_result, mac_op1));
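/* Illustrative example, not from the original sources: with PRODUCER
   "r1 = r2 * r3" and CONSUMER "r4 = r5 * r6 + r1" this returns nonzero,
   since r1 feeds only the accumulator; with CONSUMER "r4 = r1 * r6 + r1"
   it returns 0, because r1 is also a multiply operand.  */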
20505 /* The EABI says test the least significant bit of a guard variable. */
20507 static bool
20508 arm_cxx_guard_mask_bit (void)
20510 return TARGET_AAPCS_BASED;
20514 /* The EABI specifies that all array cookies are 8 bytes long. */
20516 static tree
20517 arm_get_cookie_size (tree type)
20519 tree size;
20521 if (!TARGET_AAPCS_BASED)
20522 return default_cxx_get_cookie_size (type);
20524 size = build_int_cst (sizetype, 8);
20525 return size;
20529 /* The EABI says that array cookies should also contain the element size. */
20531 static bool
20532 arm_cookie_has_size (void)
20534 return TARGET_AAPCS_BASED;
20538 /* The EABI says constructors and destructors should return a pointer to
20539 the object constructed/destroyed. */
20541 static bool
20542 arm_cxx_cdtor_returns_this (void)
20544 return TARGET_AAPCS_BASED;
20547 /* The EABI says that an inline function may never be the key
20548 method. */
20550 static bool
20551 arm_cxx_key_method_may_be_inline (void)
20553 return !TARGET_AAPCS_BASED;
20556 static void
20557 arm_cxx_determine_class_data_visibility (tree decl)
20559 if (!TARGET_AAPCS_BASED
20560 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
20561 return;
20563 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
20564 is exported. However, on systems without dynamic vague linkage,
20565 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
20566 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
20567 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
20568 else
20569 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
20570 DECL_VISIBILITY_SPECIFIED (decl) = 1;
20573 static bool
20574 arm_cxx_class_data_always_comdat (void)
20576 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
20577 vague linkage if the class has no key function. */
20578 return !TARGET_AAPCS_BASED;
20582 /* The EABI says __aeabi_atexit should be used to register static
20583 destructors. */
20585 static bool
20586 arm_cxx_use_aeabi_atexit (void)
20588 return TARGET_AAPCS_BASED;
20592 void
20593 arm_set_return_address (rtx source, rtx scratch)
20595 arm_stack_offsets *offsets;
20596 HOST_WIDE_INT delta;
20597 rtx addr;
20598 unsigned long saved_regs;
20600 offsets = arm_get_frame_offsets ();
20601 saved_regs = offsets->saved_regs_mask;
20603 if ((saved_regs & (1 << LR_REGNUM)) == 0)
20604 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20605 else
20607 if (frame_pointer_needed)
20608 addr = plus_constant(hard_frame_pointer_rtx, -4);
20609 else
20611 /* LR will be the first saved register. */
20612 delta = offsets->outgoing_args - (offsets->frame + 4);
20615 if (delta >= 4096)
20617 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
20618 GEN_INT (delta & ~4095)));
20619 addr = scratch;
20620 delta &= 4095;
20622 else
20623 addr = stack_pointer_rtx;
20625 addr = plus_constant (addr, delta);
20627 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20632 void
20633 thumb_set_return_address (rtx source, rtx scratch)
20635 arm_stack_offsets *offsets;
20636 HOST_WIDE_INT delta;
20637 HOST_WIDE_INT limit;
20638 int reg;
20639 rtx addr;
20640 unsigned long mask;
20642 emit_use (source);
20644 offsets = arm_get_frame_offsets ();
20645 mask = offsets->saved_regs_mask;
20646 if (mask & (1 << LR_REGNUM))
20648 limit = 1024;
20649 /* Find the saved regs. */
20650 if (frame_pointer_needed)
20652 delta = offsets->soft_frame - offsets->saved_args;
20653 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
20654 if (TARGET_THUMB1)
20655 limit = 128;
20657 else
20659 delta = offsets->outgoing_args - offsets->saved_args;
20660 reg = SP_REGNUM;
20662 /* Allow for the stack frame. */
20663 if (TARGET_THUMB1 && TARGET_BACKTRACE)
20664 delta -= 16;
20665 /* The link register is always the first saved register. */
20666 delta -= 4;
20668 /* Construct the address. */
20669 addr = gen_rtx_REG (SImode, reg);
20670 if (delta > limit)
20672 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
20673 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
20674 addr = scratch;
20676 else
20677 addr = plus_constant (addr, delta);
20679 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20681 else
20682 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20685 /* Implements target hook vector_mode_supported_p. */
20686 bool
20687 arm_vector_mode_supported_p (enum machine_mode mode)
20689 /* Neon also supports V2SImode, etc. listed in the clause below. */
20690 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
20691 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
20692 return true;
20694 if ((TARGET_NEON || TARGET_IWMMXT)
20695 && ((mode == V2SImode)
20696 || (mode == V4HImode)
20697 || (mode == V8QImode)))
20698 return true;
20700 return false;
20703 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
20704 ARM insns and therefore guarantee that the shift count is modulo 256.
20705 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
20706 guarantee no particular behavior for out-of-range counts. */
20708 static unsigned HOST_WIDE_INT
20709 arm_shift_truncation_mask (enum machine_mode mode)
20711 return mode == SImode ? 255 : 0;
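/* Illustrative example, not from the original sources: because the SImode
   mask is 255, the middle end may fold "x << (n & 255)" into "x << n",
   whereas the DImode mask of 0 promises nothing about out-of-range counts
   and no such folding is performed.  */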
20715 /* Map internal gcc register numbers to DWARF2 register numbers. */
20717 unsigned int
20718 arm_dbx_register_number (unsigned int regno)
20720 if (regno < 16)
20721 return regno;
20723 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
20724 compatibility. The EABI defines them as registers 96-103. */
20725 if (IS_FPA_REGNUM (regno))
20726 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
20728 if (IS_VFP_REGNUM (regno))
20730 /* See comment in arm_dwarf_register_span. */
20731 if (VFP_REGNO_OK_FOR_SINGLE (regno))
20732 return 64 + regno - FIRST_VFP_REGNUM;
20733 else
20734 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
20737 if (IS_IWMMXT_GR_REGNUM (regno))
20738 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
20740 if (IS_IWMMXT_REGNUM (regno))
20741 return 112 + regno - FIRST_IWMMXT_REGNUM;
20743 gcc_unreachable ();
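/* Illustrative examples, not from the original sources and assuming the
   usual arm.h register layout: s5 is reported with the legacy numbering as
   64 + 5 = 69, while d16 (which has no single-precision alias) falls in the
   256-287 range as 256 + 16 = 272.  */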
20746 /* Dwarf models VFPv3 registers as 32 64-bit registers.
20747 GCC models them as 64 32-bit registers, so we need to describe this to
20748 the DWARF generation code. Other registers can use the default. */
20749 static rtx
20750 arm_dwarf_register_span (rtx rtl)
20752 unsigned regno;
20753 int nregs;
20754 int i;
20755 rtx p;
20757 regno = REGNO (rtl);
20758 if (!IS_VFP_REGNUM (regno))
20759 return NULL_RTX;
20761 /* XXX FIXME: The EABI defines two VFP register ranges:
20762 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
20763 256-287: D0-D31
20764 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
20765 corresponding D register. Until GDB supports this, we shall use the
20766 legacy encodings. We also use these encodings for D0-D15 for
20767 compatibility with older debuggers. */
20768 if (VFP_REGNO_OK_FOR_SINGLE (regno))
20769 return NULL_RTX;
20771 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
20772 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
20773 regno = (regno - FIRST_VFP_REGNUM) / 2;
20774 for (i = 0; i < nregs; i++)
20775 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
20777 return p;
20780 #ifdef TARGET_UNWIND_INFO
20781 /* Emit unwind directives for a store-multiple instruction or stack pointer
20782 push during alignment.
20783 These should only ever be generated by the function prologue code, so
20784 expect them to have a particular form. */
20786 static void
20787 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
20789 int i;
20790 HOST_WIDE_INT offset;
20791 HOST_WIDE_INT nregs;
20792 int reg_size;
20793 unsigned reg;
20794 unsigned lastreg;
20795 rtx e;
20797 e = XVECEXP (p, 0, 0);
20798 if (GET_CODE (e) != SET)
20799 abort ();
20801 /* First insn will adjust the stack pointer. */
20802 if (GET_CODE (e) != SET
20803 || GET_CODE (XEXP (e, 0)) != REG
20804 || REGNO (XEXP (e, 0)) != SP_REGNUM
20805 || GET_CODE (XEXP (e, 1)) != PLUS)
20806 abort ();
20808 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
20809 nregs = XVECLEN (p, 0) - 1;
20811 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
20812 if (reg < 16)
20814 /* The function prologue may also push pc, but does not annotate it, as it
20815 is never restored. We turn this into a stack pointer adjustment. */
20816 if (nregs * 4 == offset - 4)
20818 fprintf (asm_out_file, "\t.pad #4\n");
20819 offset -= 4;
20821 reg_size = 4;
20822 fprintf (asm_out_file, "\t.save {");
20824 else if (IS_VFP_REGNUM (reg))
20826 reg_size = 8;
20827 fprintf (asm_out_file, "\t.vsave {");
20829 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
20831 /* FPA registers are done differently. */
20832 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
20833 return;
20835 else
20836 /* Unknown register type. */
20837 abort ();
20839 /* If the stack increment doesn't match the size of the saved registers,
20840 something has gone horribly wrong. */
20841 if (offset != nregs * reg_size)
20842 abort ();
20844 offset = 0;
20845 lastreg = 0;
20846 /* The remaining insns will describe the stores. */
20847 for (i = 1; i <= nregs; i++)
20849 /* Expect (set (mem <addr>) (reg)).
20850 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
20851 e = XVECEXP (p, 0, i);
20852 if (GET_CODE (e) != SET
20853 || GET_CODE (XEXP (e, 0)) != MEM
20854 || GET_CODE (XEXP (e, 1)) != REG)
20855 abort ();
20857 reg = REGNO (XEXP (e, 1));
20858 if (reg < lastreg)
20859 abort ();
20861 if (i != 1)
20862 fprintf (asm_out_file, ", ");
20863 /* We can't use %r for vfp because we need to use the
20864 double precision register names. */
20865 if (IS_VFP_REGNUM (reg))
20866 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
20867 else
20868 asm_fprintf (asm_out_file, "%r", reg);
20870 #ifdef ENABLE_CHECKING
20871 /* Check that the addresses are consecutive. */
20872 e = XEXP (XEXP (e, 0), 0);
20873 if (GET_CODE (e) == PLUS)
20875 offset += reg_size;
20876 if (GET_CODE (XEXP (e, 0)) != REG
20877 || REGNO (XEXP (e, 0)) != SP_REGNUM
20878 || GET_CODE (XEXP (e, 1)) != CONST_INT
20879 || offset != INTVAL (XEXP (e, 1)))
20880 abort ();
20882 else if (i != 1
20883 || GET_CODE (e) != REG
20884 || REGNO (e) != SP_REGNUM)
20885 abort ();
20886 #endif
20888 fprintf (asm_out_file, "}\n");
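/* Illustrative example, not from the original sources: a prologue
   "push {r4, r5, lr}", represented as a PARALLEL that drops sp by 12 and
   stores the three registers at consecutive offsets, makes the code above
   emit the single directive

	.save	{r4, r5, lr}  */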
20891 /* Emit unwind directives for a SET. */
20893 static void
20894 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
20896 rtx e0;
20897 rtx e1;
20898 unsigned reg;
20900 e0 = XEXP (p, 0);
20901 e1 = XEXP (p, 1);
20902 switch (GET_CODE (e0))
20904 case MEM:
20905 /* Pushing a single register. */
20906 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
20907 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
20908 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
20909 abort ();
20911 asm_fprintf (asm_out_file, "\t.save ");
20912 if (IS_VFP_REGNUM (REGNO (e1)))
20913 asm_fprintf(asm_out_file, "{d%d}\n",
20914 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
20915 else
20916 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
20917 break;
20919 case REG:
20920 if (REGNO (e0) == SP_REGNUM)
20922 /* A stack increment. */
20923 if (GET_CODE (e1) != PLUS
20924 || GET_CODE (XEXP (e1, 0)) != REG
20925 || REGNO (XEXP (e1, 0)) != SP_REGNUM
20926 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
20927 abort ();
20929 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
20930 -INTVAL (XEXP (e1, 1)));
20932 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
20934 HOST_WIDE_INT offset;
20936 if (GET_CODE (e1) == PLUS)
20938 if (GET_CODE (XEXP (e1, 0)) != REG
20939 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
20940 abort ();
20941 reg = REGNO (XEXP (e1, 0));
20942 offset = INTVAL (XEXP (e1, 1));
20943 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
20944 HARD_FRAME_POINTER_REGNUM, reg,
20945 INTVAL (XEXP (e1, 1)));
20947 else if (GET_CODE (e1) == REG)
20949 reg = REGNO (e1);
20950 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
20951 HARD_FRAME_POINTER_REGNUM, reg);
20953 else
20954 abort ();
20956 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
20958 /* Move from sp to reg. */
20959 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
20961 else if (GET_CODE (e1) == PLUS
20962 && GET_CODE (XEXP (e1, 0)) == REG
20963 && REGNO (XEXP (e1, 0)) == SP_REGNUM
20964 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
20966 /* Set reg to offset from sp. */
20967 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
20968 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
20970 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
20972 /* Stack pointer save before alignment. */
20973 reg = REGNO (e0);
20974 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
20975 reg + 0x90, reg);
20977 else
20978 abort ();
20979 break;
20981 default:
20982 abort ();
20987 /* Emit unwind directives for the given insn. */
20989 static void
20990 arm_unwind_emit (FILE * asm_out_file, rtx insn)
20992 rtx pat;
20994 if (!ARM_EABI_UNWIND_TABLES)
20995 return;
20997 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
20998 && (TREE_NOTHROW (current_function_decl)
20999 || crtl->all_throwers_are_sibcalls))
21000 return;
21002 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
21003 return;
21005 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
21006 if (pat)
21007 pat = XEXP (pat, 0);
21008 else
21009 pat = PATTERN (insn);
21011 switch (GET_CODE (pat))
21013 case SET:
21014 arm_unwind_emit_set (asm_out_file, pat);
21015 break;
21017 case SEQUENCE:
21018 /* Store multiple. */
21019 arm_unwind_emit_sequence (asm_out_file, pat);
21020 break;
21022 default:
21023 abort();
21028 /* Output a reference from a function exception table to the type_info
21029 object X. The EABI specifies that the symbol should be relocated by
21030 an R_ARM_TARGET2 relocation. */
21032 static bool
21033 arm_output_ttype (rtx x)
21035 fputs ("\t.word\t", asm_out_file);
21036 output_addr_const (asm_out_file, x);
21037 /* Use special relocations for symbol references. */
21038 if (GET_CODE (x) != CONST_INT)
21039 fputs ("(TARGET2)", asm_out_file);
21040 fputc ('\n', asm_out_file);
21042 return TRUE;
21044 #endif /* TARGET_UNWIND_INFO */
21047 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
21048 stack alignment. */
21050 static void
21051 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
21053 rtx unspec = SET_SRC (pattern);
21054 gcc_assert (GET_CODE (unspec) == UNSPEC);
21056 switch (index)
21058 case UNSPEC_STACK_ALIGN:
21059 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
21060 put anything on the stack, so hopefully it won't matter.
21061 CFA = SP will be correct after alignment. */
21062 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
21063 SET_DEST (pattern));
21064 break;
21065 default:
21066 gcc_unreachable ();
21071 /* Output unwind directives for the start/end of a function. */
21073 void
21074 arm_output_fn_unwind (FILE * f, bool prologue)
21076 if (!ARM_EABI_UNWIND_TABLES)
21077 return;
21079 if (prologue)
21080 fputs ("\t.fnstart\n", f);
21081 else
21083 /* If this function will never be unwound, then mark it as such.
21084 The same condition is used in arm_unwind_emit to suppress
21085 the frame annotations. */
21086 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21087 && (TREE_NOTHROW (current_function_decl)
21088 || crtl->all_throwers_are_sibcalls))
21089 fputs("\t.cantunwind\n", f);
21091 fputs ("\t.fnend\n", f);
21095 static bool
21096 arm_emit_tls_decoration (FILE *fp, rtx x)
21098 enum tls_reloc reloc;
21099 rtx val;
21101 val = XVECEXP (x, 0, 0);
21102 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
21104 output_addr_const (fp, val);
21106 switch (reloc)
21108 case TLS_GD32:
21109 fputs ("(tlsgd)", fp);
21110 break;
21111 case TLS_LDM32:
21112 fputs ("(tlsldm)", fp);
21113 break;
21114 case TLS_LDO32:
21115 fputs ("(tlsldo)", fp);
21116 break;
21117 case TLS_IE32:
21118 fputs ("(gottpoff)", fp);
21119 break;
21120 case TLS_LE32:
21121 fputs ("(tpoff)", fp);
21122 break;
21123 default:
21124 gcc_unreachable ();
21127 switch (reloc)
21129 case TLS_GD32:
21130 case TLS_LDM32:
21131 case TLS_IE32:
21132 fputs (" + (. - ", fp);
21133 output_addr_const (fp, XVECEXP (x, 0, 2));
21134 fputs (" - ", fp);
21135 output_addr_const (fp, XVECEXP (x, 0, 3));
21136 fputc (')', fp);
21137 break;
21138 default:
21139 break;
21142 return TRUE;
21145 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
21147 static void
21148 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
21150 gcc_assert (size == 4);
21151 fputs ("\t.word\t", file);
21152 output_addr_const (file, x);
21153 fputs ("(tlsldo)", file);
21156 bool
21157 arm_output_addr_const_extra (FILE *fp, rtx x)
21159 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
21160 return arm_emit_tls_decoration (fp, x);
21161 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
21163 char label[256];
21164 int labelno = INTVAL (XVECEXP (x, 0, 0));
21166 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
21167 assemble_name_raw (fp, label);
21169 return TRUE;
21171 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
21173 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
21174 if (GOT_PCREL)
21175 fputs ("+.", fp);
21176 fputs ("-(", fp);
21177 output_addr_const (fp, XVECEXP (x, 0, 0));
21178 fputc (')', fp);
21179 return TRUE;
21181 else if (GET_CODE (x) == CONST_VECTOR)
21182 return arm_emit_vector_const (fp, x);
21184 return FALSE;
21187 /* Output assembly for a shift instruction.
21188 SET_FLAGS determines how the instruction modifies the condition codes.
21189 0 - Do not set condition codes.
21190 1 - Set condition codes.
21191 2 - Use smallest instruction. */
21192 const char *
21193 arm_output_shift(rtx * operands, int set_flags)
21195 char pattern[100];
21196 static const char flag_chars[3] = {'?', '.', '!'};
21197 const char *shift;
21198 HOST_WIDE_INT val;
21199 char c;
21201 c = flag_chars[set_flags];
21202 if (TARGET_UNIFIED_ASM)
21204 shift = shift_op(operands[3], &val);
21205 if (shift)
21207 if (val != -1)
21208 operands[2] = GEN_INT(val);
21209 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
21211 else
21212 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
21214 else
21215 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
21216 output_asm_insn (pattern, operands);
21217 return "";
21220 /* Output a Thumb-1 casesi dispatch sequence. */
21221 const char *
21222 thumb1_output_casesi (rtx *operands)
21224 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
21225 addr_diff_vec_flags flags;
21227 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21229 flags = ADDR_DIFF_VEC_FLAGS (diff_vec);
21231 switch (GET_MODE(diff_vec))
21233 case QImode:
21234 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21235 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
21236 case HImode:
21237 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21238 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
21239 case SImode:
21240 return "bl\t%___gnu_thumb1_case_si";
21241 default:
21242 gcc_unreachable ();
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
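/* For illustration only: with a HImode (halfword) dispatch table the
   routine above emits a sequence along the lines of

	cmp	r0, #N
	bhi	.Ldefault
	tbh	[pc, r0, lsl #1]

   where operand 0 is the case index, operand 1 the upper bound N and
   operand 3 the out-of-range label.  */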
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};
const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi && !in_system_header)
	{
	  warned = true;
	  inform (input_location,
		  "the mangling of %<va_list%> has changed in GCC 4.4");
	}
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
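/* For illustration only: the arm_neon.h type int8x8_t has mode
   V8QImode and element type __builtin_neon_qi, so the table lookup
   above yields "15__simd64_int8_t"; a C++ function "void f (int8x8_t)"
   therefore mangles as "_Z1f15__simd64_int8_t".  */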
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Set default optimization options.  */
void
arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.  */
  if (level > 0)
    flag_section_anchors = 2;
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}
/* Thumb-1 is the only target without support for conditional
   execution, so return true unless the target is Thumb-1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}

#include "gt-arm.h"