gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
55 #include "df.h"
56 #include "intl.h"
57 #include "libfuncs.h"
59 /* Forward definitions of types. */
60 typedef struct minipool_node Mnode;
61 typedef struct minipool_fixup Mfix;
63 void (*arm_lang_output_object_attributes_hook)(void);
65 /* Forward function declarations. */
66 static int arm_compute_static_chain_stack_bytes (void);
67 static arm_stack_offsets *arm_get_frame_offsets (void);
68 static void arm_add_gc_roots (void);
69 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
70 HOST_WIDE_INT, rtx, rtx, int, int);
71 static unsigned bit_count (unsigned long);
72 static int arm_address_register_rtx_p (rtx, int);
73 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
74 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
75 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
76 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
77 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
78 inline static int thumb1_index_register_rtx_p (rtx, int);
79 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
80 static int thumb_far_jump_used_p (void);
81 static bool thumb_force_lr_save (void);
82 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
83 static rtx emit_sfm (int, int);
84 static unsigned arm_size_return_regs (void);
85 static bool arm_assemble_integer (rtx, unsigned int, int);
86 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
87 static arm_cc get_arm_condition_code (rtx);
88 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
89 static rtx is_jump_table (rtx);
90 static const char *output_multi_immediate (rtx *, const char *, const char *,
91 int, HOST_WIDE_INT);
92 static const char *shift_op (rtx, HOST_WIDE_INT *);
93 static struct machine_function *arm_init_machine_status (void);
94 static void thumb_exit (FILE *, int);
95 static rtx is_jump_table (rtx);
96 static HOST_WIDE_INT get_jump_table_size (rtx);
97 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
98 static Mnode *add_minipool_forward_ref (Mfix *);
99 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
100 static Mnode *add_minipool_backward_ref (Mfix *);
101 static void assign_minipool_offsets (Mfix *);
102 static void arm_print_value (FILE *, rtx);
103 static void dump_minipool (rtx);
104 static int arm_barrier_cost (rtx);
105 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
106 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
107 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
108 rtx);
109 static void arm_reorg (void);
110 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
111 static unsigned long arm_compute_save_reg0_reg12_mask (void);
112 static unsigned long arm_compute_save_reg_mask (void);
113 static unsigned long arm_isr_value (tree);
114 static unsigned long arm_compute_func_type (void);
115 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
116 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
117 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
118 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
119 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
120 #endif
121 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
122 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
123 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
124 static int arm_comp_type_attributes (const_tree, const_tree);
125 static void arm_set_default_type_attributes (tree);
126 static int arm_adjust_cost (rtx, rtx, rtx, int);
127 static int count_insns_for_constant (HOST_WIDE_INT, int);
128 static int arm_get_strip_length (int);
129 static bool arm_function_ok_for_sibcall (tree, tree);
130 static enum machine_mode arm_promote_function_mode (const_tree,
131 enum machine_mode, int *,
132 const_tree, int);
133 static bool arm_return_in_memory (const_tree, const_tree);
134 static rtx arm_function_value (const_tree, const_tree, bool);
135 static rtx arm_libcall_value (enum machine_mode, const_rtx);
137 static void arm_internal_label (FILE *, const char *, unsigned long);
138 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
139 tree);
140 static bool arm_have_conditional_execution (void);
141 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
142 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
143 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
144 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
145 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
146 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
147 static bool arm_rtx_costs (rtx, int, int, int *, bool);
148 static int arm_address_cost (rtx, bool);
149 static bool arm_memory_load_p (rtx);
150 static bool arm_cirrus_insn_p (rtx);
151 static void cirrus_reorg (rtx);
152 static void arm_init_builtins (void);
153 static void arm_init_iwmmxt_builtins (void);
154 static rtx safe_vector_operand (rtx, enum machine_mode);
155 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
156 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
157 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
158 static void emit_constant_insn (rtx cond, rtx pattern);
159 static rtx emit_set_insn (rtx, rtx);
160 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
161 tree, bool);
162 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
163 const_tree);
164 static int aapcs_select_return_coproc (const_tree, const_tree);
166 #ifdef OBJECT_FORMAT_ELF
167 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
168 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
169 #endif
170 #ifndef ARM_PE
171 static void arm_encode_section_info (tree, rtx, int);
172 #endif
174 static void arm_file_end (void);
175 static void arm_file_start (void);
177 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
178 tree, int *, int);
179 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
180 enum machine_mode, const_tree, bool);
181 static bool arm_promote_prototypes (const_tree);
182 static bool arm_default_short_enums (void);
183 static bool arm_align_anon_bitfield (void);
184 static bool arm_return_in_msb (const_tree);
185 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
186 static bool arm_return_in_memory (const_tree, const_tree);
187 #ifdef TARGET_UNWIND_INFO
188 static void arm_unwind_emit (FILE *, rtx);
189 static bool arm_output_ttype (rtx);
190 #endif
191 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
192 static rtx arm_dwarf_register_span (rtx);
194 static tree arm_cxx_guard_type (void);
195 static bool arm_cxx_guard_mask_bit (void);
196 static tree arm_get_cookie_size (tree);
197 static bool arm_cookie_has_size (void);
198 static bool arm_cxx_cdtor_returns_this (void);
199 static bool arm_cxx_key_method_may_be_inline (void);
200 static void arm_cxx_determine_class_data_visibility (tree);
201 static bool arm_cxx_class_data_always_comdat (void);
202 static bool arm_cxx_use_aeabi_atexit (void);
203 static void arm_init_libfuncs (void);
204 static tree arm_build_builtin_va_list (void);
205 static void arm_expand_builtin_va_start (tree, rtx);
206 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
207 static bool arm_handle_option (size_t, const char *, int);
208 static void arm_target_help (void);
209 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
210 static bool arm_cannot_copy_insn_p (rtx);
211 static bool arm_tls_symbol_p (rtx x);
212 static int arm_issue_rate (void);
213 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
214 static bool arm_allocate_stack_slots_for_args (void);
215 static const char *arm_invalid_parameter_type (const_tree t);
216 static const char *arm_invalid_return_type (const_tree t);
217 static tree arm_promoted_type (const_tree t);
218 static tree arm_convert_to_type (tree type, tree expr);
219 static bool arm_scalar_mode_supported_p (enum machine_mode);
220 static bool arm_frame_pointer_required (void);
221 static bool arm_can_eliminate (const int, const int);
222 static void arm_asm_trampoline_template (FILE *);
223 static void arm_trampoline_init (rtx, tree, rtx);
224 static rtx arm_trampoline_adjust_address (rtx);
225 static rtx arm_pic_static_addr (rtx orig, rtx reg);
228 /* Table of machine attributes. */
229 static const struct attribute_spec arm_attribute_table[] =
231 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
232 /* Function calls made to this symbol must be done indirectly, because
233 it may lie outside of the 26 bit addressing range of a normal function
234 call. */
235 { "long_call", 0, 0, false, true, true, NULL },
236 /* Whereas these functions are always known to reside within the 26 bit
237 addressing range. */
238 { "short_call", 0, 0, false, true, true, NULL },
239 /* Specify the procedure call conventions for a function. */
240 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
241 /* Interrupt Service Routines have special prologue and epilogue requirements. */
242 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
243 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
244 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
245 #ifdef ARM_PE
246 /* ARM/PE has three new attributes:
247 interfacearm - ?
248 dllexport - for exporting a function/variable that will live in a dll
249 dllimport - for importing a function/variable from a dll
251 Microsoft allows multiple declspecs in one __declspec, separating
252 them with spaces. We do NOT support this. Instead, use __declspec
253 multiple times.
255 { "dllimport", 0, 0, true, false, false, NULL },
256 { "dllexport", 0, 0, true, false, false, NULL },
257 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
258 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
259 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
260 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
261 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
262 #endif
263 { NULL, 0, 0, false, false, false, NULL }
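/* For illustration, hypothetical user code applying the attributes
   declared above (the function names are invented):

     void far_away (void) __attribute__ ((long_call));
     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
     double vmul (double, double) __attribute__ ((pcs ("aapcs-vfp")));
     void boot (void) __attribute__ ((naked));  */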
266 /* Initialize the GCC target structure. */
267 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
268 #undef TARGET_MERGE_DECL_ATTRIBUTES
269 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
270 #endif
272 #undef TARGET_LEGITIMIZE_ADDRESS
273 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
275 #undef TARGET_ATTRIBUTE_TABLE
276 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
278 #undef TARGET_ASM_FILE_START
279 #define TARGET_ASM_FILE_START arm_file_start
280 #undef TARGET_ASM_FILE_END
281 #define TARGET_ASM_FILE_END arm_file_end
283 #undef TARGET_ASM_ALIGNED_SI_OP
284 #define TARGET_ASM_ALIGNED_SI_OP NULL
285 #undef TARGET_ASM_INTEGER
286 #define TARGET_ASM_INTEGER arm_assemble_integer
288 #undef TARGET_ASM_FUNCTION_PROLOGUE
289 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
291 #undef TARGET_ASM_FUNCTION_EPILOGUE
292 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
294 #undef TARGET_DEFAULT_TARGET_FLAGS
295 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
296 #undef TARGET_HANDLE_OPTION
297 #define TARGET_HANDLE_OPTION arm_handle_option
298 #undef TARGET_HELP
299 #define TARGET_HELP arm_target_help
301 #undef TARGET_COMP_TYPE_ATTRIBUTES
302 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
304 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
305 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
307 #undef TARGET_SCHED_ADJUST_COST
308 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
310 #undef TARGET_ENCODE_SECTION_INFO
311 #ifdef ARM_PE
312 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
313 #else
314 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
315 #endif
317 #undef TARGET_STRIP_NAME_ENCODING
318 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
320 #undef TARGET_ASM_INTERNAL_LABEL
321 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
323 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
324 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
326 #undef TARGET_FUNCTION_VALUE
327 #define TARGET_FUNCTION_VALUE arm_function_value
329 #undef TARGET_LIBCALL_VALUE
330 #define TARGET_LIBCALL_VALUE arm_libcall_value
332 #undef TARGET_ASM_OUTPUT_MI_THUNK
333 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
334 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
335 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
337 #undef TARGET_RTX_COSTS
338 #define TARGET_RTX_COSTS arm_rtx_costs
339 #undef TARGET_ADDRESS_COST
340 #define TARGET_ADDRESS_COST arm_address_cost
342 #undef TARGET_SHIFT_TRUNCATION_MASK
343 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
344 #undef TARGET_VECTOR_MODE_SUPPORTED_P
345 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
347 #undef TARGET_MACHINE_DEPENDENT_REORG
348 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
350 #undef TARGET_INIT_BUILTINS
351 #define TARGET_INIT_BUILTINS arm_init_builtins
352 #undef TARGET_EXPAND_BUILTIN
353 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
355 #undef TARGET_INIT_LIBFUNCS
356 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
358 #undef TARGET_PROMOTE_FUNCTION_MODE
359 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
360 #undef TARGET_PROMOTE_PROTOTYPES
361 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
362 #undef TARGET_PASS_BY_REFERENCE
363 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
364 #undef TARGET_ARG_PARTIAL_BYTES
365 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
367 #undef TARGET_SETUP_INCOMING_VARARGS
368 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
370 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
371 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
373 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
374 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
375 #undef TARGET_TRAMPOLINE_INIT
376 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
377 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
378 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
380 #undef TARGET_DEFAULT_SHORT_ENUMS
381 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
383 #undef TARGET_ALIGN_ANON_BITFIELD
384 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
386 #undef TARGET_NARROW_VOLATILE_BITFIELD
387 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
389 #undef TARGET_CXX_GUARD_TYPE
390 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
392 #undef TARGET_CXX_GUARD_MASK_BIT
393 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
395 #undef TARGET_CXX_GET_COOKIE_SIZE
396 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
398 #undef TARGET_CXX_COOKIE_HAS_SIZE
399 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
401 #undef TARGET_CXX_CDTOR_RETURNS_THIS
402 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
404 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
405 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
407 #undef TARGET_CXX_USE_AEABI_ATEXIT
408 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
410 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
411 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
412 arm_cxx_determine_class_data_visibility
414 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
415 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
417 #undef TARGET_RETURN_IN_MSB
418 #define TARGET_RETURN_IN_MSB arm_return_in_msb
420 #undef TARGET_RETURN_IN_MEMORY
421 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
423 #undef TARGET_MUST_PASS_IN_STACK
424 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
426 #ifdef TARGET_UNWIND_INFO
427 #undef TARGET_UNWIND_EMIT
428 #define TARGET_UNWIND_EMIT arm_unwind_emit
430 /* EABI unwinding tables use a different format for the typeinfo tables. */
431 #undef TARGET_ASM_TTYPE
432 #define TARGET_ASM_TTYPE arm_output_ttype
434 #undef TARGET_ARM_EABI_UNWINDER
435 #define TARGET_ARM_EABI_UNWINDER true
436 #endif /* TARGET_UNWIND_INFO */
438 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
439 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
441 #undef TARGET_DWARF_REGISTER_SPAN
442 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
444 #undef TARGET_CANNOT_COPY_INSN_P
445 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
447 #ifdef HAVE_AS_TLS
448 #undef TARGET_HAVE_TLS
449 #define TARGET_HAVE_TLS true
450 #endif
452 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
453 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
455 #undef TARGET_CANNOT_FORCE_CONST_MEM
456 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
458 #undef TARGET_MAX_ANCHOR_OFFSET
459 #define TARGET_MAX_ANCHOR_OFFSET 4095
461 /* The minimum is set such that the total size of the block
462 for a particular anchor is 4088 + 1 + 4095 bytes, which is
463 divisible by eight, ensuring natural spacing of anchors. */
464 #undef TARGET_MIN_ANCHOR_OFFSET
465 #define TARGET_MIN_ANCHOR_OFFSET -4088
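/* In other words, anchors may be addressed at offsets in the range
   [-4088, 4095]; 4088 + 1 + 4095 = 8184 = 8 * 1023 bytes, hence the
   divisibility by eight mentioned above.  */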
467 #undef TARGET_SCHED_ISSUE_RATE
468 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
470 #undef TARGET_MANGLE_TYPE
471 #define TARGET_MANGLE_TYPE arm_mangle_type
473 #undef TARGET_BUILD_BUILTIN_VA_LIST
474 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
475 #undef TARGET_EXPAND_BUILTIN_VA_START
476 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
477 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
478 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
480 #ifdef HAVE_AS_TLS
481 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
482 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
483 #endif
485 #undef TARGET_LEGITIMATE_ADDRESS_P
486 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
488 #undef TARGET_INVALID_PARAMETER_TYPE
489 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
491 #undef TARGET_INVALID_RETURN_TYPE
492 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
494 #undef TARGET_PROMOTED_TYPE
495 #define TARGET_PROMOTED_TYPE arm_promoted_type
497 #undef TARGET_CONVERT_TO_TYPE
498 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
500 #undef TARGET_SCALAR_MODE_SUPPORTED_P
501 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
503 #undef TARGET_FRAME_POINTER_REQUIRED
504 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
506 #undef TARGET_CAN_ELIMINATE
507 #define TARGET_CAN_ELIMINATE arm_can_eliminate
509 struct gcc_target targetm = TARGET_INITIALIZER;
511 /* Obstack for minipool constant handling. */
512 static struct obstack minipool_obstack;
513 static char * minipool_startobj;
515 /* The maximum number of insns skipped which
516 will be conditionalised if possible. */
517 static int max_insns_skipped = 5;
519 extern FILE * asm_out_file;
521 /* True if we are currently building a constant table. */
522 int making_const_table;
524 /* The processor for which instructions should be scheduled. */
525 enum processor_type arm_tune = arm_none;
527 /* The current tuning set. */
528 const struct tune_params *current_tune;
530 /* Which floating point hardware to schedule for. */
531 int arm_fpu_attr;
533 /* Which floating point hardware to use. */
534 const struct arm_fpu_desc *arm_fpu_desc;
536 /* Whether to use floating point hardware. */
537 enum float_abi_type arm_float_abi;
539 /* Which __fp16 format to use. */
540 enum arm_fp16_format_type arm_fp16_format;
542 /* Which ABI to use. */
543 enum arm_abi_type arm_abi;
545 /* Which thread pointer model to use. */
546 enum arm_tp_type target_thread_pointer = TP_AUTO;
548 /* Used to parse -mstructure_size_boundary command line option. */
549 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
551 /* Used for Thumb call_via trampolines. */
552 rtx thumb_call_via_label[14];
553 static int thumb_call_reg_needed;
555 /* Bit values used to identify processor capabilities. */
556 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
557 #define FL_ARCH3M (1 << 1) /* Extended multiply */
558 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
559 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
560 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
561 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
562 #define FL_THUMB (1 << 6) /* Thumb aware */
563 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
564 #define FL_STRONG (1 << 8) /* StrongARM */
565 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
566 #define FL_XSCALE (1 << 10) /* XScale */
567 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
568 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
569 media instructions. */
570 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
571 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
572 Note: ARM6 & 7 derivatives only. */
573 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
574 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
575 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
576 profile. */
577 #define FL_DIV (1 << 18) /* Hardware divide. */
578 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
579 #define FL_NEON (1 << 20) /* Neon instructions. */
580 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
581 architecture. */
583 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
585 /* Flags that only affect tuning, not available instructions. */
586 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
587 | FL_CO_PROC)
589 #define FL_FOR_ARCH2 FL_NOTM
590 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
591 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
592 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
593 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
594 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
595 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
596 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
597 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
598 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
599 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
600 #define FL_FOR_ARCH6J FL_FOR_ARCH6
601 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
602 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
603 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
604 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
605 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
606 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
607 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
608 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
609 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
610 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
612 /* The bits in this mask specify which
613 instructions we are allowed to generate. */
614 static unsigned long insn_flags = 0;
616 /* The bits in this mask specify which instruction scheduling options should
617 be used. */
618 static unsigned long tune_flags = 0;
620 /* The following are used in the arm.md file as equivalents to bits
621 in the above two flag variables. */
623 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
624 int arm_arch3m = 0;
626 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
627 int arm_arch4 = 0;
629 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
630 int arm_arch4t = 0;
632 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
633 int arm_arch5 = 0;
635 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
636 int arm_arch5e = 0;
638 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
639 int arm_arch6 = 0;
641 /* Nonzero if this chip supports the ARM 6K extensions. */
642 int arm_arch6k = 0;
644 /* Nonzero if instructions not present in the 'M' profile can be used. */
645 int arm_arch_notm = 0;
647 /* Nonzero if instructions present in ARMv7E-M can be used. */
648 int arm_arch7em = 0;
650 /* Nonzero if this chip can benefit from load scheduling. */
651 int arm_ld_sched = 0;
653 /* Nonzero if this chip is a StrongARM. */
654 int arm_tune_strongarm = 0;
656 /* Nonzero if this chip is a Cirrus variant. */
657 int arm_arch_cirrus = 0;
659 /* Nonzero if this chip supports Intel Wireless MMX technology. */
660 int arm_arch_iwmmxt = 0;
662 /* Nonzero if this chip is an XScale. */
663 int arm_arch_xscale = 0;
665 /* Nonzero if tuning for XScale */
666 int arm_tune_xscale = 0;
668 /* Nonzero if we want to tune for stores that access the write-buffer.
669 This typically means an ARM6 or ARM7 with MMU or MPU. */
670 int arm_tune_wbuf = 0;
672 /* Nonzero if tuning for Cortex-A9. */
673 int arm_tune_cortex_a9 = 0;
675 /* Nonzero if generating Thumb instructions. */
676 int thumb_code = 0;
678 /* Nonzero if we should define __THUMB_INTERWORK__ in the
679 preprocessor.
680 XXX This is a bit of a hack; it's intended to help work around
681 problems in GLD which doesn't understand that armv5t code is
682 interworking clean. */
683 int arm_cpp_interwork = 0;
685 /* Nonzero if chip supports Thumb 2. */
686 int arm_arch_thumb2;
688 /* Nonzero if chip supports integer division instruction. */
689 int arm_arch_hwdiv;
691 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
692 must report the mode of the memory reference from PRINT_OPERAND to
693 PRINT_OPERAND_ADDRESS. */
694 enum machine_mode output_memory_reference_mode;
696 /* The register number to be used for the PIC offset register. */
697 unsigned arm_pic_register = INVALID_REGNUM;
699 /* Set to 1 after arm_reorg has started. Reset at the start of
700 the next function. */
701 static int after_arm_reorg = 0;
703 static enum arm_pcs arm_pcs_default;
705 /* For an explanation of these variables, see final_prescan_insn below. */
706 int arm_ccfsm_state;
707 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
708 enum arm_cond_code arm_current_cc;
709 rtx arm_target_insn;
710 int arm_target_label;
711 /* The number of conditionally executed insns, including the current insn. */
712 int arm_condexec_count = 0;
713 /* A bitmask specifying the patterns for the IT block.
714 Zero means do not output an IT block before this insn. */
715 int arm_condexec_mask = 0;
716 /* The number of bits used in arm_condexec_mask. */
717 int arm_condexec_masklen = 0;
719 /* The condition codes of the ARM, and the inverse function. */
720 static const char * const arm_condition_codes[] =
722 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
723 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
726 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
727 #define streq(string1, string2) (strcmp (string1, string2) == 0)
729 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
730 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
731 | (1 << PIC_OFFSET_TABLE_REGNUM)))
733 /* Initialization code. */
735 struct processors
737 const char *const name;
738 enum processor_type core;
739 const char *arch;
740 const unsigned long flags;
741 const struct tune_params *const tune;
744 const struct tune_params arm_slowmul_tune =
746 arm_slowmul_rtx_costs,
750 const struct tune_params arm_fastmul_tune =
752 arm_fastmul_rtx_costs,
756 const struct tune_params arm_xscale_tune =
758 arm_xscale_rtx_costs,
762 const struct tune_params arm_9e_tune =
764 arm_9e_rtx_costs,
768 /* Not all of these give usefully different compilation alternatives,
769 but there is no simple way of generalizing them. */
770 static const struct processors all_cores[] =
772 /* ARM Cores */
773 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
774 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
775 #include "arm-cores.def"
776 #undef ARM_CORE
777 {NULL, arm_none, NULL, 0, NULL}
780 static const struct processors all_architectures[] =
782 /* ARM Architectures */
783 /* We don't specify tuning costs here as it will be figured out
784 from the core. */
786 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
787 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
788 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
789 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
790 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
791 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
792 implementations that support it, so we will leave it out for now. */
793 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
794 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
795 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
796 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
797 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
798 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
799 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
800 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
801 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
802 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
803 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
804 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
805 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
806 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
807 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
808 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
809 {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
810 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
811 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
812 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
813 {NULL, arm_none, NULL, 0 , NULL}
817 /* These are populated as commandline arguments are processed, or NULL
818 if not specified. */
819 static const struct processors *arm_selected_arch;
820 static const struct processors *arm_selected_cpu;
821 static const struct processors *arm_selected_tune;
823 /* The name of the preprocessor macro to define for this architecture. */
825 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
827 /* Available values for -mfpu=. */
829 static const struct arm_fpu_desc all_fpus[] =
831 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
832 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
833 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
834 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
835 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
836 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
837 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
838 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
839 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
840 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
841 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
842 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
843 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
844 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
845 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
846 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
847 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
848 /* Compatibility aliases. */
849 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
853 struct float_abi
855 const char * name;
856 enum float_abi_type abi_type;
860 /* Available values for -mfloat-abi=. */
862 static const struct float_abi all_float_abis[] =
864 {"soft", ARM_FLOAT_ABI_SOFT},
865 {"softfp", ARM_FLOAT_ABI_SOFTFP},
866 {"hard", ARM_FLOAT_ABI_HARD}
870 struct fp16_format
872 const char *name;
873 enum arm_fp16_format_type fp16_format_type;
877 /* Available values for -mfp16-format=. */
879 static const struct fp16_format all_fp16_formats[] =
881 {"none", ARM_FP16_FORMAT_NONE},
882 {"ieee", ARM_FP16_FORMAT_IEEE},
883 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
887 struct abi_name
889 const char *name;
890 enum arm_abi_type abi_type;
894 /* Available values for -mabi=. */
896 static const struct abi_name arm_all_abis[] =
898 {"apcs-gnu", ARM_ABI_APCS},
899 {"atpcs", ARM_ABI_ATPCS},
900 {"aapcs", ARM_ABI_AAPCS},
901 {"iwmmxt", ARM_ABI_IWMMXT},
902 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
905 /* Supported TLS relocations. */
907 enum tls_reloc {
908 TLS_GD32,
909 TLS_LDM32,
910 TLS_LDO32,
911 TLS_IE32,
912 TLS_LE32
915 /* The maximum number of insns to be used when loading a constant. */
916 inline static int
917 arm_constant_limit (bool size_p)
919 return size_p ? 1 : current_tune->constant_limit;
922 /* Emit an insn that's a simple single-set. Both the operands must be known
923 to be valid. */
924 inline static rtx
925 emit_set_insn (rtx x, rtx y)
927 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
930 /* Return the number of bits set in VALUE. */
931 static unsigned
932 bit_count (unsigned long value)
934 unsigned long count = 0;
936 while (value)
938 count++;
939 value &= value - 1; /* Clear the least-significant set bit. */
942 return count;
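/* Worked example: bit_count (0x29) starts from value = 101001 in binary;
   successive "value &= value - 1" steps yield 101000, 100000 and 0, so
   the loop runs three times and the function returns 3.  */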
945 /* Set up library functions unique to ARM. */
947 static void
948 arm_init_libfuncs (void)
950 /* There are no special library functions unless we are using the
951 ARM BPABI. */
952 if (!TARGET_BPABI)
953 return;
955 /* The functions below are described in Section 4 of the "Run-Time
956 ABI for the ARM architecture", Version 1.0. */
958 /* Double-precision floating-point arithmetic. Table 2. */
959 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
960 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
961 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
962 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
963 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
965 /* Double-precision comparisons. Table 3. */
966 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
967 set_optab_libfunc (ne_optab, DFmode, NULL);
968 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
969 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
970 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
971 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
972 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
974 /* Single-precision floating-point arithmetic. Table 4. */
975 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
976 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
977 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
978 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
979 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
981 /* Single-precision comparisons. Table 5. */
982 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
983 set_optab_libfunc (ne_optab, SFmode, NULL);
984 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
985 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
986 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
987 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
988 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
990 /* Floating-point to integer conversions. Table 6. */
991 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
992 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
993 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
994 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
995 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
996 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
997 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
998 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1000 /* Conversions between floating types. Table 7. */
1001 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1002 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1004 /* Integer to floating-point conversions. Table 8. */
1005 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1006 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1007 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1008 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1009 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1010 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1011 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1012 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1014 /* Long long. Table 9. */
1015 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1016 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1017 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1018 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1019 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1020 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1021 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1022 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1024 /* Integer (32/32->32) division. \S 4.3.1. */
1025 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1026 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1028 /* The divmod functions are designed so that they can be used for
1029 plain division, even though they return both the quotient and the
1030 remainder. The quotient is returned in the usual location (i.e.,
1031 r0 for SImode, {r0, r1} for DImode), just as would be expected
1032 for an ordinary division routine. Because the AAPCS calling
1033 conventions specify that all of { r0, r1, r2, r3 } are
1034 call-clobbered registers, there is no need to tell the compiler
1035 explicitly that those registers are clobbered by these
1036 routines. */
1037 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1038 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1040 /* For SImode division the ABI provides div-without-mod routines,
1041 which are faster. */
1042 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1043 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1045 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1046 divmod libcalls instead. */
1047 set_optab_libfunc (smod_optab, DImode, NULL);
1048 set_optab_libfunc (umod_optab, DImode, NULL);
1049 set_optab_libfunc (smod_optab, SImode, NULL);
1050 set_optab_libfunc (umod_optab, SImode, NULL);
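/* Illustrative sketch (hypothetical user code): with the mappings above,

     int q (int n, int d) { return n / d; }  calls __aeabi_idiv, while
     int r (int n, int d) { return n % d; }  calls __aeabi_idivmod and
                                             takes the remainder from r1.

   A function needing both quotient and remainder can be served by a
   single __aeabi_idivmod call, the quotient arriving in r0 just as for
   an ordinary division routine.  */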
1052 /* Half-precision float operations. The compiler handles all operations
1053 with NULL libfuncs by converting to SFmode.
1054 switch (arm_fp16_format)
1056 case ARM_FP16_FORMAT_IEEE:
1057 case ARM_FP16_FORMAT_ALTERNATIVE:
1059 /* Conversions. */
1060 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1061 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1062 ? "__gnu_f2h_ieee"
1063 : "__gnu_f2h_alternative"));
1064 set_conv_libfunc (sext_optab, SFmode, HFmode,
1065 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1066 ? "__gnu_h2f_ieee"
1067 : "__gnu_h2f_alternative"));
1069 /* Arithmetic. */
1070 set_optab_libfunc (add_optab, HFmode, NULL);
1071 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1072 set_optab_libfunc (smul_optab, HFmode, NULL);
1073 set_optab_libfunc (neg_optab, HFmode, NULL);
1074 set_optab_libfunc (sub_optab, HFmode, NULL);
1076 /* Comparisons. */
1077 set_optab_libfunc (eq_optab, HFmode, NULL);
1078 set_optab_libfunc (ne_optab, HFmode, NULL);
1079 set_optab_libfunc (lt_optab, HFmode, NULL);
1080 set_optab_libfunc (le_optab, HFmode, NULL);
1081 set_optab_libfunc (ge_optab, HFmode, NULL);
1082 set_optab_libfunc (gt_optab, HFmode, NULL);
1083 set_optab_libfunc (unord_optab, HFmode, NULL);
1084 break;
1086 default:
1087 break;
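/* For example, with the NULL libfuncs registered above an HFmode
   computation such as

     __fp16 sum (__fp16 a, __fp16 b) { return a + b; }

   is carried out by widening both operands with __gnu_h2f_ieee (or
   __gnu_h2f_alternative), adding in SFmode, and narrowing the result
   back with the corresponding __gnu_f2h_* routine.  */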
1090 if (TARGET_AAPCS_BASED)
1091 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1094 /* On AAPCS systems, this is the "struct __va_list". */
1095 static GTY(()) tree va_list_type;
1097 /* Return the type to use as __builtin_va_list. */
1098 static tree
1099 arm_build_builtin_va_list (void)
1101 tree va_list_name;
1102 tree ap_field;
1104 if (!TARGET_AAPCS_BASED)
1105 return std_build_builtin_va_list ();
1107 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1108 defined as:
1110 struct __va_list
1112 void *__ap;
1115 The C Library ABI further reinforces this definition in \S
1116 4.1.
1118 We must follow this definition exactly. The structure tag
1119 name is visible in C++ mangled names, and thus forms a part
1120 of the ABI. The field name may be used by people who
1121 #include <stdarg.h>. */
1122 /* Create the type. */
1123 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1124 /* Give it the required name. */
1125 va_list_name = build_decl (BUILTINS_LOCATION,
1126 TYPE_DECL,
1127 get_identifier ("__va_list"),
1128 va_list_type);
1129 DECL_ARTIFICIAL (va_list_name) = 1;
1130 TYPE_NAME (va_list_type) = va_list_name;
1131 /* Create the __ap field. */
1132 ap_field = build_decl (BUILTINS_LOCATION,
1133 FIELD_DECL,
1134 get_identifier ("__ap"),
1135 ptr_type_node);
1136 DECL_ARTIFICIAL (ap_field) = 1;
1137 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1138 TYPE_FIELDS (va_list_type) = ap_field;
1139 /* Compute its layout. */
1140 layout_type (va_list_type);
1142 return va_list_type;
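/* In C terms the type built here is

     struct __va_list { void *__ap; };

   and, because the tag name is mandated by the ABI, it is visible in
   C++ mangled signatures: the AAPCS puts the type in namespace std, so
   a va_list parameter typically mangles as "St9__va_list".  */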
1145 /* Return an expression of type "void *" pointing to the next
1146 available argument in a variable-argument list. VALIST is the
1147 user-level va_list object, of type __builtin_va_list. */
1148 static tree
1149 arm_extract_valist_ptr (tree valist)
1151 if (TREE_TYPE (valist) == error_mark_node)
1152 return error_mark_node;
1154 /* On an AAPCS target, the pointer is stored within "struct
1155 va_list". */
1156 if (TARGET_AAPCS_BASED)
1158 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1159 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1160 valist, ap_field, NULL_TREE);
1163 return valist;
1166 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1167 static void
1168 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1170 valist = arm_extract_valist_ptr (valist);
1171 std_expand_builtin_va_start (valist, nextarg);
1174 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1175 static tree
1176 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1177 gimple_seq *post_p)
1179 valist = arm_extract_valist_ptr (valist);
1180 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1183 /* Lookup NAME in SEL. */
1185 static const struct processors *
1186 arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
1188 if (!(name && *name))
1189 return NULL;
1191 for (; sel->name != NULL; sel++)
1193 if (streq (name, sel->name))
1194 return sel;
1197 error ("bad value (%s) for %s switch", name, desc);
1198 return NULL;
1201 /* Implement TARGET_HANDLE_OPTION. */
1203 static bool
1204 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1206 switch (code)
1208 case OPT_march_:
1209 arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
1210 return true;
1212 case OPT_mcpu_:
1213 arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
1214 return true;
1216 case OPT_mhard_float:
1217 target_float_abi_name = "hard";
1218 return true;
1220 case OPT_msoft_float:
1221 target_float_abi_name = "soft";
1222 return true;
1224 case OPT_mtune_:
1225 arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
1226 return true;
1228 default:
1229 return true;
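/* For example, "-mcpu=cortex-a8 -mtune=cortex-a9" records the cortex-a8
   entry of all_cores in arm_selected_cpu and the cortex-a9 entry in
   arm_selected_tune; arm_override_options below reconciles the two.  */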
1233 static void
1234 arm_target_help (void)
1236 int i;
1237 static int columns = 0;
1238 int remaining;
1240 /* If we have not done so already, obtain the desired maximum width of
1241 the output. Note - this is a duplication of the code at the start of
1242 gcc/opts.c:print_specific_help() - the two copies should probably be
1243 replaced by a single function. */
1244 if (columns == 0)
1246 const char *p;
1248 GET_ENVIRONMENT (p, "COLUMNS");
1249 if (p != NULL)
1251 int value = atoi (p);
1253 if (value > 0)
1254 columns = value;
1257 if (columns == 0)
1258 /* Use a reasonable default. */
1259 columns = 80;
1262 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1264 /* The - 2 is because we know that the last entry in the array is NULL. */
1265 i = ARRAY_SIZE (all_cores) - 2;
1266 gcc_assert (i > 0);
1267 printf (" %s", all_cores[i].name);
1268 remaining = columns - (strlen (all_cores[i].name) + 4);
1269 gcc_assert (remaining >= 0);
1271 while (i--)
1273 int len = strlen (all_cores[i].name);
1275 if (remaining > len + 2)
1277 printf (", %s", all_cores[i].name);
1278 remaining -= len + 2;
1280 else
1282 if (remaining > 0)
1283 printf (",");
1284 printf ("\n %s", all_cores[i].name);
1285 remaining = columns - (len + 4);
1289 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1291 i = ARRAY_SIZE (all_architectures) - 2;
1292 gcc_assert (i > 0);
1294 printf (" %s", all_architectures[i].name);
1295 remaining = columns - (strlen (all_architectures[i].name) + 4);
1296 gcc_assert (remaining >= 0);
1298 while (i--)
1300 int len = strlen (all_architectures[i].name);
1302 if (remaining > len + 2)
1304 printf (", %s", all_architectures[i].name);
1305 remaining -= len + 2;
1307 else
1309 if (remaining > 0)
1310 printf (",");
1311 printf ("\n %s", all_architectures[i].name);
1312 remaining = columns - (len + 4);
1315 printf ("\n");
1319 /* Fix up any incompatible options that the user has specified.
1320 This has now turned into a maze. */
1321 void
1322 arm_override_options (void)
1324 unsigned i;
1326 if (arm_selected_arch)
1328 if (arm_selected_cpu)
1330 /* Check for conflict between mcpu and march. */
1331 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1333 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1334 arm_selected_cpu->name, arm_selected_arch->name);
1335 /* -march wins for code generation.
1336 -mcpu wins for default tuning. */
1337 if (!arm_selected_tune)
1338 arm_selected_tune = arm_selected_cpu;
1340 arm_selected_cpu = arm_selected_arch;
1342 else
1343 /* -mcpu wins. */
1344 arm_selected_arch = NULL;
1346 else
1347 /* Pick a CPU based on the architecture. */
1348 arm_selected_cpu = arm_selected_arch;
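/* For example, "-mcpu=arm7tdmi -march=armv5t" triggers the conflict
   warning above (ARMv4T vs. ARMv5T capabilities); code is then
   generated for armv5t while, absent an explicit -mtune, the arm7tdmi
   entry supplies the default tuning.  */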
1351 /* If the user did not specify a processor, choose one for them. */
1352 if (!arm_selected_cpu)
1354 const struct processors * sel;
1355 unsigned int sought;
1357 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1358 if (!arm_selected_cpu->name)
1360 #ifdef SUBTARGET_CPU_DEFAULT
1361 /* Use the subtarget default CPU if none was specified by
1362 configure. */
1363 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1364 #endif
1365 /* Default to ARM6. */
1366 if (!arm_selected_cpu->name)
1367 arm_selected_cpu = &all_cores[arm6];
1370 sel = arm_selected_cpu;
1371 insn_flags = sel->flags;
1373 /* Now check to see if the user has specified some command line
1374 switches that require certain abilities from the cpu. */
1375 sought = 0;
1377 if (TARGET_INTERWORK || TARGET_THUMB)
1379 sought |= (FL_THUMB | FL_MODE32);
1381 /* There are no ARM processors that support both APCS-26 and
1382 interworking. Therefore we force FL_MODE26 to be removed
1383 from insn_flags here (if it was set), so that the search
1384 below will always be able to find a compatible processor. */
1385 insn_flags &= ~FL_MODE26;
1388 if (sought != 0 && ((sought & insn_flags) != sought))
1390 /* Try to locate a CPU type that supports all of the abilities
1391 of the default CPU, plus the extra abilities requested by
1392 the user. */
1393 for (sel = all_cores; sel->name != NULL; sel++)
1394 if ((sel->flags & sought) == (sought | insn_flags))
1395 break;
1397 if (sel->name == NULL)
1399 unsigned current_bit_count = 0;
1400 const struct processors * best_fit = NULL;
1402 /* Ideally we would like to issue an error message here
1403 saying that it was not possible to find a CPU compatible
1404 with the default CPU, but which also supports the command
1405 line options specified by the programmer, and so they
1406 ought to use the -mcpu=<name> command line option to
1407 override the default CPU type.
1409 If we cannot find a cpu that has both the
1410 characteristics of the default cpu and the given
1411 command line options we scan the array again looking
1412 for a best match. */
1413 for (sel = all_cores; sel->name != NULL; sel++)
1414 if ((sel->flags & sought) == sought)
1416 unsigned count;
1418 count = bit_count (sel->flags & insn_flags);
1420 if (count >= current_bit_count)
1422 best_fit = sel;
1423 current_bit_count = count;
1427 gcc_assert (best_fit);
1428 sel = best_fit;
1431 arm_selected_cpu = sel;
1435 gcc_assert (arm_selected_cpu);
1436 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1437 if (!arm_selected_tune)
1438 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1440 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1441 insn_flags = arm_selected_cpu->flags;
1443 arm_tune = arm_selected_tune->core;
1444 tune_flags = arm_selected_tune->flags;
1445 current_tune = arm_selected_tune->tune;
1447 if (target_fp16_format_name)
1449 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1451 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1453 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1454 break;
1457 if (i == ARRAY_SIZE (all_fp16_formats))
1458 error ("invalid __fp16 format option: -mfp16-format=%s",
1459 target_fp16_format_name);
1461 else
1462 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1464 if (target_abi_name)
1466 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1468 if (streq (arm_all_abis[i].name, target_abi_name))
1470 arm_abi = arm_all_abis[i].abi_type;
1471 break;
1474 if (i == ARRAY_SIZE (arm_all_abis))
1475 error ("invalid ABI option: -mabi=%s", target_abi_name);
1477 else
1478 arm_abi = ARM_DEFAULT_ABI;
1480 /* Make sure that the processor choice does not conflict with any of the
1481 other command line choices. */
1482 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1483 error ("target CPU does not support ARM mode");
1485 /* BPABI targets use linker tricks to allow interworking on cores
1486 without thumb support. */
1487 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1489 warning (0, "target CPU does not support interworking" );
1490 target_flags &= ~MASK_INTERWORK;
1493 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1495 warning (0, "target CPU does not support THUMB instructions");
1496 target_flags &= ~MASK_THUMB;
1499 if (TARGET_APCS_FRAME && TARGET_THUMB)
1501 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1502 target_flags &= ~MASK_APCS_FRAME;
1505 /* Callee super interworking implies thumb interworking. Adding
1506 this to the flags here simplifies the logic elsewhere. */
1507 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1508 target_flags |= MASK_INTERWORK;
1510 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1511 from here where no function is being compiled currently. */
1512 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1513 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1515 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1516 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1518 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1519 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1521 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1523 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1524 target_flags |= MASK_APCS_FRAME;
1527 if (TARGET_POKE_FUNCTION_NAME)
1528 target_flags |= MASK_APCS_FRAME;
1530 if (TARGET_APCS_REENT && flag_pic)
1531 error ("-fpic and -mapcs-reent are incompatible");
1533 if (TARGET_APCS_REENT)
1534 warning (0, "APCS reentrant code not supported. Ignored");
1536 /* If this target is normally configured to use APCS frames, warn if they
1537 are turned off and debugging is turned on. */
1538 if (TARGET_ARM
1539 && write_symbols != NO_DEBUG
1540 && !TARGET_APCS_FRAME
1541 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1542 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1544 if (TARGET_APCS_FLOAT)
1545 warning (0, "passing floating point arguments in fp regs not yet supported");
1547 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1548 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1549 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1550 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1551 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1552 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1553 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1554 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1555 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1556 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1557 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1558 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1559 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1561 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1562 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1563 thumb_code = (TARGET_ARM == 0);
1564 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1565 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1566 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1567 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1568 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1570 /* If we are not using the default (ARM mode) section anchor offset
1571 ranges, then set the correct ranges now. */
1572 if (TARGET_THUMB1)
1574 /* Thumb-1 LDR instructions cannot have negative offsets.
1575 Permissible positive offset ranges are 5-bit (for byte loads),
1576 6-bit (for halfword loads), or 7-bit (for word loads).
1577 Empirical results suggest a 7-bit anchor range gives the best
1578 overall code size. */
1579 targetm.min_anchor_offset = 0;
1580 targetm.max_anchor_offset = 127;
1582 else if (TARGET_THUMB2)
1584 /* The minimum is set such that the total size of the block
1585 for a particular anchor is 248 + 1 + 4095 bytes, which is
1586 divisible by eight, ensuring natural spacing of anchors. */
1587 targetm.min_anchor_offset = -248;
1588 targetm.max_anchor_offset = 4095;
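/* (Here 248 + 1 + 4095 = 4344 = 8 * 543 bytes, so the Thumb-2 anchor
   block size is likewise a multiple of eight.)  */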
1591 /* V5 code we generate is completely interworking capable, so we turn off
1592 TARGET_INTERWORK here to avoid many tests later on. */
1594 /* XXX However, we must pass the right pre-processor defines to CPP
1595 or GLD can get confused. This is a hack. */
1596 if (TARGET_INTERWORK)
1597 arm_cpp_interwork = 1;
1599 if (arm_arch5)
1600 target_flags &= ~MASK_INTERWORK;
1602 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1603 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1605 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1606 error ("iwmmxt abi requires an iwmmxt capable cpu");
1608 if (target_fpu_name == NULL && target_fpe_name != NULL)
1610 if (streq (target_fpe_name, "2"))
1611 target_fpu_name = "fpe2";
1612 else if (streq (target_fpe_name, "3"))
1613 target_fpu_name = "fpe3";
1614 else
1615 error ("invalid floating point emulation option: -mfpe=%s",
1616 target_fpe_name);
1619 if (target_fpu_name == NULL)
1621 #ifdef FPUTYPE_DEFAULT
1622 target_fpu_name = FPUTYPE_DEFAULT;
1623 #else
1624 if (arm_arch_cirrus)
1625 target_fpu_name = "maverick";
1626 else
1627 target_fpu_name = "fpe2";
1628 #endif
1631 arm_fpu_desc = NULL;
1632 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1634 if (streq (all_fpus[i].name, target_fpu_name))
1636 arm_fpu_desc = &all_fpus[i];
1637 break;
1641 if (!arm_fpu_desc)
1643 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1644 return;
1647 switch (arm_fpu_desc->model)
1649 case ARM_FP_MODEL_FPA:
1650 if (arm_fpu_desc->rev == 2)
1651 arm_fpu_attr = FPU_FPE2;
1652 else if (arm_fpu_desc->rev == 3)
1653 arm_fpu_attr = FPU_FPE3;
1654 else
1655 arm_fpu_attr = FPU_FPA;
1656 break;
1658 case ARM_FP_MODEL_MAVERICK:
1659 arm_fpu_attr = FPU_MAVERICK;
1660 break;
1662 case ARM_FP_MODEL_VFP:
1663 arm_fpu_attr = FPU_VFP;
1664 break;
1666 default:
1667 gcc_unreachable();
1670 if (target_float_abi_name != NULL)
1672 /* The user specified a FP ABI. */
1673 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1675 if (streq (all_float_abis[i].name, target_float_abi_name))
1677 arm_float_abi = all_float_abis[i].abi_type;
1678 break;
1681 if (i == ARRAY_SIZE (all_float_abis))
1682 error ("invalid floating point abi: -mfloat-abi=%s",
1683 target_float_abi_name);
1685 else
1686 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1688 if (TARGET_AAPCS_BASED
1689 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1690 error ("FPA is unsupported in the AAPCS");
1692 if (TARGET_AAPCS_BASED)
1694 if (TARGET_CALLER_INTERWORKING)
1695 error ("AAPCS does not support -mcaller-super-interworking");
1696 else
1697 if (TARGET_CALLEE_INTERWORKING)
1698 error ("AAPCS does not support -mcallee-super-interworking");
1701 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1702 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1703 will ever exist. GCC makes no attempt to support this combination. */
1704 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1705 sorry ("iWMMXt and hardware floating point");
1707 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1708 if (TARGET_THUMB2 && TARGET_IWMMXT)
1709 sorry ("Thumb-2 iWMMXt");
1711 /* __fp16 support currently assumes the core has ldrh. */
1712 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1713 sorry ("__fp16 and no ldrh");
1715 /* If soft-float is specified then don't use FPU. */
1716 if (TARGET_SOFT_FLOAT)
1717 arm_fpu_attr = FPU_NONE;
1719 if (TARGET_AAPCS_BASED)
1721 if (arm_abi == ARM_ABI_IWMMXT)
1722 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1723 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1724 && TARGET_HARD_FLOAT
1725 && TARGET_VFP)
1726 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1727 else
1728 arm_pcs_default = ARM_PCS_AAPCS;
1730 else
1732 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1733 sorry ("-mfloat-abi=hard and VFP");
1735 if (arm_abi == ARM_ABI_APCS)
1736 arm_pcs_default = ARM_PCS_APCS;
1737 else
1738 arm_pcs_default = ARM_PCS_ATPCS;
1741 /* For arm2/3 there is no need to do any scheduling if there is only
1742 a floating point emulator, or we are doing software floating-point. */
1743 if ((TARGET_SOFT_FLOAT
1744 || (TARGET_FPA && arm_fpu_desc->rev))
1745 && (tune_flags & FL_MODE32) == 0)
1746 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1748 if (target_thread_switch)
1750 if (strcmp (target_thread_switch, "soft") == 0)
1751 target_thread_pointer = TP_SOFT;
1752 else if (strcmp (target_thread_switch, "auto") == 0)
1753 target_thread_pointer = TP_AUTO;
1754 else if (strcmp (target_thread_switch, "cp15") == 0)
1755 target_thread_pointer = TP_CP15;
1756 else
1757 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1760 /* Use the cp15 method if it is available. */
1761 if (target_thread_pointer == TP_AUTO)
1763 if (arm_arch6k && !TARGET_THUMB1)
1764 target_thread_pointer = TP_CP15;
1765 else
1766 target_thread_pointer = TP_SOFT;
1769 if (TARGET_HARD_TP && TARGET_THUMB1)
1770 error ("can not use -mtp=cp15 with 16-bit Thumb");
1772 /* Override the default structure alignment for AAPCS ABI. */
1773 if (TARGET_AAPCS_BASED)
1774 arm_structure_size_boundary = 8;
1776 if (structure_size_string != NULL)
1778 int size = strtol (structure_size_string, NULL, 0);
1780 if (size == 8 || size == 32
1781 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1782 arm_structure_size_boundary = size;
1783 else
1784 warning (0, "structure size boundary can only be set to %s",
1785 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
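/* To illustrate the effect: with the historic default of 32, a struct
   containing a single char is padded out to four bytes, whereas
   -mstructure-size-boundary=8 lets it occupy a single byte.  */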
1788 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1790 error ("RTP PIC is incompatible with Thumb");
1791 flag_pic = 0;
1794 /* If stack checking is disabled, we can use r10 as the PIC register,
1795 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1796 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1798 if (TARGET_VXWORKS_RTP)
1799 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1800 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1803 if (flag_pic && TARGET_VXWORKS_RTP)
1804 arm_pic_register = 9;
1806 if (arm_pic_register_string != NULL)
1808 int pic_register = decode_reg_name (arm_pic_register_string);
1810 if (!flag_pic)
1811 warning (0, "-mpic-register= is useless without -fpic");
1813 /* Prevent the user from choosing an obviously stupid PIC register. */
1814 else if (pic_register < 0 || call_used_regs[pic_register]
1815 || pic_register == HARD_FRAME_POINTER_REGNUM
1816 || pic_register == STACK_POINTER_REGNUM
1817 || pic_register >= PC_REGNUM
1818 || (TARGET_VXWORKS_RTP
1819 && (unsigned int) pic_register != arm_pic_register))
1820 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1821 else
1822 arm_pic_register = pic_register;
1825 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1826 if (fix_cm3_ldrd == 2)
1828 if (arm_selected_cpu->core == cortexm3)
1829 fix_cm3_ldrd = 1;
1830 else
1831 fix_cm3_ldrd = 0;
1834 if (TARGET_THUMB1 && flag_schedule_insns)
1836 /* Don't warn since it's on by default in -O2. */
1837 flag_schedule_insns = 0;
1840 if (optimize_size)
1842 /* If optimizing for size, bump the number of instructions that we
1843 are prepared to conditionally execute (even on a StrongARM). */
1844 max_insns_skipped = 6;
1846 else
1848 /* StrongARM has early execution of branches, so a sequence
1849 that is worth skipping is shorter. */
1850 if (arm_tune_strongarm)
1851 max_insns_skipped = 3;
1854 /* Hot/Cold partitioning is not currently supported, since we can't
1855 handle literal pool placement in that case. */
1856 if (flag_reorder_blocks_and_partition)
1858 inform (input_location,
1859 "-freorder-blocks-and-partition not supported on this architecture");
1860 flag_reorder_blocks_and_partition = 0;
1861 flag_reorder_blocks = 1;
1864 /* Register global variables with the garbage collector. */
1865 arm_add_gc_roots ();
1868 static void
1869 arm_add_gc_roots (void)
1871 gcc_obstack_init(&minipool_obstack);
1872 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1875 /* A table of known ARM exception types.
1876 For use with the interrupt function attribute. */
1878 typedef struct
1880 const char *const arg;
1881 const unsigned long return_value;
1883 isr_attribute_arg;
1885 static const isr_attribute_arg isr_attribute_args [] =
1887 { "IRQ", ARM_FT_ISR },
1888 { "irq", ARM_FT_ISR },
1889 { "FIQ", ARM_FT_FIQ },
1890 { "fiq", ARM_FT_FIQ },
1891 { "ABORT", ARM_FT_ISR },
1892 { "abort", ARM_FT_ISR },
1893 { "ABORT", ARM_FT_ISR },
1894 { "abort", ARM_FT_ISR },
1895 { "UNDEF", ARM_FT_EXCEPTION },
1896 { "undef", ARM_FT_EXCEPTION },
1897 { "SWI", ARM_FT_EXCEPTION },
1898 { "swi", ARM_FT_EXCEPTION },
1899 { NULL, ARM_FT_NORMAL }
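/* For example, a handler for the table above might be declared as

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   (the function name is purely illustrative).  Omitting the argument
   defaults to IRQ, as arm_isr_value below shows.  */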
1902 /* Returns the (interrupt) function type of the current
1903 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1905 static unsigned long
1906 arm_isr_value (tree argument)
1908 const isr_attribute_arg * ptr;
1909 const char * arg;
1911 if (!arm_arch_notm)
1912 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1914 /* No argument - default to IRQ. */
1915 if (argument == NULL_TREE)
1916 return ARM_FT_ISR;
1918 /* Get the value of the argument. */
1919 if (TREE_VALUE (argument) == NULL_TREE
1920 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1921 return ARM_FT_UNKNOWN;
1923 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1925 /* Check it against the list of known arguments. */
1926 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1927 if (streq (arg, ptr->arg))
1928 return ptr->return_value;
1930 /* An unrecognized interrupt type. */
1931 return ARM_FT_UNKNOWN;
1934 /* Computes the type of the current function. */
1936 static unsigned long
1937 arm_compute_func_type (void)
1939 unsigned long type = ARM_FT_UNKNOWN;
1940 tree a;
1941 tree attr;
1943 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1945 /* Decide if the current function is volatile. Such functions
1946 never return, and many memory cycles can be saved by not storing
1947 register values that will never be needed again. This optimization
1948 was added to speed up context switching in a kernel application. */
1949 if (optimize > 0
1950 && (TREE_NOTHROW (current_function_decl)
1951 || !(flag_unwind_tables
1952 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1953 && TREE_THIS_VOLATILE (current_function_decl))
1954 type |= ARM_FT_VOLATILE;
1956 if (cfun->static_chain_decl != NULL)
1957 type |= ARM_FT_NESTED;
1959 attr = DECL_ATTRIBUTES (current_function_decl);
1961 a = lookup_attribute ("naked", attr);
1962 if (a != NULL_TREE)
1963 type |= ARM_FT_NAKED;
1965 a = lookup_attribute ("isr", attr);
1966 if (a == NULL_TREE)
1967 a = lookup_attribute ("interrupt", attr);
1969 if (a == NULL_TREE)
1970 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1971 else
1972 type |= arm_isr_value (TREE_VALUE (a));
1974 return type;
1977 /* Returns the type of the current function. */
1979 unsigned long
1980 arm_current_func_type (void)
1982 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1983 cfun->machine->func_type = arm_compute_func_type ();
1985 return cfun->machine->func_type;
1988 bool
1989 arm_allocate_stack_slots_for_args (void)
1991 /* Naked functions should not allocate stack slots for arguments. */
1992 return !IS_NAKED (arm_current_func_type ());
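/* For instance, a function declared as

     void reset_entry (void) __attribute__ ((naked));

   (name illustrative) gets neither prologue nor epilogue, so no stack
   slots may be set aside for its incoming arguments.  */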
1996 /* Output assembler code for a block containing the constant parts
1997 of a trampoline, leaving space for the variable parts.
1999 On the ARM, (if r8 is the static chain regnum, and remembering that
2000 referencing pc adds an offset of 8) the trampoline looks like:
2001 ldr r8, [pc, #0]
2002 ldr pc, [pc]
2003 .word static chain value
2004 .word function's address
2005 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2007 static void
2008 arm_asm_trampoline_template (FILE *f)
2010 if (TARGET_ARM)
2012 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2013 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2015 else if (TARGET_THUMB2)
2017 /* The Thumb-2 trampoline is similar to the ARM implementation.
2018 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2019 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2020 STATIC_CHAIN_REGNUM, PC_REGNUM);
2021 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2023 else
2025 ASM_OUTPUT_ALIGN (f, 2);
2026 fprintf (f, "\t.code\t16\n");
2027 fprintf (f, ".Ltrampoline_start:\n");
2028 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2029 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2030 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2031 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2032 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2033 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2035 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2036 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2039 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2041 static void
2042 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2044 rtx fnaddr, mem, a_tramp;
2046 emit_block_move (m_tramp, assemble_trampoline_template (),
2047 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2049 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2050 emit_move_insn (mem, chain_value);
2052 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2053 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2054 emit_move_insn (mem, fnaddr);
2056 a_tramp = XEXP (m_tramp, 0);
2057 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2058 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2059 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2062 /* Thumb trampolines should be entered in thumb mode, so set
2063 the bottom bit of the address. */
2065 static rtx
2066 arm_trampoline_adjust_address (rtx addr)
2068 if (TARGET_THUMB)
2069 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2070 NULL, 0, OPTAB_LIB_WIDEN);
2071 return addr;
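/* E.g. a trampoline placed at 0x20001000 is entered through address
   0x20001001 when compiling for Thumb, so a BX/BLX to it switches the
   core into Thumb state.  The address is illustrative only.  */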
2074 /* Return 1 if it is possible to return using a single instruction.
2075 If SIBLING is non-null, this is a test for a return before a sibling
2076 call. SIBLING is the call insn, so we can examine its register usage. */
2079 use_return_insn (int iscond, rtx sibling)
2081 int regno;
2082 unsigned int func_type;
2083 unsigned long saved_int_regs;
2084 unsigned HOST_WIDE_INT stack_adjust;
2085 arm_stack_offsets *offsets;
2087 /* Never use a return instruction before reload has run. */
2088 if (!reload_completed)
2089 return 0;
2091 func_type = arm_current_func_type ();
2093 /* Naked, volatile and stack alignment functions need special
2094 consideration. */
2095 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2096 return 0;
2098 /* So do interrupt functions that use the frame pointer and Thumb
2099 interrupt functions. */
2100 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2101 return 0;
2103 offsets = arm_get_frame_offsets ();
2104 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2106 /* As do variadic functions. */
2107 if (crtl->args.pretend_args_size
2108 || cfun->machine->uses_anonymous_args
2109 /* Or if the function calls __builtin_eh_return () */
2110 || crtl->calls_eh_return
2111 /* Or if the function calls alloca */
2112 || cfun->calls_alloca
2113 /* Or if there is a stack adjustment. However, if the stack pointer
2114 is saved on the stack, we can use a pre-incrementing stack load. */
2115 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2116 && stack_adjust == 4)))
2117 return 0;
2119 saved_int_regs = offsets->saved_regs_mask;
2121 /* Unfortunately, the insn
2123 ldmib sp, {..., sp, ...}
2125 triggers a bug on most SA-110 based devices, such that the stack
2126 pointer won't be correctly restored if the instruction takes a
2127 page fault. We work around this problem by popping r3 along with
2128 the other registers, since that is never slower than executing
2129 another instruction.
2131 We test for !arm_arch5 here, because code for any architecture
2132 less than this could potentially be run on one of the buggy
2133 chips. */
2134 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2136 /* Validate that r3 is a call-clobbered register (always true in
2137 the default ABI) ... */
2138 if (!call_used_regs[3])
2139 return 0;
2141 /* ... that it isn't being used for a return value ... */
2142 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2143 return 0;
2145 /* ... or for a tail-call argument ... */
2146 if (sibling)
2148 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2150 if (find_regno_fusage (sibling, USE, 3))
2151 return 0;
2154 /* ... and that there are no call-saved registers in r0-r2
2155 (always true in the default ABI). */
2156 if (saved_int_regs & 0x7)
2157 return 0;
2160 /* Can't be done if interworking with Thumb, and any registers have been
2161 stacked. */
2162 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2163 return 0;
2165 /* On StrongARM, conditional returns are expensive if they aren't
2166 taken and multiple registers have been stacked. */
2167 if (iscond && arm_tune_strongarm)
2169 /* Conditional return when just the LR is stored is a simple
2170 conditional-load instruction, that's not expensive. */
2171 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2172 return 0;
2174 if (flag_pic
2175 && arm_pic_register != INVALID_REGNUM
2176 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2177 return 0;
2180 /* If there are saved registers but the LR isn't saved, then we need
2181 two instructions for the return. */
2182 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2183 return 0;
2185 /* Can't be done if any of the FPA regs are pushed,
2186 since this also requires an insn. */
2187 if (TARGET_HARD_FLOAT && TARGET_FPA)
2188 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2189 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2190 return 0;
2192 /* Likewise VFP regs. */
2193 if (TARGET_HARD_FLOAT && TARGET_VFP)
2194 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2195 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2196 return 0;
2198 if (TARGET_REALLY_IWMMXT)
2199 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2200 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2201 return 0;
2203 return 1;
2206 /* Return TRUE if int I is a valid immediate ARM constant. */
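/* For example (ARM state): 0xff, 0xff00 and 0xf000000f are all valid,
   being an 8-bit value rotated right by an even amount, whereas 0x101
   and 0xffff are not and must be synthesized with extra instructions.  */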
2209 const_ok_for_arm (HOST_WIDE_INT i)
2211 int lowbit;
2213 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2214 be all zero, or all one. */
2215 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2216 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2217 != ((~(unsigned HOST_WIDE_INT) 0)
2218 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2219 return FALSE;
2221 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2223 /* Fast return for 0 and small values. We must do this for zero, since
2224 the code below can't handle that one case. */
2225 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2226 return TRUE;
2228 /* Get the number of trailing zeros. */
2229 lowbit = ffs((int) i) - 1;
2231 /* Only even shifts are allowed in ARM mode so round down to the
2232 nearest even number. */
2233 if (TARGET_ARM)
2234 lowbit &= ~1;
2236 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2237 return TRUE;
2239 if (TARGET_ARM)
2241 /* Allow rotated constants in ARM mode. */
2242 if (lowbit <= 4
2243 && ((i & ~0xc000003f) == 0
2244 || (i & ~0xf000000f) == 0
2245 || (i & ~0xfc000003) == 0))
2246 return TRUE;
2248 else
2250 HOST_WIDE_INT v;
2252 /* Allow repeated pattern. */
2253 v = i & 0xff;
2254 v |= v << 16;
2255 if (i == v || i == (v | (v << 8)))
2256 return TRUE;
2259 return FALSE;
2262 /* Return true if I is a valid constant for the operation CODE. */
2263 static int
2264 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2266 if (const_ok_for_arm (i))
2267 return 1;
2269 switch (code)
2271 case PLUS:
2272 case COMPARE:
2273 case EQ:
2274 case NE:
2275 case GT:
2276 case LE:
2277 case LT:
2278 case GE:
2279 case GEU:
2280 case LTU:
2281 case GTU:
2282 case LEU:
2283 case UNORDERED:
2284 case ORDERED:
2285 case UNEQ:
2286 case UNGE:
2287 case UNLT:
2288 case UNGT:
2289 case UNLE:
2290 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2292 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2293 case XOR:
2294 return 0;
2296 case IOR:
2297 if (TARGET_THUMB2)
2298 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2299 return 0;
2301 case AND:
2302 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2304 default:
2305 gcc_unreachable ();
2309 /* Emit a sequence of insns to handle a large constant.
2310 CODE is the code of the operation required, it can be any of SET, PLUS,
2311 IOR, AND, XOR, MINUS;
2312 MODE is the mode in which the operation is being performed;
2313 VAL is the integer to operate on;
2314 SOURCE is the other operand (a register, or a null-pointer for SET);
2315 SUBTARGETS means it is safe to create scratch registers if that will
2316 either produce a simpler sequence, or we will want to cse the values.
2317 Return value is the number of insns emitted. */
2319 /* ??? Tweak this for thumb2. */
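/* As a rough illustration: SET of 0x12345678 has no single valid
   immediate, so it is built either as four mov/orr instructions
   (0x12000000, 0x00340000, 0x00005600 and 0x78 are each encodable)
   or, when MOVW/MOVT are available, as a movw/movt pair.  */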
2321 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2322 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2324 rtx cond;
2326 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2327 cond = COND_EXEC_TEST (PATTERN (insn));
2328 else
2329 cond = NULL_RTX;
2331 if (subtargets || code == SET
2332 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2333 && REGNO (target) != REGNO (source)))
2335 /* After arm_reorg has been called, we can't fix up expensive
2336 constants by pushing them into memory so we must synthesize
2337 them in-line, regardless of the cost. This is only likely to
2338 be more costly on chips that have load delay slots and we are
2339 compiling without running the scheduler (so no splitting
2340 occurred before the final instruction emission).
2342 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2344 if (!after_arm_reorg
2345 && !cond
2346 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2347 1, 0)
2348 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2349 + (code != SET))))
2351 if (code == SET)
2353 /* Currently SET is the only monadic value for CODE; all
2354 the rest are dyadic. */
2355 if (TARGET_USE_MOVT)
2356 arm_emit_movpair (target, GEN_INT (val));
2357 else
2358 emit_set_insn (target, GEN_INT (val));
2360 return 1;
2362 else
2364 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2366 if (TARGET_USE_MOVT)
2367 arm_emit_movpair (temp, GEN_INT (val));
2368 else
2369 emit_set_insn (temp, GEN_INT (val));
2371 /* For MINUS, the value is subtracted from, since we never
2372 have subtraction of a constant. */
2373 if (code == MINUS)
2374 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2375 else
2376 emit_set_insn (target,
2377 gen_rtx_fmt_ee (code, mode, source, temp));
2378 return 2;
2383 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2387 /* Return the number of instructions required to synthesize the given
2388 constant, if we start emitting them from bit-position I. */
2389 static int
2390 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2392 HOST_WIDE_INT temp1;
2393 int step_size = TARGET_ARM ? 2 : 1;
2394 int num_insns = 0;
2396 gcc_assert (TARGET_ARM || i == 0);
2400 int end;
2402 if (i <= 0)
2403 i += 32;
2404 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2406 end = i - 8;
2407 if (end < 0)
2408 end += 32;
2409 temp1 = remainder & ((0x0ff << end)
2410 | ((i < end) ? (0xff >> (32 - end)) : 0));
2411 remainder &= ~temp1;
2412 num_insns++;
2413 i -= 8 - step_size;
2415 i -= step_size;
2416 } while (remainder);
2417 return num_insns;
2420 static int
2421 find_best_start (unsigned HOST_WIDE_INT remainder)
2423 int best_consecutive_zeros = 0;
2424 int i;
2425 int best_start = 0;
2427 /* If we aren't targeting ARM, the best place to start is always at
2428 the bottom. */
2429 if (! TARGET_ARM)
2430 return 0;
2432 for (i = 0; i < 32; i += 2)
2434 int consecutive_zeros = 0;
2436 if (!(remainder & (3 << i)))
2438 while ((i < 32) && !(remainder & (3 << i)))
2440 consecutive_zeros += 2;
2441 i += 2;
2443 if (consecutive_zeros > best_consecutive_zeros)
2445 best_consecutive_zeros = consecutive_zeros;
2446 best_start = i - consecutive_zeros;
2448 i -= 2;
2452 /* So long as it won't require any more insns to do so, it's
2453 desirable to emit a small constant (in bits 0...9) in the last
2454 insn. This way there is more chance that it can be combined with
2455 a later addressing insn to form a pre-indexed load or store
2456 operation. Consider:
2458 *((volatile int *)0xe0000100) = 1;
2459 *((volatile int *)0xe0000110) = 2;
2461 We want this to wind up as:
2463 mov rA, #0xe0000000
2464 mov rB, #1
2465 str rB, [rA, #0x100]
2466 mov rB, #2
2467 str rB, [rA, #0x110]
2469 rather than having to synthesize both large constants from scratch.
2471 Therefore, we calculate how many insns would be required to emit
2472 the constant starting from `best_start', and also starting from
2473 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2474 yield a shorter sequence, we may as well use zero. */
2475 if (best_start != 0
2476 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2477 && (count_insns_for_constant (remainder, 0) <=
2478 count_insns_for_constant (remainder, best_start)))
2479 best_start = 0;
2481 return best_start;
2484 /* Emit an instruction with the indicated PATTERN. If COND is
2485 non-NULL, conditionalize the execution of the instruction on COND
2486 being true. */
2488 static void
2489 emit_constant_insn (rtx cond, rtx pattern)
2491 if (cond)
2492 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2493 emit_insn (pattern);
2496 /* As above, but extra parameter GENERATE which, if clear, suppresses
2497 RTL generation. */
2498 /* ??? This needs more work for thumb2. */
2500 static int
2501 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2502 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2503 int generate)
2505 int can_invert = 0;
2506 int can_negate = 0;
2507 int final_invert = 0;
2508 int can_negate_initial = 0;
2509 int i;
2510 int num_bits_set = 0;
2511 int set_sign_bit_copies = 0;
2512 int clear_sign_bit_copies = 0;
2513 int clear_zero_bit_copies = 0;
2514 int set_zero_bit_copies = 0;
2515 int insns = 0;
2516 unsigned HOST_WIDE_INT temp1, temp2;
2517 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2518 int step_size = TARGET_ARM ? 2 : 1;
2520 /* Find out which operations are safe for a given CODE. Also do a quick
2521 check for degenerate cases; these can occur when DImode operations
2522 are split. */
2523 switch (code)
2525 case SET:
2526 can_invert = 1;
2527 can_negate = 1;
2528 break;
2530 case PLUS:
2531 can_negate = 1;
2532 can_negate_initial = 1;
2533 break;
2535 case IOR:
2536 if (remainder == 0xffffffff)
2538 if (generate)
2539 emit_constant_insn (cond,
2540 gen_rtx_SET (VOIDmode, target,
2541 GEN_INT (ARM_SIGN_EXTEND (val))));
2542 return 1;
2545 if (remainder == 0)
2547 if (reload_completed && rtx_equal_p (target, source))
2548 return 0;
2550 if (generate)
2551 emit_constant_insn (cond,
2552 gen_rtx_SET (VOIDmode, target, source));
2553 return 1;
2556 if (TARGET_THUMB2)
2557 can_invert = 1;
2558 break;
2560 case AND:
2561 if (remainder == 0)
2563 if (generate)
2564 emit_constant_insn (cond,
2565 gen_rtx_SET (VOIDmode, target, const0_rtx));
2566 return 1;
2568 if (remainder == 0xffffffff)
2570 if (reload_completed && rtx_equal_p (target, source))
2571 return 0;
2572 if (generate)
2573 emit_constant_insn (cond,
2574 gen_rtx_SET (VOIDmode, target, source));
2575 return 1;
2577 can_invert = 1;
2578 break;
2580 case XOR:
2581 if (remainder == 0)
2583 if (reload_completed && rtx_equal_p (target, source))
2584 return 0;
2585 if (generate)
2586 emit_constant_insn (cond,
2587 gen_rtx_SET (VOIDmode, target, source));
2588 return 1;
2591 if (remainder == 0xffffffff)
2593 if (generate)
2594 emit_constant_insn (cond,
2595 gen_rtx_SET (VOIDmode, target,
2596 gen_rtx_NOT (mode, source)));
2597 return 1;
2599 break;
2601 case MINUS:
2602 /* We treat MINUS as (val - source), since (source - val) is always
2603 passed as (source + (-val)). */
2604 if (remainder == 0)
2606 if (generate)
2607 emit_constant_insn (cond,
2608 gen_rtx_SET (VOIDmode, target,
2609 gen_rtx_NEG (mode, source)));
2610 return 1;
2612 if (const_ok_for_arm (val))
2614 if (generate)
2615 emit_constant_insn (cond,
2616 gen_rtx_SET (VOIDmode, target,
2617 gen_rtx_MINUS (mode, GEN_INT (val),
2618 source)));
2619 return 1;
2621 can_negate = 1;
2623 break;
2625 default:
2626 gcc_unreachable ();
2629 /* If we can do it in one insn get out quickly. */
2630 if (const_ok_for_arm (val)
2631 || (can_negate_initial && const_ok_for_arm (-val))
2632 || (can_invert && const_ok_for_arm (~val)))
2634 if (generate)
2635 emit_constant_insn (cond,
2636 gen_rtx_SET (VOIDmode, target,
2637 (source
2638 ? gen_rtx_fmt_ee (code, mode, source,
2639 GEN_INT (val))
2640 : GEN_INT (val))));
2641 return 1;
2644 /* Calculate a few attributes that may be useful for specific
2645 optimizations. */
2646 /* Count number of leading zeros. */
2647 for (i = 31; i >= 0; i--)
2649 if ((remainder & (1 << i)) == 0)
2650 clear_sign_bit_copies++;
2651 else
2652 break;
2655 /* Count number of leading 1's. */
2656 for (i = 31; i >= 0; i--)
2658 if ((remainder & (1 << i)) != 0)
2659 set_sign_bit_copies++;
2660 else
2661 break;
2664 /* Count number of trailing zeros. */
2665 for (i = 0; i <= 31; i++)
2667 if ((remainder & (1 << i)) == 0)
2668 clear_zero_bit_copies++;
2669 else
2670 break;
2673 /* Count number of trailing 1's. */
2674 for (i = 0; i <= 31; i++)
2676 if ((remainder & (1 << i)) != 0)
2677 set_zero_bit_copies++;
2678 else
2679 break;
2682 switch (code)
2684 case SET:
2685 /* See if we can use movw. */
2686 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2688 if (generate)
2689 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2690 GEN_INT (val)));
2691 return 1;
2694 /* See if we can do this by sign_extending a constant that is known
2695 to be negative. This is a good way of doing it, since the shift
2696 may well merge into a subsequent insn. */
2697 if (set_sign_bit_copies > 1)
2699 if (const_ok_for_arm
2700 (temp1 = ARM_SIGN_EXTEND (remainder
2701 << (set_sign_bit_copies - 1))))
2703 if (generate)
2705 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2706 emit_constant_insn (cond,
2707 gen_rtx_SET (VOIDmode, new_src,
2708 GEN_INT (temp1)));
2709 emit_constant_insn (cond,
2710 gen_ashrsi3 (target, new_src,
2711 GEN_INT (set_sign_bit_copies - 1)));
2713 return 2;
2715 /* For an inverted constant, we will need to set the low bits,
2716 these will be shifted out of harm's way. */
2717 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2718 if (const_ok_for_arm (~temp1))
2720 if (generate)
2722 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2723 emit_constant_insn (cond,
2724 gen_rtx_SET (VOIDmode, new_src,
2725 GEN_INT (temp1)));
2726 emit_constant_insn (cond,
2727 gen_ashrsi3 (target, new_src,
2728 GEN_INT (set_sign_bit_copies - 1)));
2730 return 2;
2734 /* See if we can calculate the value as the difference between two
2735 valid immediates. */
2736 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2738 int topshift = clear_sign_bit_copies & ~1;
2740 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2741 & (0xff000000 >> topshift));
2743 /* If temp1 is zero, then that means the 9 most significant
2744 bits of remainder were 1 and we've caused it to overflow.
2745 When topshift is 0 we don't need to do anything since we
2746 can borrow from 'bit 32'. */
2747 if (temp1 == 0 && topshift != 0)
2748 temp1 = 0x80000000 >> (topshift - 1);
2750 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2752 if (const_ok_for_arm (temp2))
2754 if (generate)
2756 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2757 emit_constant_insn (cond,
2758 gen_rtx_SET (VOIDmode, new_src,
2759 GEN_INT (temp1)));
2760 emit_constant_insn (cond,
2761 gen_addsi3 (target, new_src,
2762 GEN_INT (-temp2)));
2765 return 2;
2769 /* See if we can generate this by setting the bottom (or the top)
2770 16 bits, and then shifting these into the other half of the
2771 word. We only look for the simplest cases, to do more would cost
2772 too much. Be careful, however, not to generate this when the
2773 alternative would take fewer insns. */
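/* E.g. 0x01230123: 0x123 is not a valid immediate, but once it has
   been synthesized into a register the full value follows from a
   single "orr target, source, source, lsl #16".  */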
2774 if (val & 0xffff0000)
2776 temp1 = remainder & 0xffff0000;
2777 temp2 = remainder & 0x0000ffff;
2779 /* Overlaps outside this range are best done using other methods. */
2780 for (i = 9; i < 24; i++)
2782 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2783 && !const_ok_for_arm (temp2))
2785 rtx new_src = (subtargets
2786 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2787 : target);
2788 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2789 source, subtargets, generate);
2790 source = new_src;
2791 if (generate)
2792 emit_constant_insn
2793 (cond,
2794 gen_rtx_SET
2795 (VOIDmode, target,
2796 gen_rtx_IOR (mode,
2797 gen_rtx_ASHIFT (mode, source,
2798 GEN_INT (i)),
2799 source)));
2800 return insns + 1;
2804 /* Don't duplicate cases already considered. */
2805 for (i = 17; i < 24; i++)
2807 if (((temp1 | (temp1 >> i)) == remainder)
2808 && !const_ok_for_arm (temp1))
2810 rtx new_src = (subtargets
2811 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2812 : target);
2813 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2814 source, subtargets, generate);
2815 source = new_src;
2816 if (generate)
2817 emit_constant_insn
2818 (cond,
2819 gen_rtx_SET (VOIDmode, target,
2820 gen_rtx_IOR
2821 (mode,
2822 gen_rtx_LSHIFTRT (mode, source,
2823 GEN_INT (i)),
2824 source)));
2825 return insns + 1;
2829 break;
2831 case IOR:
2832 case XOR:
2833 /* If we have IOR or XOR, and the constant can be loaded in a
2834 single instruction, and we can find a temporary to put it in,
2835 then this can be done in two instructions instead of 3-4. */
2836 if (subtargets
2837 /* TARGET can't be NULL if SUBTARGETS is 0 */
2838 || (reload_completed && !reg_mentioned_p (target, source)))
2840 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2842 if (generate)
2844 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2846 emit_constant_insn (cond,
2847 gen_rtx_SET (VOIDmode, sub,
2848 GEN_INT (val)));
2849 emit_constant_insn (cond,
2850 gen_rtx_SET (VOIDmode, target,
2851 gen_rtx_fmt_ee (code, mode,
2852 source, sub)));
2854 return 2;
2858 if (code == XOR)
2859 break;
2861 /* Convert.
2862 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
2863 and the remainder 0s for e.g. 0xfff00000)
2864 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2866 This can be done in 2 instructions by using shifts with mov or mvn.
2867 e.g. for
2868 x = x | 0xfff00000;
2869 we generate.
2870 mvn r0, r0, asl #12
2871 mvn r0, r0, lsr #12 */
2872 if (set_sign_bit_copies > 8
2873 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2875 if (generate)
2877 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2878 rtx shift = GEN_INT (set_sign_bit_copies);
2880 emit_constant_insn
2881 (cond,
2882 gen_rtx_SET (VOIDmode, sub,
2883 gen_rtx_NOT (mode,
2884 gen_rtx_ASHIFT (mode,
2885 source,
2886 shift))));
2887 emit_constant_insn
2888 (cond,
2889 gen_rtx_SET (VOIDmode, target,
2890 gen_rtx_NOT (mode,
2891 gen_rtx_LSHIFTRT (mode, sub,
2892 shift))));
2894 return 2;
2897 /* Convert
2898 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2900 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2902 For example, r0 = r0 | 0xfff
2903 mvn r0, r0, lsr #12
2904 mvn r0, r0, asl #12
2907 if (set_zero_bit_copies > 8
2908 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2910 if (generate)
2912 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2913 rtx shift = GEN_INT (set_zero_bit_copies);
2915 emit_constant_insn
2916 (cond,
2917 gen_rtx_SET (VOIDmode, sub,
2918 gen_rtx_NOT (mode,
2919 gen_rtx_LSHIFTRT (mode,
2920 source,
2921 shift))));
2922 emit_constant_insn
2923 (cond,
2924 gen_rtx_SET (VOIDmode, target,
2925 gen_rtx_NOT (mode,
2926 gen_rtx_ASHIFT (mode, sub,
2927 shift))));
2929 return 2;
2932 /* This will never be reached for Thumb-2 because orn is a valid
2933 instruction. This is for Thumb-1 and the 32-bit ARM cases.
2935 x = y | constant (such that ~constant is a valid constant)
2936 Transform this to
2937 x = ~(~y & ~constant).
2939 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2941 if (generate)
2943 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2944 emit_constant_insn (cond,
2945 gen_rtx_SET (VOIDmode, sub,
2946 gen_rtx_NOT (mode, source)));
2947 source = sub;
2948 if (subtargets)
2949 sub = gen_reg_rtx (mode);
2950 emit_constant_insn (cond,
2951 gen_rtx_SET (VOIDmode, sub,
2952 gen_rtx_AND (mode, source,
2953 GEN_INT (temp1))));
2954 emit_constant_insn (cond,
2955 gen_rtx_SET (VOIDmode, target,
2956 gen_rtx_NOT (mode, sub)));
2958 return 3;
2960 break;
2962 case AND:
2963 /* See if two shifts will do 2 or more insn's worth of work. */
2964 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2966 HOST_WIDE_INT shift_mask = ((0xffffffff
2967 << (32 - clear_sign_bit_copies))
2968 & 0xffffffff);
2970 if ((remainder | shift_mask) != 0xffffffff)
2972 if (generate)
2974 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2975 insns = arm_gen_constant (AND, mode, cond,
2976 remainder | shift_mask,
2977 new_src, source, subtargets, 1);
2978 source = new_src;
2980 else
2982 rtx targ = subtargets ? NULL_RTX : target;
2983 insns = arm_gen_constant (AND, mode, cond,
2984 remainder | shift_mask,
2985 targ, source, subtargets, 0);
2989 if (generate)
2991 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2992 rtx shift = GEN_INT (clear_sign_bit_copies);
2994 emit_insn (gen_ashlsi3 (new_src, source, shift));
2995 emit_insn (gen_lshrsi3 (target, new_src, shift));
2998 return insns + 2;
3001 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3003 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3005 if ((remainder | shift_mask) != 0xffffffff)
3007 if (generate)
3009 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3011 insns = arm_gen_constant (AND, mode, cond,
3012 remainder | shift_mask,
3013 new_src, source, subtargets, 1);
3014 source = new_src;
3016 else
3018 rtx targ = subtargets ? NULL_RTX : target;
3020 insns = arm_gen_constant (AND, mode, cond,
3021 remainder | shift_mask,
3022 targ, source, subtargets, 0);
3026 if (generate)
3028 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3029 rtx shift = GEN_INT (clear_zero_bit_copies);
3031 emit_insn (gen_lshrsi3 (new_src, source, shift));
3032 emit_insn (gen_ashlsi3 (target, new_src, shift));
3035 return insns + 2;
3038 break;
3040 default:
3041 break;
3044 for (i = 0; i < 32; i++)
3045 if (remainder & (1 << i))
3046 num_bits_set++;
3048 if ((code == AND)
3049 || (code != IOR && can_invert && num_bits_set > 16))
3050 remainder ^= 0xffffffff;
3051 else if (code == PLUS && num_bits_set > 16)
3052 remainder = (-remainder) & 0xffffffff;
3054 /* For XOR, if more than half the bits are set and there's a sequence
3055 of more than 8 consecutive ones in the pattern then we can XOR by the
3056 inverted constant and then invert the final result; this may save an
3057 instruction and might also lead to the final mvn being merged with
3058 some other operation. */
3059 else if (code == XOR && num_bits_set > 16
3060 && (count_insns_for_constant (remainder ^ 0xffffffff,
3061 find_best_start
3062 (remainder ^ 0xffffffff))
3063 < count_insns_for_constant (remainder,
3064 find_best_start (remainder))))
3066 remainder ^= 0xffffffff;
3067 final_invert = 1;
3069 else
3071 can_invert = 0;
3072 can_negate = 0;
3075 /* Now try and find a way of doing the job in either two or three
3076 instructions.
3077 We start by looking for the largest block of zeros that are aligned on
3078 a 2-bit boundary, we then fill up the temps, wrapping around to the
3079 top of the word when we drop off the bottom.
3080 In the worst case this code should produce no more than four insns.
3081 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3082 best place to start. */
3084 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3085 the same. */
3087 /* Now start emitting the insns. */
3088 i = find_best_start (remainder);
3091 int end;
3093 if (i <= 0)
3094 i += 32;
3095 if (remainder & (3 << (i - 2)))
3097 end = i - 8;
3098 if (end < 0)
3099 end += 32;
3100 temp1 = remainder & ((0x0ff << end)
3101 | ((i < end) ? (0xff >> (32 - end)) : 0));
3102 remainder &= ~temp1;
3104 if (generate)
3106 rtx new_src, temp1_rtx;
3108 if (code == SET || code == MINUS)
3110 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3111 if (can_invert && code != MINUS)
3112 temp1 = ~temp1;
3114 else
3116 if ((final_invert || remainder) && subtargets)
3117 new_src = gen_reg_rtx (mode);
3118 else
3119 new_src = target;
3120 if (can_invert)
3121 temp1 = ~temp1;
3122 else if (can_negate)
3123 temp1 = -temp1;
3126 temp1 = trunc_int_for_mode (temp1, mode);
3127 temp1_rtx = GEN_INT (temp1);
3129 if (code == SET)
3131 else if (code == MINUS)
3132 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3133 else
3134 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3136 emit_constant_insn (cond,
3137 gen_rtx_SET (VOIDmode, new_src,
3138 temp1_rtx));
3139 source = new_src;
3142 if (code == SET)
3144 can_invert = 0;
3145 code = PLUS;
3147 else if (code == MINUS)
3148 code = PLUS;
3150 insns++;
3151 i -= 8 - step_size;
3153 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3154 shifts. */
3155 i -= step_size;
3157 while (remainder);
3160 if (final_invert)
3162 if (generate)
3163 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3164 gen_rtx_NOT (mode, source)));
3165 insns++;
3168 return insns;
3171 /* Canonicalize a comparison so that we are more likely to recognize it.
3172 This can be done for a few constant compares, where we can make the
3173 immediate value easier to load. */
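/* For instance, "x <= 0x3ff" is rewritten as "x < 0x400"; 0x3ff is not
   a valid ARM immediate but 0x400 is, so no extra constant-loading
   instruction is needed for the comparison.  */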
3175 enum rtx_code
3176 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
3177 rtx * op1)
3179 unsigned HOST_WIDE_INT i = INTVAL (*op1);
3180 unsigned HOST_WIDE_INT maxval;
3181 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3183 switch (code)
3185 case EQ:
3186 case NE:
3187 return code;
3189 case GT:
3190 case LE:
3191 if (i != maxval
3192 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3194 *op1 = GEN_INT (i + 1);
3195 return code == GT ? GE : LT;
3197 break;
3199 case GE:
3200 case LT:
3201 if (i != ~maxval
3202 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3204 *op1 = GEN_INT (i - 1);
3205 return code == GE ? GT : LE;
3207 break;
3209 case GTU:
3210 case LEU:
3211 if (i != ~((unsigned HOST_WIDE_INT) 0)
3212 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3214 *op1 = GEN_INT (i + 1);
3215 return code == GTU ? GEU : LTU;
3217 break;
3219 case GEU:
3220 case LTU:
3221 if (i != 0
3222 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3224 *op1 = GEN_INT (i - 1);
3225 return code == GEU ? GTU : LEU;
3227 break;
3229 default:
3230 gcc_unreachable ();
3233 return code;
3237 /* Define how to find the value returned by a function. */
3239 static rtx
3240 arm_function_value(const_tree type, const_tree func,
3241 bool outgoing ATTRIBUTE_UNUSED)
3243 enum machine_mode mode;
3244 int unsignedp ATTRIBUTE_UNUSED;
3245 rtx r ATTRIBUTE_UNUSED;
3247 mode = TYPE_MODE (type);
3249 if (TARGET_AAPCS_BASED)
3250 return aapcs_allocate_return_reg (mode, type, func);
3252 /* Promote integer types. */
3253 if (INTEGRAL_TYPE_P (type))
3254 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3256 /* Promotes small structs returned in a register to full-word size
3257 for big-endian AAPCS. */
3258 if (arm_return_in_msb (type))
3260 HOST_WIDE_INT size = int_size_in_bytes (type);
3261 if (size % UNITS_PER_WORD != 0)
3263 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3264 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3268 return LIBCALL_VALUE (mode);
3271 static int
3272 libcall_eq (const void *p1, const void *p2)
3274 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3277 static hashval_t
3278 libcall_hash (const void *p1)
3280 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3283 static void
3284 add_libcall (htab_t htab, rtx libcall)
3286 *htab_find_slot (htab, libcall, INSERT) = libcall;
3289 static bool
3290 arm_libcall_uses_aapcs_base (const_rtx libcall)
3292 static bool init_done = false;
3293 static htab_t libcall_htab;
3295 if (!init_done)
3297 init_done = true;
3299 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3300 NULL);
3301 add_libcall (libcall_htab,
3302 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3303 add_libcall (libcall_htab,
3304 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3305 add_libcall (libcall_htab,
3306 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3307 add_libcall (libcall_htab,
3308 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3310 add_libcall (libcall_htab,
3311 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3312 add_libcall (libcall_htab,
3313 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3314 add_libcall (libcall_htab,
3315 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3316 add_libcall (libcall_htab,
3317 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3319 add_libcall (libcall_htab,
3320 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3321 add_libcall (libcall_htab,
3322 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3323 add_libcall (libcall_htab,
3324 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3325 add_libcall (libcall_htab,
3326 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3327 add_libcall (libcall_htab,
3328 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3329 add_libcall (libcall_htab,
3330 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3333 return libcall && htab_find (libcall_htab, libcall) != NULL;
3337 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3339 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3340 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3342 /* The following libcalls return their result in integer registers,
3343 even though they return a floating point value. */
3344 if (arm_libcall_uses_aapcs_base (libcall))
3345 return gen_rtx_REG (mode, ARG_REGISTER(1));
3349 return LIBCALL_VALUE (mode);
3352 /* Determine the amount of memory needed to store the possible return
3353 registers of an untyped call. */
3355 arm_apply_result_size (void)
3357 int size = 16;
3359 if (TARGET_32BIT)
3361 if (TARGET_HARD_FLOAT_ABI)
3363 if (TARGET_VFP)
3364 size += 32;
3365 if (TARGET_FPA)
3366 size += 12;
3367 if (TARGET_MAVERICK)
3368 size += 8;
3370 if (TARGET_IWMMXT_ABI)
3371 size += 8;
3374 return size;
3377 /* Decide whether TYPE should be returned in memory (true)
3378 or in a register (false). FNTYPE is the type of the function making
3379 the call. */
3380 static bool
3381 arm_return_in_memory (const_tree type, const_tree fntype)
3383 HOST_WIDE_INT size;
3385 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3387 if (TARGET_AAPCS_BASED)
3389 /* Simple, non-aggregate types (i.e. not including vectors and
3390 complex) are always returned in a register (or registers).
3391 We don't care about which register here, so we can short-cut
3392 some of the detail. */
3393 if (!AGGREGATE_TYPE_P (type)
3394 && TREE_CODE (type) != VECTOR_TYPE
3395 && TREE_CODE (type) != COMPLEX_TYPE)
3396 return false;
3398 /* Any return value that is no larger than one word can be
3399 returned in r0. */
3400 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3401 return false;
3403 /* Check any available co-processors to see if they accept the
3404 type as a register candidate (VFP, for example, can return
3405 some aggregates in consecutive registers). These aren't
3406 available if the call is variadic. */
3407 if (aapcs_select_return_coproc (type, fntype) >= 0)
3408 return false;
3410 /* Vector values should be returned using ARM registers, not
3411 memory (unless they're over 16 bytes, which will break since
3412 we only have four call-clobbered registers to play with). */
3413 if (TREE_CODE (type) == VECTOR_TYPE)
3414 return (size < 0 || size > (4 * UNITS_PER_WORD));
3416 /* The rest go in memory. */
3417 return true;
3420 if (TREE_CODE (type) == VECTOR_TYPE)
3421 return (size < 0 || size > (4 * UNITS_PER_WORD));
3423 if (!AGGREGATE_TYPE_P (type) &&
3424 (TREE_CODE (type) != VECTOR_TYPE))
3425 /* All simple types are returned in registers. */
3426 return false;
3428 if (arm_abi != ARM_ABI_APCS)
3430 /* ATPCS and later return aggregate types in memory only if they are
3431 larger than a word (or are variable size). */
3432 return (size < 0 || size > UNITS_PER_WORD);
3435 /* For the arm-wince targets we choose to be compatible with Microsoft's
3436 ARM and Thumb compilers, which always return aggregates in memory. */
3437 #ifndef ARM_WINCE
3438 /* All structures/unions bigger than one word are returned in memory.
3439 Also catch the case where int_size_in_bytes returns -1. In this case
3440 the aggregate is either huge or of variable size, and in either case
3441 we will want to return it via memory and not in a register. */
3442 if (size < 0 || size > UNITS_PER_WORD)
3443 return true;
3445 if (TREE_CODE (type) == RECORD_TYPE)
3447 tree field;
3449 /* For a struct the APCS says that we only return in a register
3450 if the type is 'integer like' and every addressable element
3451 has an offset of zero. For practical purposes this means
3452 that the structure can have at most one non bit-field element
3453 and that this element must be the first one in the structure. */
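/* Concretely: "struct { int x; }" is integer-like and comes back in
   r0, while "struct { float f; }" (floating point member) and
   "struct { short a, b; }" (a second addressable member) are returned
   in memory.  */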
3455 /* Find the first field, ignoring non FIELD_DECL things which will
3456 have been created by C++. */
3457 for (field = TYPE_FIELDS (type);
3458 field && TREE_CODE (field) != FIELD_DECL;
3459 field = TREE_CHAIN (field))
3460 continue;
3462 if (field == NULL)
3463 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3465 /* Check that the first field is valid for returning in a register. */
3467 /* ... Floats are not allowed */
3468 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3469 return true;
3471 /* ... Aggregates that are not themselves valid for returning in
3472 a register are not allowed. */
3473 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3474 return true;
3476 /* Now check the remaining fields, if any. Only bitfields are allowed,
3477 since they are not addressable. */
3478 for (field = TREE_CHAIN (field);
3479 field;
3480 field = TREE_CHAIN (field))
3482 if (TREE_CODE (field) != FIELD_DECL)
3483 continue;
3485 if (!DECL_BIT_FIELD_TYPE (field))
3486 return true;
3489 return false;
3492 if (TREE_CODE (type) == UNION_TYPE)
3494 tree field;
3496 /* Unions can be returned in registers if every element is
3497 integral, or can be returned in an integer register. */
3498 for (field = TYPE_FIELDS (type);
3499 field;
3500 field = TREE_CHAIN (field))
3502 if (TREE_CODE (field) != FIELD_DECL)
3503 continue;
3505 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3506 return true;
3508 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3509 return true;
3512 return false;
3514 #endif /* not ARM_WINCE */
3516 /* Return all other types in memory. */
3517 return true;
3520 /* Indicate whether or not words of a double are in big-endian order. */
3523 arm_float_words_big_endian (void)
3525 if (TARGET_MAVERICK)
3526 return 0;
3528 /* For FPA, float words are always big-endian. For VFP, float words
3529 follow the memory system mode. */
3531 if (TARGET_FPA)
3533 return 1;
3536 if (TARGET_VFP)
3537 return (TARGET_BIG_END ? 1 : 0);
3539 return 1;
3542 const struct pcs_attribute_arg
3544 const char *arg;
3545 enum arm_pcs value;
3546 } pcs_attribute_args[] =
3548 {"aapcs", ARM_PCS_AAPCS},
3549 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3550 #if 0
3551 /* We could recognize these, but changes would be needed elsewhere
3552 * to implement them. */
3553 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3554 {"atpcs", ARM_PCS_ATPCS},
3555 {"apcs", ARM_PCS_APCS},
3556 #endif
3557 {NULL, ARM_PCS_UNKNOWN}
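/* For example, on an AAPCS target an individual function can be made
   to pass and return floating point values in VFP registers with

     double dot (double, double) __attribute__ ((pcs ("aapcs-vfp")));

   (the function name is illustrative).  */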
3560 static enum arm_pcs
3561 arm_pcs_from_attribute (tree attr)
3563 const struct pcs_attribute_arg *ptr;
3564 const char *arg;
3566 /* Get the value of the argument. */
3567 if (TREE_VALUE (attr) == NULL_TREE
3568 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3569 return ARM_PCS_UNKNOWN;
3571 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3573 /* Check it against the list of known arguments. */
3574 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3575 if (streq (arg, ptr->arg))
3576 return ptr->value;
3578 /* An unrecognized PCS variant. */
3579 return ARM_PCS_UNKNOWN;
3582 /* Get the PCS variant to use for this call. TYPE is the function's type
3583 specification, DECL is the specific declaration. DECL may be null if
3584 the call could be indirect or if this is a library call. */
3585 static enum arm_pcs
3586 arm_get_pcs_model (const_tree type, const_tree decl)
3588 bool user_convention = false;
3589 enum arm_pcs user_pcs = arm_pcs_default;
3590 tree attr;
3592 gcc_assert (type);
3594 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3595 if (attr)
3597 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3598 user_convention = true;
3601 if (TARGET_AAPCS_BASED)
3603 /* Detect varargs functions. These always use the base rules
3604 (no argument is ever a candidate for a co-processor
3605 register). */
3606 bool base_rules = (TYPE_ARG_TYPES (type) != 0
3607 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3608 != void_type_node));
3610 if (user_convention)
3612 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3613 sorry ("Non-AAPCS derived PCS variant");
3614 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3615 error ("Variadic functions must use the base AAPCS variant");
3618 if (base_rules)
3619 return ARM_PCS_AAPCS;
3620 else if (user_convention)
3621 return user_pcs;
3622 else if (decl && flag_unit_at_a_time)
3624 /* Local functions never leak outside this compilation unit,
3625 so we are free to use whatever conventions are
3626 appropriate. */
3627 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3628 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3629 if (i && i->local)
3630 return ARM_PCS_AAPCS_LOCAL;
3633 else if (user_convention && user_pcs != arm_pcs_default)
3634 sorry ("PCS variant");
3636 /* For everything else we use the target's default. */
3637 return arm_pcs_default;
3641 static void
3642 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3643 const_tree fntype ATTRIBUTE_UNUSED,
3644 rtx libcall ATTRIBUTE_UNUSED,
3645 const_tree fndecl ATTRIBUTE_UNUSED)
3647 /* Record the unallocated VFP registers. */
3648 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3649 pcum->aapcs_vfp_reg_alloc = 0;
3652 /* Walk down the type tree of TYPE counting consecutive base elements.
3653 If *MODEP is VOIDmode, then set it to the first valid floating point
3654 type. If a non-floating point type is found, or if a floating point
3655 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3656 otherwise return the count in the sub-tree. */
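/* For example, "struct { double x, y; }" yields *MODEP == DFmode with
   a count of 2 (a homogeneous aggregate of two doubles), whereas
   "struct { float f; int i; }" returns -1 because the int member is
   not a floating point type.  */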
3657 static int
3658 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3660 enum machine_mode mode;
3661 HOST_WIDE_INT size;
3663 switch (TREE_CODE (type))
3665 case REAL_TYPE:
3666 mode = TYPE_MODE (type);
3667 if (mode != DFmode && mode != SFmode)
3668 return -1;
3670 if (*modep == VOIDmode)
3671 *modep = mode;
3673 if (*modep == mode)
3674 return 1;
3676 break;
3678 case COMPLEX_TYPE:
3679 mode = TYPE_MODE (TREE_TYPE (type));
3680 if (mode != DFmode && mode != SFmode)
3681 return -1;
3683 if (*modep == VOIDmode)
3684 *modep = mode;
3686 if (*modep == mode)
3687 return 2;
3689 break;
3691 case VECTOR_TYPE:
3692 /* Use V2SImode and V4SImode as representatives of all 64-bit
3693 and 128-bit vector types, whether or not those modes are
3694 supported with the present options. */
3695 size = int_size_in_bytes (type);
3696 switch (size)
3698 case 8:
3699 mode = V2SImode;
3700 break;
3701 case 16:
3702 mode = V4SImode;
3703 break;
3704 default:
3705 return -1;
3708 if (*modep == VOIDmode)
3709 *modep = mode;
3711 /* Vector modes are considered to be opaque: two vectors are
3712 equivalent for the purposes of being homogeneous aggregates
3713 if they are the same size. */
3714 if (*modep == mode)
3715 return 1;
3717 break;
3719 case ARRAY_TYPE:
3721 int count;
3722 tree index = TYPE_DOMAIN (type);
3724 /* Can't handle incomplete types. */
3725 if (!COMPLETE_TYPE_P(type))
3726 return -1;
3728 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3729 if (count == -1
3730 || !index
3731 || !TYPE_MAX_VALUE (index)
3732 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3733 || !TYPE_MIN_VALUE (index)
3734 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3735 || count < 0)
3736 return -1;
3738 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3739 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3741 /* There must be no padding. */
3742 if (!host_integerp (TYPE_SIZE (type), 1)
3743 || (tree_low_cst (TYPE_SIZE (type), 1)
3744 != count * GET_MODE_BITSIZE (*modep)))
3745 return -1;
3747 return count;
3750 case RECORD_TYPE:
3752 int count = 0;
3753 int sub_count;
3754 tree field;
3756 /* Can't handle incomplete types. */
3757 if (!COMPLETE_TYPE_P(type))
3758 return -1;
3760 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3762 if (TREE_CODE (field) != FIELD_DECL)
3763 continue;
3765 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3766 if (sub_count < 0)
3767 return -1;
3768 count += sub_count;
3771 /* There must be no padding. */
3772 if (!host_integerp (TYPE_SIZE (type), 1)
3773 || (tree_low_cst (TYPE_SIZE (type), 1)
3774 != count * GET_MODE_BITSIZE (*modep)))
3775 return -1;
3777 return count;
3780 case UNION_TYPE:
3781 case QUAL_UNION_TYPE:
3783 /* These aren't very interesting except in a degenerate case. */
3784 int count = 0;
3785 int sub_count;
3786 tree field;
3788 /* Can't handle incomplete types. */
3789 if (!COMPLETE_TYPE_P(type))
3790 return -1;
3792 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3794 if (TREE_CODE (field) != FIELD_DECL)
3795 continue;
3797 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3798 if (sub_count < 0)
3799 return -1;
3800 count = count > sub_count ? count : sub_count;
3803 /* There must be no padding. */
3804 if (!host_integerp (TYPE_SIZE (type), 1)
3805 || (tree_low_cst (TYPE_SIZE (type), 1)
3806 != count * GET_MODE_BITSIZE (*modep)))
3807 return -1;
3809 return count;
3812 default:
3813 break;
3816 return -1;
3819 /* Return true if PCS_VARIANT should use VFP registers. */
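/* As an illustrative note: compiling with -mfloat-abi=hard, or declaring a
   function with __attribute__ ((pcs ("aapcs-vfp"))), selects
   ARM_PCS_AAPCS_VFP and therefore answers true here.  */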
3820 static bool
3821 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3823 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3824 return true;
3826 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3827 return false;
3829 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3830 (TARGET_VFP_DOUBLE || !is_double));
3833 static bool
3834 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3835 enum machine_mode mode, const_tree type,
3836 enum machine_mode *base_mode, int *count)
3838 enum machine_mode new_mode = VOIDmode;
3840 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3841 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3842 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3844 *count = 1;
3845 new_mode = mode;
3847 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3849 *count = 2;
3850 new_mode = (mode == DCmode ? DFmode : SFmode);
3852 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3854 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3856 if (ag_count > 0 && ag_count <= 4)
3857 *count = ag_count;
3858 else
3859 return false;
3861 else
3862 return false;
3865 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3866 return false;
3868 *base_mode = new_mode;
3869 return true;
3872 static bool
3873 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3874 enum machine_mode mode, const_tree type)
3876 int count ATTRIBUTE_UNUSED;
3877 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3879 if (!use_vfp_abi (pcs_variant, false))
3880 return false;
3881 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3882 &ag_mode, &count);
3885 static bool
3886 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3887 const_tree type)
3889 if (!use_vfp_abi (pcum->pcs_variant, false))
3890 return false;
3892 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3893 &pcum->aapcs_vfp_rmode,
3894 &pcum->aapcs_vfp_rcount);
3897 static bool
3898 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3899 const_tree type ATTRIBUTE_UNUSED)
3901 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3902 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3903 int regno;
3905 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3906 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3908 pcum->aapcs_vfp_reg_alloc = mask << regno;
3909 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3911 int i;
3912 int rcount = pcum->aapcs_vfp_rcount;
3913 int rshift = shift;
3914 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3915 rtx par;
3916 if (!TARGET_NEON)
3918 /* Avoid using unsupported vector modes. */
3919 if (rmode == V2SImode)
3920 rmode = DImode;
3921 else if (rmode == V4SImode)
3923 rmode = DImode;
3924 rcount *= 2;
3925 rshift /= 2;
3928 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3929 for (i = 0; i < rcount; i++)
3931 rtx tmp = gen_rtx_REG (rmode,
3932 FIRST_VFP_REGNUM + regno + i * rshift);
3933 tmp = gen_rtx_EXPR_LIST
3934 (VOIDmode, tmp,
3935 GEN_INT (i * GET_MODE_SIZE (rmode)));
3936 XVECEXP (par, 0, i) = tmp;
3939 pcum->aapcs_reg = par;
3941 else
3942 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3943 return true;
3945 return false;
3948 static rtx
3949 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
3950 enum machine_mode mode,
3951 const_tree type ATTRIBUTE_UNUSED)
3953 if (!use_vfp_abi (pcs_variant, false))
3954 return NULL;
3956 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3958 int count;
3959 enum machine_mode ag_mode;
3960 int i;
3961 rtx par;
3962 int shift;
3964 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3965 &ag_mode, &count);
3967 if (!TARGET_NEON)
3969 if (ag_mode == V2SImode)
3970 ag_mode = DImode;
3971 else if (ag_mode == V4SImode)
3973 ag_mode = DImode;
3974 count *= 2;
3977 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
3978 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3979 for (i = 0; i < count; i++)
3981 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
3982 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
3983 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
3984 XVECEXP (par, 0, i) = tmp;
3987 return par;
3990 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
3993 static void
3994 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
3995 enum machine_mode mode ATTRIBUTE_UNUSED,
3996 const_tree type ATTRIBUTE_UNUSED)
3998 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
3999 pcum->aapcs_vfp_reg_alloc = 0;
4000 return;
4003 #define AAPCS_CP(X) \
4005 aapcs_ ## X ## _cum_init, \
4006 aapcs_ ## X ## _is_call_candidate, \
4007 aapcs_ ## X ## _allocate, \
4008 aapcs_ ## X ## _is_return_candidate, \
4009 aapcs_ ## X ## _allocate_return_reg, \
4010 aapcs_ ## X ## _advance \
4013 /* Table of co-processors that can be used to pass arguments in
4014 registers. Ideally no argument should be a candidate for more than
4015 one co-processor table entry, but the table is processed in order
4016 and stops after the first match. If that entry then fails to put
4017 the argument into a co-processor register, the argument will go on
4018 the stack. */
4019 static struct
4021 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4022 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4024 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4025 BLKmode) is a candidate for this co-processor's registers; this
4026 function should ignore any position-dependent state in
4027 CUMULATIVE_ARGS and only use call-type dependent information. */
4028 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4030 /* Return true if the argument does get a co-processor register; it
4031 should set aapcs_reg to an RTX of the register allocated as is
4032 required for a return from FUNCTION_ARG. */
4033 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4035 /* Return true if a result of mode MODE (or type TYPE if MODE is
4036 BLKmode) can be returned in this co-processor's registers. */
4037 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4039 /* Allocate and return an RTX element to hold the return value of a
4040 call; this routine must not fail and will only be called if
4041 is_return_candidate returned true with the same parameters. */
4042 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4044 /* Finish processing this argument and prepare to start processing
4045 the next one. */
4046 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4047 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4049 AAPCS_CP(vfp)
4052 #undef AAPCS_CP
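/* With the single VFP slot above, the table entry expands (roughly) to
     { aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate, aapcs_vfp_allocate,
       aapcs_vfp_is_return_candidate, aapcs_vfp_allocate_return_reg,
       aapcs_vfp_advance }.  */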
4054 static int
4055 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4056 tree type)
4058 int i;
4060 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4061 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4062 return i;
4064 return -1;
4067 static int
4068 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4070 /* We aren't passed a decl, so we can't check that a call is local.
4071 However, it isn't clear that that would be a win anyway, since it
4072 might limit some tail-calling opportunities. */
4073 enum arm_pcs pcs_variant;
4075 if (fntype)
4077 const_tree fndecl = NULL_TREE;
4079 if (TREE_CODE (fntype) == FUNCTION_DECL)
4081 fndecl = fntype;
4082 fntype = TREE_TYPE (fntype);
4085 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4087 else
4088 pcs_variant = arm_pcs_default;
4090 if (pcs_variant != ARM_PCS_AAPCS)
4092 int i;
4094 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4095 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4096 TYPE_MODE (type),
4097 type))
4098 return i;
4100 return -1;
4103 static rtx
4104 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4105 const_tree fntype)
4107 /* We aren't passed a decl, so we can't check that a call is local.
4108 However, it isn't clear that that would be a win anyway, since it
4109 might limit some tail-calling opportunities. */
4110 enum arm_pcs pcs_variant;
4111 int unsignedp ATTRIBUTE_UNUSED;
4113 if (fntype)
4115 const_tree fndecl = NULL_TREE;
4117 if (TREE_CODE (fntype) == FUNCTION_DECL)
4119 fndecl = fntype;
4120 fntype = TREE_TYPE (fntype);
4123 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4125 else
4126 pcs_variant = arm_pcs_default;
4128 /* Promote integer types. */
4129 if (type && INTEGRAL_TYPE_P (type))
4130 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4132 if (pcs_variant != ARM_PCS_AAPCS)
4134 int i;
4136 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4137 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4138 type))
4139 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4140 mode, type);
4143 /* Promotes small structs returned in a register to full-word size
4144 for big-endian AAPCS. */
4145 if (type && arm_return_in_msb (type))
4147 HOST_WIDE_INT size = int_size_in_bytes (type);
4148 if (size % UNITS_PER_WORD != 0)
4150 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4151 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4155 return gen_rtx_REG (mode, R0_REGNUM);
4158 static rtx
4159 aapcs_libcall_value (enum machine_mode mode)
4161 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4164 /* Lay out a function argument using the AAPCS rules. The rule
4165 numbers referred to here are those in the AAPCS. */
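/* As an illustration of the base (soft-float) rules, consider
     void f (int a, double d, int b);
   A goes in r0 (rule C4); D needs doubleword alignment, so the NCRN is
   rounded up to 2 (C3) and D takes r2-r3 (C4); B then finds no core
   registers left and goes on the stack (rules C6-C8).  This is an
   illustrative sketch, not text from the AAPCS itself.  */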
4166 static void
4167 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4168 tree type, int named)
4170 int nregs, nregs2;
4171 int ncrn;
4173 /* We only need to do this once per argument. */
4174 if (pcum->aapcs_arg_processed)
4175 return;
4177 pcum->aapcs_arg_processed = true;
4179 /* Special case: if named is false then we are handling an incoming
4180 anonymous argument which is on the stack. */
4181 if (!named)
4182 return;
4184 /* Is this a potential co-processor register candidate? */
4185 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4187 int slot = aapcs_select_call_coproc (pcum, mode, type);
4188 pcum->aapcs_cprc_slot = slot;
4190 /* We don't have to apply any of the rules from part B of the
4191 preparation phase, these are handled elsewhere in the
4192 compiler. */
4194 if (slot >= 0)
4196 /* A Co-processor register candidate goes either in its own
4197 class of registers or on the stack. */
4198 if (!pcum->aapcs_cprc_failed[slot])
4200 /* C1.cp - Try to allocate the argument to co-processor
4201 registers. */
4202 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4203 return;
4205 /* C2.cp - Put the argument on the stack and note that we
4206 can't assign any more candidates in this slot. We also
4207 need to note that we have allocated stack space, so that
4208 we won't later try to split a non-cprc candidate between
4209 core registers and the stack. */
4210 pcum->aapcs_cprc_failed[slot] = true;
4211 pcum->can_split = false;
4214 /* We didn't get a register, so this argument goes on the
4215 stack. */
4216 gcc_assert (pcum->can_split == false);
4217 return;
4221 /* C3 - For double-word aligned arguments, round the NCRN up to the
4222 next even number. */
4223 ncrn = pcum->aapcs_ncrn;
4224 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4225 ncrn++;
4227 nregs = ARM_NUM_REGS2(mode, type);
4229 /* Sigh, this test should really assert that nregs > 0, but a GCC
4230 extension allows empty structs and then gives them empty size; it
4231 then allows such a structure to be passed by value. For some of
4232 the code below we have to pretend that such an argument has
4233 non-zero size so that we 'locate' it correctly either in
4234 registers or on the stack. */
4235 gcc_assert (nregs >= 0);
4237 nregs2 = nregs ? nregs : 1;
4239 /* C4 - Argument fits entirely in core registers. */
4240 if (ncrn + nregs2 <= NUM_ARG_REGS)
4242 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4243 pcum->aapcs_next_ncrn = ncrn + nregs;
4244 return;
4247 /* C5 - Some core registers left and there are no arguments already
4248 on the stack: split this argument between the remaining core
4249 registers and the stack. */
4250 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4252 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4253 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4254 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4255 return;
4258 /* C6 - NCRN is set to 4. */
4259 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4261 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4262 return;
4265 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4266 for a call to a function whose data type is FNTYPE.
4267 For a library call, FNTYPE is NULL. */
4268 void
4269 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4270 rtx libname,
4271 tree fndecl ATTRIBUTE_UNUSED)
4273 /* Long call handling. */
4274 if (fntype)
4275 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4276 else
4277 pcum->pcs_variant = arm_pcs_default;
4279 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4281 if (arm_libcall_uses_aapcs_base (libname))
4282 pcum->pcs_variant = ARM_PCS_AAPCS;
4284 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4285 pcum->aapcs_reg = NULL_RTX;
4286 pcum->aapcs_partial = 0;
4287 pcum->aapcs_arg_processed = false;
4288 pcum->aapcs_cprc_slot = -1;
4289 pcum->can_split = true;
4291 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4293 int i;
4295 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4297 pcum->aapcs_cprc_failed[i] = false;
4298 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4301 return;
4304 /* Legacy ABIs */
4306 /* On the ARM, the offset starts at 0. */
4307 pcum->nregs = 0;
4308 pcum->iwmmxt_nregs = 0;
4309 pcum->can_split = true;
4311 /* Varargs vectors are treated the same as long long.
4312 named_count avoids having to change the way arm handles 'named'. */
4313 pcum->named_count = 0;
4314 pcum->nargs = 0;
4316 if (TARGET_REALLY_IWMMXT && fntype)
4318 tree fn_arg;
4320 for (fn_arg = TYPE_ARG_TYPES (fntype);
4321 fn_arg;
4322 fn_arg = TREE_CHAIN (fn_arg))
4323 pcum->named_count += 1;
4325 if (! pcum->named_count)
4326 pcum->named_count = INT_MAX;
4331 /* Return true if mode/type need doubleword alignment. */
4332 bool
4333 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4335 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4336 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
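/* For instance, "long long" and "double" arguments (64-bit alignment)
   need an even starting register or a doubleword-aligned stack slot,
   while plain "int" does not (illustrative; the exact alignment comes
   from the selected ABI).  */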
4340 /* Determine where to put an argument to a function.
4341 Value is zero to push the argument on the stack,
4342 or a hard register in which to store the argument.
4344 MODE is the argument's machine mode.
4345 TYPE is the data type of the argument (as a tree).
4346 This is null for libcalls where that information may
4347 not be available.
4348 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4349 the preceding args and about the function being called.
4350 NAMED is nonzero if this argument is a named parameter
4351 (otherwise it is an extra parameter matching an ellipsis). */
4353 rtx
4354 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4355 tree type, int named)
4357 int nregs;
4359 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4360 a call insn (op3 of a call_value insn). */
4361 if (mode == VOIDmode)
4362 return const0_rtx;
4364 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4366 aapcs_layout_arg (pcum, mode, type, named);
4367 return pcum->aapcs_reg;
4370 /* Varargs vectors are treated the same as long long.
4371 named_count avoids having to change the way arm handles 'named'. */
4372 if (TARGET_IWMMXT_ABI
4373 && arm_vector_mode_supported_p (mode)
4374 && pcum->named_count > pcum->nargs + 1)
4376 if (pcum->iwmmxt_nregs <= 9)
4377 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4378 else
4380 pcum->can_split = false;
4381 return NULL_RTX;
4385 /* Put doubleword aligned quantities in even register pairs. */
4386 if (pcum->nregs & 1
4387 && ARM_DOUBLEWORD_ALIGN
4388 && arm_needs_doubleword_align (mode, type))
4389 pcum->nregs++;
4391 if (mode == VOIDmode)
4392 /* Pick an arbitrary value for operand 2 of the call insn. */
4393 return const0_rtx;
4395 /* Only allow splitting an arg between regs and memory if all preceding
4396 args were allocated to regs. For args passed by reference we only count
4397 the reference pointer. */
4398 if (pcum->can_split)
4399 nregs = 1;
4400 else
4401 nregs = ARM_NUM_REGS2 (mode, type);
4403 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4404 return NULL_RTX;
4406 return gen_rtx_REG (mode, pcum->nregs);
4409 static int
4410 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4411 tree type, bool named)
4413 int nregs = pcum->nregs;
4415 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4417 aapcs_layout_arg (pcum, mode, type, named);
4418 return pcum->aapcs_partial;
4421 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4422 return 0;
4424 if (NUM_ARG_REGS > nregs
4425 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4426 && pcum->can_split)
4427 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4429 return 0;
4432 void
4433 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4434 tree type, bool named)
4436 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4438 aapcs_layout_arg (pcum, mode, type, named);
4440 if (pcum->aapcs_cprc_slot >= 0)
4442 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4443 type);
4444 pcum->aapcs_cprc_slot = -1;
4447 /* Generic stuff. */
4448 pcum->aapcs_arg_processed = false;
4449 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4450 pcum->aapcs_reg = NULL_RTX;
4451 pcum->aapcs_partial = 0;
4453 else
4455 pcum->nargs += 1;
4456 if (arm_vector_mode_supported_p (mode)
4457 && pcum->named_count > pcum->nargs
4458 && TARGET_IWMMXT_ABI)
4459 pcum->iwmmxt_nregs += 1;
4460 else
4461 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4465 /* Variable sized types are passed by reference. This is a GCC
4466 extension to the ARM ABI. */
4468 static bool
4469 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4470 enum machine_mode mode ATTRIBUTE_UNUSED,
4471 const_tree type, bool named ATTRIBUTE_UNUSED)
4473 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4476 /* Encode the current state of the #pragma [no_]long_calls. */
4477 typedef enum
4479 OFF, /* No #pragma [no_]long_calls is in effect. */
4480 LONG, /* #pragma long_calls is in effect. */
4481 SHORT /* #pragma no_long_calls is in effect. */
4482 } arm_pragma_enum;
4484 static arm_pragma_enum arm_pragma_long_calls = OFF;
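/* Typical usage of these pragmas (illustrative):

     #pragma long_calls
     void far_func (void);        - given the long_call attribute
     #pragma no_long_calls
     void near_func (void);       - given the short_call attribute
     #pragma long_calls_off
     void normal_func (void);     - back to the command-line default  */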
4486 void
4487 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4489 arm_pragma_long_calls = LONG;
4492 void
4493 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4495 arm_pragma_long_calls = SHORT;
4498 void
4499 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4501 arm_pragma_long_calls = OFF;
4504 /* Handle an attribute requiring a FUNCTION_DECL;
4505 arguments as in struct attribute_spec.handler. */
4506 static tree
4507 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4508 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4510 if (TREE_CODE (*node) != FUNCTION_DECL)
4512 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4513 name);
4514 *no_add_attrs = true;
4517 return NULL_TREE;
4520 /* Handle an "interrupt" or "isr" attribute;
4521 arguments as in struct attribute_spec.handler. */
4522 static tree
4523 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4524 bool *no_add_attrs)
4526 if (DECL_P (*node))
4528 if (TREE_CODE (*node) != FUNCTION_DECL)
4530 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4531 name);
4532 *no_add_attrs = true;
4534 /* FIXME: the argument if any is checked for type attributes;
4535 should it be checked for decl ones? */
4537 else
4539 if (TREE_CODE (*node) == FUNCTION_TYPE
4540 || TREE_CODE (*node) == METHOD_TYPE)
4542 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4544 warning (OPT_Wattributes, "%qE attribute ignored",
4545 name);
4546 *no_add_attrs = true;
4549 else if (TREE_CODE (*node) == POINTER_TYPE
4550 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4551 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4552 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4554 *node = build_variant_type_copy (*node);
4555 TREE_TYPE (*node) = build_type_attribute_variant
4556 (TREE_TYPE (*node),
4557 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4558 *no_add_attrs = true;
4560 else
4562 /* Possibly pass this attribute on from the type to a decl. */
4563 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4564 | (int) ATTR_FLAG_FUNCTION_NEXT
4565 | (int) ATTR_FLAG_ARRAY_NEXT))
4567 *no_add_attrs = true;
4568 return tree_cons (name, args, NULL_TREE);
4570 else
4572 warning (OPT_Wattributes, "%qE attribute ignored",
4573 name);
4578 return NULL_TREE;
4581 /* Handle a "pcs" attribute; arguments as in struct
4582 attribute_spec.handler. */
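/* E.g. (illustrative):
     double f (double) __attribute__ ((pcs ("aapcs-vfp")));
   An unknown variant name is warned about below and the attribute is
   dropped.  */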
4583 static tree
4584 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4585 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4587 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4589 warning (OPT_Wattributes, "%qE attribute ignored", name);
4590 *no_add_attrs = true;
4592 return NULL_TREE;
4595 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4596 /* Handle the "notshared" attribute. This attribute is another way of
4597 requesting hidden visibility. ARM's compiler supports
4598 "__declspec(notshared)"; we support the same thing via an
4599 attribute. */
4601 static tree
4602 arm_handle_notshared_attribute (tree *node,
4603 tree name ATTRIBUTE_UNUSED,
4604 tree args ATTRIBUTE_UNUSED,
4605 int flags ATTRIBUTE_UNUSED,
4606 bool *no_add_attrs)
4608 tree decl = TYPE_NAME (*node);
4610 if (decl)
4612 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4613 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4614 *no_add_attrs = false;
4616 return NULL_TREE;
4618 #endif
4620 /* Return 0 if the attributes for two types are incompatible, 1 if they
4621 are compatible, and 2 if they are nearly compatible (which causes a
4622 warning to be generated). */
4623 static int
4624 arm_comp_type_attributes (const_tree type1, const_tree type2)
4626 int l1, l2, s1, s2;
4628 /* Check for mismatch of non-default calling convention. */
4629 if (TREE_CODE (type1) != FUNCTION_TYPE)
4630 return 1;
4632 /* Check for mismatched call attributes. */
4633 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4634 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4635 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4636 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4638 /* Only bother to check if an attribute is defined. */
4639 if (l1 | l2 | s1 | s2)
4641 /* If one type has an attribute, the other must have the same attribute. */
4642 if ((l1 != l2) || (s1 != s2))
4643 return 0;
4645 /* Disallow mixed attributes. */
4646 if ((l1 & s2) || (l2 & s1))
4647 return 0;
4650 /* Check for mismatched ISR attribute. */
4651 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4652 if (! l1)
4653 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4654 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4655 if (! l2)
4656 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4657 if (l1 != l2)
4658 return 0;
4660 return 1;
4663 /* Assign default attributes to a newly defined type. This is used to
4664 set short_call/long_call attributes for function types of
4665 functions defined inside corresponding #pragma scopes. */
4666 static void
4667 arm_set_default_type_attributes (tree type)
4669 /* Add __attribute__ ((long_call)) to all functions when inside
4670 #pragma long_calls, or __attribute__ ((short_call)) when inside
4671 #pragma no_long_calls. */
4672 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4674 tree type_attr_list, attr_name;
4675 type_attr_list = TYPE_ATTRIBUTES (type);
4677 if (arm_pragma_long_calls == LONG)
4678 attr_name = get_identifier ("long_call");
4679 else if (arm_pragma_long_calls == SHORT)
4680 attr_name = get_identifier ("short_call");
4681 else
4682 return;
4684 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4685 TYPE_ATTRIBUTES (type) = type_attr_list;
4689 /* Return true if DECL is known to be linked into section SECTION. */
4691 static bool
4692 arm_function_in_section_p (tree decl, section *section)
4694 /* We can only be certain about functions defined in the same
4695 compilation unit. */
4696 if (!TREE_STATIC (decl))
4697 return false;
4699 /* Make sure that SYMBOL always binds to the definition in this
4700 compilation unit. */
4701 if (!targetm.binds_local_p (decl))
4702 return false;
4704 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4705 if (!DECL_SECTION_NAME (decl))
4707 /* Make sure that we will not create a unique section for DECL. */
4708 if (flag_function_sections || DECL_ONE_ONLY (decl))
4709 return false;
4712 return function_section (decl) == section;
4715 /* Return nonzero if a 32-bit "long_call" should be generated for
4716 a call from the current function to DECL. We generate a long_call
4717 if the function:
4719 a. has an __attribute__ ((long_call))
4720 or b. is within the scope of a #pragma long_calls
4721 or c. the -mlong-calls command line switch has been specified
4723 However we do not generate a long call if the function:
4725 d. has an __attribute__ ((short_call))
4726 or e. is inside the scope of a #pragma no_long_calls
4727 or f. is defined in the same section as the current function. */
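/* For example (illustrative):
     void remote_handler (void) __attribute__ ((long_call));
     void local_helper (void) __attribute__ ((short_call));
   Calls to the first always use a full 32-bit address sequence; calls to
   the second use a plain BL regardless of -mlong-calls.  */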
4729 bool
4730 arm_is_long_call_p (tree decl)
4732 tree attrs;
4734 if (!decl)
4735 return TARGET_LONG_CALLS;
4737 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4738 if (lookup_attribute ("short_call", attrs))
4739 return false;
4741 /* For "f", be conservative, and only cater for cases in which the
4742 whole of the current function is placed in the same section. */
4743 if (!flag_reorder_blocks_and_partition
4744 && TREE_CODE (decl) == FUNCTION_DECL
4745 && arm_function_in_section_p (decl, current_function_section ()))
4746 return false;
4748 if (lookup_attribute ("long_call", attrs))
4749 return true;
4751 return TARGET_LONG_CALLS;
4754 /* Return nonzero if it is ok to make a tail-call to DECL. */
4755 static bool
4756 arm_function_ok_for_sibcall (tree decl, tree exp)
4758 unsigned long func_type;
4760 if (cfun->machine->sibcall_blocked)
4761 return false;
4763 /* Never tailcall something for which we have no decl, or if we
4764 are generating code for Thumb-1. */
4765 if (decl == NULL || TARGET_THUMB1)
4766 return false;
4768 /* The PIC register is live on entry to VxWorks PLT entries, so we
4769 must make the call before restoring the PIC register. */
4770 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4771 return false;
4773 /* Cannot tail-call to long calls, since these are out of range of
4774 a branch instruction. */
4775 if (arm_is_long_call_p (decl))
4776 return false;
4778 /* If we are interworking and the function is not declared static
4779 then we can't tail-call it unless we know that it exists in this
4780 compilation unit (since it might be a Thumb routine). */
4781 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4782 return false;
4784 func_type = arm_current_func_type ();
4785 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4786 if (IS_INTERRUPT (func_type))
4787 return false;
4789 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4791 /* Check that the return value locations are the same. For
4792 example that we aren't returning a value from the sibling in
4793 a VFP register but then need to transfer it to a core
4794 register. */
4795 rtx a, b;
4797 a = arm_function_value (TREE_TYPE (exp), decl, false);
4798 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4799 cfun->decl, false);
4800 if (!rtx_equal_p (a, b))
4801 return false;
4804 /* Never tailcall if function may be called with a misaligned SP. */
4805 if (IS_STACKALIGN (func_type))
4806 return false;
4808 /* Everything else is ok. */
4809 return true;
4813 /* Addressing mode support functions. */
4815 /* Return nonzero if X is a legitimate immediate operand when compiling
4816 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4817 int
4818 legitimate_pic_operand_p (rtx x)
4820 if (GET_CODE (x) == SYMBOL_REF
4821 || (GET_CODE (x) == CONST
4822 && GET_CODE (XEXP (x, 0)) == PLUS
4823 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4824 return 0;
4826 return 1;
4829 /* Record that the current function needs a PIC register. Initialize
4830 cfun->machine->pic_reg if we have not already done so. */
4832 static void
4833 require_pic_register (void)
4835 /* A lot of the logic here is made obscure by the fact that this
4836 routine gets called as part of the rtx cost estimation process.
4837 We don't want those calls to affect any assumptions about the real
4838 function; and further, we can't call entry_of_function() until we
4839 start the real expansion process. */
4840 if (!crtl->uses_pic_offset_table)
4842 gcc_assert (can_create_pseudo_p ());
4843 if (arm_pic_register != INVALID_REGNUM)
4845 if (!cfun->machine->pic_reg)
4846 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4848 /* Play games to avoid marking the function as needing pic
4849 if we are being called as part of the cost-estimation
4850 process. */
4851 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4852 crtl->uses_pic_offset_table = 1;
4854 else
4856 rtx seq;
4858 if (!cfun->machine->pic_reg)
4859 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4861 /* Play games to avoid marking the function as needing pic
4862 if we are being called as part of the cost-estimation
4863 process. */
4864 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4866 crtl->uses_pic_offset_table = 1;
4867 start_sequence ();
4869 arm_load_pic_register (0UL);
4871 seq = get_insns ();
4872 end_sequence ();
4873 /* We can be called during expansion of PHI nodes, where
4874 we can't yet emit instructions directly in the final
4875 insn stream. Queue the insns on the entry edge, they will
4876 be committed after everything else is expanded. */
4877 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4883 rtx
4884 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4886 if (GET_CODE (orig) == SYMBOL_REF
4887 || GET_CODE (orig) == LABEL_REF)
4889 rtx pic_ref, address;
4890 rtx insn;
4892 if (reg == 0)
4894 gcc_assert (can_create_pseudo_p ());
4895 reg = gen_reg_rtx (Pmode);
4896 address = gen_reg_rtx (Pmode);
4898 else
4899 address = reg;
4901 /* VxWorks does not impose a fixed gap between segments; the run-time
4902 gap can be different from the object-file gap. We therefore can't
4903 use GOTOFF unless we are absolutely sure that the symbol is in the
4904 same segment as the GOT. Unfortunately, the flexibility of linker
4905 scripts means that we can't be sure of that in general, so assume
4906 that GOTOFF is never valid on VxWorks. */
4907 if ((GET_CODE (orig) == LABEL_REF
4908 || (GET_CODE (orig) == SYMBOL_REF &&
4909 SYMBOL_REF_LOCAL_P (orig)))
4910 && NEED_GOT_RELOC
4911 && !TARGET_VXWORKS_RTP)
4912 insn = arm_pic_static_addr (orig, reg);
4913 else
4915 /* If this function doesn't have a pic register, create one now. */
4916 require_pic_register ();
4918 if (TARGET_32BIT)
4919 emit_insn (gen_pic_load_addr_32bit (address, orig));
4920 else /* TARGET_THUMB1 */
4921 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
4923 pic_ref = gen_const_mem (Pmode,
4924 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
4925 address));
4926 insn = emit_move_insn (reg, pic_ref);
4929 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4930 by loop. */
4931 set_unique_reg_note (insn, REG_EQUAL, orig);
4933 return reg;
4935 else if (GET_CODE (orig) == CONST)
4937 rtx base, offset;
4939 if (GET_CODE (XEXP (orig, 0)) == PLUS
4940 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4941 return orig;
4943 /* Handle the case where we have: const (UNSPEC_TLS). */
4944 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4945 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4946 return orig;
4948 /* Handle the case where we have:
4949 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4950 CONST_INT. */
4951 if (GET_CODE (XEXP (orig, 0)) == PLUS
4952 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
4953 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
4955 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
4956 return orig;
4959 if (reg == 0)
4961 gcc_assert (can_create_pseudo_p ());
4962 reg = gen_reg_rtx (Pmode);
4965 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4967 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
4968 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
4969 base == reg ? 0 : reg);
4971 if (GET_CODE (offset) == CONST_INT)
4973 /* The base register doesn't really matter, we only want to
4974 test the index for the appropriate mode. */
4975 if (!arm_legitimate_index_p (mode, offset, SET, 0))
4977 gcc_assert (can_create_pseudo_p ());
4978 offset = force_reg (Pmode, offset);
4981 if (GET_CODE (offset) == CONST_INT)
4982 return plus_constant (base, INTVAL (offset));
4985 if (GET_MODE_SIZE (mode) > 4
4986 && (GET_MODE_CLASS (mode) == MODE_INT
4987 || TARGET_SOFT_FLOAT))
4989 emit_insn (gen_addsi3 (reg, base, offset));
4990 return reg;
4993 return gen_rtx_PLUS (Pmode, base, offset);
4996 return orig;
5000 /* Find a spare register to use during the prolog of a function. */
5002 static int
5003 thumb_find_work_register (unsigned long pushed_regs_mask)
5005 int reg;
5007 /* Check the argument registers first as these are call-used. The
5008 register allocation order means that sometimes r3 might be used
5009 but earlier argument registers might not, so check them all. */
5010 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5011 if (!df_regs_ever_live_p (reg))
5012 return reg;
5014 /* Before going on to check the call-saved registers we can try a couple
5015 more ways of deducing that r3 is available. The first is when we are
5016 pushing anonymous arguments onto the stack and we have less than 4
5017 registers worth of fixed arguments(*). In this case r3 will be part of
5018 the variable argument list and so we can be sure that it will be
5019 pushed right at the start of the function. Hence it will be available
5020 for the rest of the prologue.
5021 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5022 if (cfun->machine->uses_anonymous_args
5023 && crtl->args.pretend_args_size > 0)
5024 return LAST_ARG_REGNUM;
5026 /* The other case is when we have fixed arguments but less than 4 registers
5027 worth. In this case r3 might be used in the body of the function, but
5028 it is not being used to convey an argument into the function. In theory
5029 we could just check crtl->args.size to see how many bytes are
5030 being passed in argument registers, but it seems that it is unreliable.
5031 Sometimes it will have the value 0 when in fact arguments are being
5032 passed. (See testcase execute/20021111-1.c for an example). So we also
5033 check the args_info.nregs field as well. The problem with this field is
5034 that it makes no allowances for arguments that are passed to the
5035 function but which are not used. Hence we could miss an opportunity
5036 when a function has an unused argument in r3. But it is better to be
5037 safe than to be sorry. */
5038 if (! cfun->machine->uses_anonymous_args
5039 && crtl->args.size >= 0
5040 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5041 && crtl->args.info.nregs < 4)
5042 return LAST_ARG_REGNUM;
5044 /* Otherwise look for a call-saved register that is going to be pushed. */
5045 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5046 if (pushed_regs_mask & (1 << reg))
5047 return reg;
5049 if (TARGET_THUMB2)
5051 /* Thumb-2 can use high regs. */
5052 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5053 if (pushed_regs_mask & (1 << reg))
5054 return reg;
5056 /* Something went wrong - thumb_compute_save_reg_mask()
5057 should have arranged for a suitable register to be pushed. */
5058 gcc_unreachable ();
5061 static GTY(()) int pic_labelno;
5063 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5064 low register. */
5066 void
5067 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5069 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5071 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5072 return;
5074 gcc_assert (flag_pic);
5076 pic_reg = cfun->machine->pic_reg;
5077 if (TARGET_VXWORKS_RTP)
5079 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5080 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5081 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5083 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5085 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5086 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5088 else
5090 /* We use an UNSPEC rather than a LABEL_REF because this label
5091 never appears in the code stream. */
5093 labelno = GEN_INT (pic_labelno++);
5094 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5095 l1 = gen_rtx_CONST (VOIDmode, l1);
5097 /* On the ARM the PC register contains 'dot + 8' at the time of the
5098 addition, on the Thumb it is 'dot + 4'. */
5099 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5100 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5101 UNSPEC_GOTSYM_OFF);
5102 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5104 if (TARGET_32BIT)
5106 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5107 if (TARGET_ARM)
5108 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5109 else
5110 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5112 else /* TARGET_THUMB1 */
5114 if (arm_pic_register != INVALID_REGNUM
5115 && REGNO (pic_reg) > LAST_LO_REGNUM)
5117 /* We will have pushed the pic register, so we should always be
5118 able to find a work register. */
5119 pic_tmp = gen_rtx_REG (SImode,
5120 thumb_find_work_register (saved_regs));
5121 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5122 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5124 else
5125 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5126 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5130 /* Need to emit this whether or not we obey regdecls,
5131 since setjmp/longjmp can cause life info to screw up. */
5132 emit_use (pic_reg);
5135 /* Generate code to load the address of a static var when flag_pic is set. */
5136 static rtx
5137 arm_pic_static_addr (rtx orig, rtx reg)
5139 rtx l1, labelno, offset_rtx, insn;
5141 gcc_assert (flag_pic);
5143 /* We use an UNSPEC rather than a LABEL_REF because this label
5144 never appears in the code stream. */
5145 labelno = GEN_INT (pic_labelno++);
5146 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5147 l1 = gen_rtx_CONST (VOIDmode, l1);
5149 /* On the ARM the PC register contains 'dot + 8' at the time of the
5150 addition, on the Thumb it is 'dot + 4'. */
5151 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5152 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5153 UNSPEC_SYMBOL_OFFSET);
5154 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5156 if (TARGET_32BIT)
5158 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5159 if (TARGET_ARM)
5160 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5161 else
5162 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5164 else /* TARGET_THUMB1 */
5166 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5167 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5170 return insn;
5173 /* Return nonzero if X is valid as an ARM state addressing register. */
5174 static int
5175 arm_address_register_rtx_p (rtx x, int strict_p)
5177 int regno;
5179 if (GET_CODE (x) != REG)
5180 return 0;
5182 regno = REGNO (x);
5184 if (strict_p)
5185 return ARM_REGNO_OK_FOR_BASE_P (regno);
5187 return (regno <= LAST_ARM_REGNUM
5188 || regno >= FIRST_PSEUDO_REGISTER
5189 || regno == FRAME_POINTER_REGNUM
5190 || regno == ARG_POINTER_REGNUM);
5193 /* Return TRUE if this rtx is the difference of a symbol and a label,
5194 and will reduce to a PC-relative relocation in the object file.
5195 Expressions like this can be left alone when generating PIC, rather
5196 than forced through the GOT. */
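/* E.g. an RTL expression of the form (minus (symbol_ref "sym") (label_ref L))
   is such a difference and lowers to a simple PC-relative value
   (illustrative sketch).  */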
5197 static int
5198 pcrel_constant_p (rtx x)
5200 if (GET_CODE (x) == MINUS)
5201 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5203 return FALSE;
5206 /* Return nonzero if X is a valid ARM state address operand. */
5207 int
5208 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5209 int strict_p)
5211 bool use_ldrd;
5212 enum rtx_code code = GET_CODE (x);
5214 if (arm_address_register_rtx_p (x, strict_p))
5215 return 1;
5217 use_ldrd = (TARGET_LDRD
5218 && (mode == DImode
5219 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5221 if (code == POST_INC || code == PRE_DEC
5222 || ((code == PRE_INC || code == POST_DEC)
5223 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5224 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5226 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5227 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5228 && GET_CODE (XEXP (x, 1)) == PLUS
5229 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5231 rtx addend = XEXP (XEXP (x, 1), 1);
5233 /* Don't allow ldrd post increment by register because it's hard
5234 to fixup invalid register choices. */
5235 if (use_ldrd
5236 && GET_CODE (x) == POST_MODIFY
5237 && GET_CODE (addend) == REG)
5238 return 0;
5240 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5241 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5244 /* After reload constants split into minipools will have addresses
5245 from a LABEL_REF. */
5246 else if (reload_completed
5247 && (code == LABEL_REF
5248 || (code == CONST
5249 && GET_CODE (XEXP (x, 0)) == PLUS
5250 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5251 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5252 return 1;
5254 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5255 return 0;
5257 else if (code == PLUS)
5259 rtx xop0 = XEXP (x, 0);
5260 rtx xop1 = XEXP (x, 1);
5262 return ((arm_address_register_rtx_p (xop0, strict_p)
5263 && GET_CODE(xop1) == CONST_INT
5264 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5265 || (arm_address_register_rtx_p (xop1, strict_p)
5266 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5269 #if 0
5270 /* Reload currently can't handle MINUS, so disable this for now */
5271 else if (GET_CODE (x) == MINUS)
5273 rtx xop0 = XEXP (x, 0);
5274 rtx xop1 = XEXP (x, 1);
5276 return (arm_address_register_rtx_p (xop0, strict_p)
5277 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5279 #endif
5281 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5282 && code == SYMBOL_REF
5283 && CONSTANT_POOL_ADDRESS_P (x)
5284 && ! (flag_pic
5285 && symbol_mentioned_p (get_pool_constant (x))
5286 && ! pcrel_constant_p (get_pool_constant (x))))
5287 return 1;
5289 return 0;
5292 /* Return nonzero if X is a valid Thumb-2 address operand. */
5293 static int
5294 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5296 bool use_ldrd;
5297 enum rtx_code code = GET_CODE (x);
5299 if (arm_address_register_rtx_p (x, strict_p))
5300 return 1;
5302 use_ldrd = (TARGET_LDRD
5303 && (mode == DImode
5304 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5306 if (code == POST_INC || code == PRE_DEC
5307 || ((code == PRE_INC || code == POST_DEC)
5308 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5309 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5311 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5312 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5313 && GET_CODE (XEXP (x, 1)) == PLUS
5314 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5316 /* Thumb-2 only has autoincrement by constant. */
5317 rtx addend = XEXP (XEXP (x, 1), 1);
5318 HOST_WIDE_INT offset;
5320 if (GET_CODE (addend) != CONST_INT)
5321 return 0;
5323 offset = INTVAL(addend);
5324 if (GET_MODE_SIZE (mode) <= 4)
5325 return (offset > -256 && offset < 256);
5327 return (use_ldrd && offset > -1024 && offset < 1024
5328 && (offset & 3) == 0);
5331 /* After reload constants split into minipools will have addresses
5332 from a LABEL_REF. */
5333 else if (reload_completed
5334 && (code == LABEL_REF
5335 || (code == CONST
5336 && GET_CODE (XEXP (x, 0)) == PLUS
5337 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5338 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5339 return 1;
5341 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5342 return 0;
5344 else if (code == PLUS)
5346 rtx xop0 = XEXP (x, 0);
5347 rtx xop1 = XEXP (x, 1);
5349 return ((arm_address_register_rtx_p (xop0, strict_p)
5350 && thumb2_legitimate_index_p (mode, xop1, strict_p))
5351 || (arm_address_register_rtx_p (xop1, strict_p)
5352 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5355 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5356 && code == SYMBOL_REF
5357 && CONSTANT_POOL_ADDRESS_P (x)
5358 && ! (flag_pic
5359 && symbol_mentioned_p (get_pool_constant (x))
5360 && ! pcrel_constant_p (get_pool_constant (x))))
5361 return 1;
5363 return 0;
5366 /* Return nonzero if INDEX is valid for an address index operand in
5367 ARM state. */
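/* Typical addresses whose index part is accepted here for an SImode
   access (illustrative):
     [r0, r1]             register index
     [r0, r1, lsl #2]     index scaled by a power of two
     [r0, #4095]          immediate offset within range
   (HImode, for example, is limited to an 8-bit offset on ARMv4 and
   later.)  */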
5368 static int
5369 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5370 int strict_p)
5372 HOST_WIDE_INT range;
5373 enum rtx_code code = GET_CODE (index);
5375 /* Standard coprocessor addressing modes. */
5376 if (TARGET_HARD_FLOAT
5377 && (TARGET_FPA || TARGET_MAVERICK)
5378 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5379 || (TARGET_MAVERICK && mode == DImode)))
5380 return (code == CONST_INT && INTVAL (index) < 1024
5381 && INTVAL (index) > -1024
5382 && (INTVAL (index) & 3) == 0);
5384 if (TARGET_NEON
5385 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5386 return (code == CONST_INT
5387 && INTVAL (index) < 1016
5388 && INTVAL (index) > -1024
5389 && (INTVAL (index) & 3) == 0);
5391 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5392 return (code == CONST_INT
5393 && INTVAL (index) < 1024
5394 && INTVAL (index) > -1024
5395 && (INTVAL (index) & 3) == 0);
5397 if (arm_address_register_rtx_p (index, strict_p)
5398 && (GET_MODE_SIZE (mode) <= 4))
5399 return 1;
5401 if (mode == DImode || mode == DFmode)
5403 if (code == CONST_INT)
5405 HOST_WIDE_INT val = INTVAL (index);
5407 if (TARGET_LDRD)
5408 return val > -256 && val < 256;
5409 else
5410 return val > -4096 && val < 4092;
5413 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5416 if (GET_MODE_SIZE (mode) <= 4
5417 && ! (arm_arch4
5418 && (mode == HImode
5419 || mode == HFmode
5420 || (mode == QImode && outer == SIGN_EXTEND))))
5422 if (code == MULT)
5424 rtx xiop0 = XEXP (index, 0);
5425 rtx xiop1 = XEXP (index, 1);
5427 return ((arm_address_register_rtx_p (xiop0, strict_p)
5428 && power_of_two_operand (xiop1, SImode))
5429 || (arm_address_register_rtx_p (xiop1, strict_p)
5430 && power_of_two_operand (xiop0, SImode)));
5432 else if (code == LSHIFTRT || code == ASHIFTRT
5433 || code == ASHIFT || code == ROTATERT)
5435 rtx op = XEXP (index, 1);
5437 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5438 && GET_CODE (op) == CONST_INT
5439 && INTVAL (op) > 0
5440 && INTVAL (op) <= 31);
5444 /* For ARM v4 we may be doing a sign-extend operation during the
5445 load. */
5446 if (arm_arch4)
5448 if (mode == HImode
5449 || mode == HFmode
5450 || (outer == SIGN_EXTEND && mode == QImode))
5451 range = 256;
5452 else
5453 range = 4096;
5455 else
5456 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5458 return (code == CONST_INT
5459 && INTVAL (index) < range
5460 && INTVAL (index) > -range);
5463 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5464 index operand. i.e. 1, 2, 4 or 8. */
5465 static bool
5466 thumb2_index_mul_operand (rtx op)
5468 HOST_WIDE_INT val;
5470 if (GET_CODE(op) != CONST_INT)
5471 return false;
5473 val = INTVAL(op);
5474 return (val == 1 || val == 2 || val == 4 || val == 8);
5477 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5478 static int
5479 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5481 enum rtx_code code = GET_CODE (index);
5483 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5484 /* Standard coprocessor addressing modes. */
5485 if (TARGET_HARD_FLOAT
5486 && (TARGET_FPA || TARGET_MAVERICK)
5487 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5488 || (TARGET_MAVERICK && mode == DImode)))
5489 return (code == CONST_INT && INTVAL (index) < 1024
5490 && INTVAL (index) > -1024
5491 && (INTVAL (index) & 3) == 0);
5493 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5495 /* For DImode assume values will usually live in core regs
5496 and only allow LDRD addressing modes. */
5497 if (!TARGET_LDRD || mode != DImode)
5498 return (code == CONST_INT
5499 && INTVAL (index) < 1024
5500 && INTVAL (index) > -1024
5501 && (INTVAL (index) & 3) == 0);
5504 if (TARGET_NEON
5505 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5506 return (code == CONST_INT
5507 && INTVAL (index) < 1016
5508 && INTVAL (index) > -1024
5509 && (INTVAL (index) & 3) == 0);
5511 if (arm_address_register_rtx_p (index, strict_p)
5512 && (GET_MODE_SIZE (mode) <= 4))
5513 return 1;
5515 if (mode == DImode || mode == DFmode)
5517 if (code == CONST_INT)
5519 HOST_WIDE_INT val = INTVAL (index);
5520 /* ??? Can we assume ldrd for thumb2? */
5521 /* Thumb-2 ldrd only has reg+const addressing modes. */
5522 /* ldrd supports offsets of +-1020.
5523 However the ldr fallback does not. */
5524 return val > -256 && val < 256 && (val & 3) == 0;
5526 else
5527 return 0;
5530 if (code == MULT)
5532 rtx xiop0 = XEXP (index, 0);
5533 rtx xiop1 = XEXP (index, 1);
5535 return ((arm_address_register_rtx_p (xiop0, strict_p)
5536 && thumb2_index_mul_operand (xiop1))
5537 || (arm_address_register_rtx_p (xiop1, strict_p)
5538 && thumb2_index_mul_operand (xiop0)));
5540 else if (code == ASHIFT)
5542 rtx op = XEXP (index, 1);
5544 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5545 && GET_CODE (op) == CONST_INT
5546 && INTVAL (op) > 0
5547 && INTVAL (op) <= 3);
5550 return (code == CONST_INT
5551 && INTVAL (index) < 4096
5552 && INTVAL (index) > -256);
5555 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5556 static int
5557 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5559 int regno;
5561 if (GET_CODE (x) != REG)
5562 return 0;
5564 regno = REGNO (x);
5566 if (strict_p)
5567 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5569 return (regno <= LAST_LO_REGNUM
5570 || regno > LAST_VIRTUAL_REGISTER
5571 || regno == FRAME_POINTER_REGNUM
5572 || (GET_MODE_SIZE (mode) >= 4
5573 && (regno == STACK_POINTER_REGNUM
5574 || regno >= FIRST_PSEUDO_REGISTER
5575 || x == hard_frame_pointer_rtx
5576 || x == arg_pointer_rtx)));
5579 /* Return nonzero if x is a legitimate index register. This is the case
5580 for any base register that can access a QImode object. */
5581 inline static int
5582 thumb1_index_register_rtx_p (rtx x, int strict_p)
5584 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5587 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5589 The AP may be eliminated to either the SP or the FP, so we use the
5590 least common denominator, e.g. SImode, and offsets from 0 to 64.
5592 ??? Verify whether the above is the right approach.
5594 ??? Also, the FP may be eliminated to the SP, so perhaps that
5595 needs special handling also.
5597 ??? Look at how the mips16 port solves this problem. It probably uses
5598 better ways to solve some of these problems.
5600 Although it is not incorrect, we don't accept QImode and HImode
5601 addresses based on the frame pointer or arg pointer until the
5602 reload pass starts. This is so that eliminating such addresses
5603 into stack based ones won't produce impossible code. */
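/* Examples of addresses accepted for Thumb-1 (illustrative):
     [r0, r1]       register plus register
     [r2, #28]      register plus small immediate (up to 124 for words)
     [sp, #512]     SP-relative, SImode or larger only  */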
5604 static int
5605 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5607 /* ??? Not clear if this is right. Experiment. */
5608 if (GET_MODE_SIZE (mode) < 4
5609 && !(reload_in_progress || reload_completed)
5610 && (reg_mentioned_p (frame_pointer_rtx, x)
5611 || reg_mentioned_p (arg_pointer_rtx, x)
5612 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5613 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5614 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5615 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5616 return 0;
5618 /* Accept any base register. SP only in SImode or larger. */
5619 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5620 return 1;
5622 /* This is PC relative data before arm_reorg runs. */
5623 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5624 && GET_CODE (x) == SYMBOL_REF
5625 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5626 return 1;
5628 /* This is PC relative data after arm_reorg runs. */
5629 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5630 && reload_completed
5631 && (GET_CODE (x) == LABEL_REF
5632 || (GET_CODE (x) == CONST
5633 && GET_CODE (XEXP (x, 0)) == PLUS
5634 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5635 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5636 return 1;
5638 /* Post-inc indexing only supported for SImode and larger. */
5639 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5640 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5641 return 1;
5643 else if (GET_CODE (x) == PLUS)
5645 /* REG+REG address can be any two index registers. */
5646 /* We disallow FRAME+REG addressing since we know that FRAME
5647 will be replaced with STACK, and SP relative addressing only
5648 permits SP+OFFSET. */
5649 if (GET_MODE_SIZE (mode) <= 4
5650 && XEXP (x, 0) != frame_pointer_rtx
5651 && XEXP (x, 1) != frame_pointer_rtx
5652 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5653 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
5654 return 1;
5656 /* REG+const has 5-7 bit offset for non-SP registers. */
5657 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5658 || XEXP (x, 0) == arg_pointer_rtx)
5659 && GET_CODE (XEXP (x, 1)) == CONST_INT
5660 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5661 return 1;
5663 /* REG+const has a 10-bit offset for SP, but only SImode and
5664 larger are supported. */
5665 /* ??? Should probably check for DI/DFmode overflow here
5666 just like GO_IF_LEGITIMATE_OFFSET does. */
5667 else if (GET_CODE (XEXP (x, 0)) == REG
5668 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5669 && GET_MODE_SIZE (mode) >= 4
5670 && GET_CODE (XEXP (x, 1)) == CONST_INT
5671 && INTVAL (XEXP (x, 1)) >= 0
5672 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5673 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5674 return 1;
5676 else if (GET_CODE (XEXP (x, 0)) == REG
5677 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5678 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5679 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5680 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5681 && GET_MODE_SIZE (mode) >= 4
5682 && GET_CODE (XEXP (x, 1)) == CONST_INT
5683 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5684 return 1;
5687 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5688 && GET_MODE_SIZE (mode) == 4
5689 && GET_CODE (x) == SYMBOL_REF
5690 && CONSTANT_POOL_ADDRESS_P (x)
5691 && ! (flag_pic
5692 && symbol_mentioned_p (get_pool_constant (x))
5693 && ! pcrel_constant_p (get_pool_constant (x))))
5694 return 1;
5696 return 0;
5699 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5700 instruction of mode MODE. */
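/* For illustration (derived from the cases below): byte accesses allow
   offsets 0-31, halfword accesses allow even offsets 0-62, and word or
   larger accesses allow word-aligned offsets for which the whole access
   stays below 128 bytes, e.g. 0-124 for a 4-byte load.  */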
5702 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5704 switch (GET_MODE_SIZE (mode))
5706 case 1:
5707 return val >= 0 && val < 32;
5709 case 2:
5710 return val >= 0 && val < 64 && (val & 1) == 0;
5712 default:
5713 return (val >= 0
5714 && (val + GET_MODE_SIZE (mode)) <= 128
5715 && (val & 3) == 0);
5719 bool
5720 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5722 if (TARGET_ARM)
5723 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5724 else if (TARGET_THUMB2)
5725 return thumb2_legitimate_address_p (mode, x, strict_p);
5726 else /* if (TARGET_THUMB1) */
5727 return thumb1_legitimate_address_p (mode, x, strict_p);
5730 /* Build the SYMBOL_REF for __tls_get_addr. */
5732 static GTY(()) rtx tls_get_addr_libfunc;
5734 static rtx
5735 get_tls_get_addr (void)
5737 if (!tls_get_addr_libfunc)
5738 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5739 return tls_get_addr_libfunc;
5742 static rtx
5743 arm_load_tp (rtx target)
5745 if (!target)
5746 target = gen_reg_rtx (SImode);
5748 if (TARGET_HARD_TP)
5750 /* Can return in any reg. */
5751 emit_insn (gen_load_tp_hard (target));
5753 else
5755 /* Always returned in r0. Immediately copy the result into a pseudo,
5756 otherwise other uses of r0 (e.g. setting up function arguments) may
5757 clobber the value. */
5759 rtx tmp;
5761 emit_insn (gen_load_tp_soft ());
5763 tmp = gen_rtx_REG (SImode, 0);
5764 emit_move_insn (target, tmp);
5766 return target;
5769 static rtx
5770 load_tls_operand (rtx x, rtx reg)
5772 rtx tmp;
5774 if (reg == NULL_RTX)
5775 reg = gen_reg_rtx (SImode);
5777 tmp = gen_rtx_CONST (SImode, x);
5779 emit_move_insn (reg, tmp);
5781 return reg;
5784 static rtx
5785 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5787 rtx insns, label, labelno, sum;
5789 start_sequence ();
5791 labelno = GEN_INT (pic_labelno++);
5792 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5793 label = gen_rtx_CONST (VOIDmode, label);
5795 sum = gen_rtx_UNSPEC (Pmode,
5796 gen_rtvec (4, x, GEN_INT (reloc), label,
5797 GEN_INT (TARGET_ARM ? 8 : 4)),
5798 UNSPEC_TLS);
5799 reg = load_tls_operand (sum, reg);
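/* Illustrative note: the UNSPEC_TLS sum built above pairs the relocated
   symbol with the pic label and a PC read-ahead of 8 (ARM) or 4 (Thumb)
   bytes; the pic_add_dot_plus_{eight,four} insns emitted below add the PC
   back in at that label, yielding the final PC-relative value.  */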
5801 if (TARGET_ARM)
5802 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5803 else if (TARGET_THUMB2)
5804 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5805 else /* TARGET_THUMB1 */
5806 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5808 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5809 Pmode, 1, reg, Pmode);
5811 insns = get_insns ();
5812 end_sequence ();
5814 return insns;
5818 legitimize_tls_address (rtx x, rtx reg)
5820 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5821 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5823 switch (model)
5825 case TLS_MODEL_GLOBAL_DYNAMIC:
5826 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5827 dest = gen_reg_rtx (Pmode);
5828 emit_libcall_block (insns, dest, ret, x);
5829 return dest;
5831 case TLS_MODEL_LOCAL_DYNAMIC:
5832 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5834 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5835 share the LDM result with other LD model accesses. */
5836 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5837 UNSPEC_TLS);
5838 dest = gen_reg_rtx (Pmode);
5839 emit_libcall_block (insns, dest, ret, eqv);
5841 /* Load the addend. */
5842 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5843 UNSPEC_TLS);
5844 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5845 return gen_rtx_PLUS (Pmode, dest, addend);
5847 case TLS_MODEL_INITIAL_EXEC:
5848 labelno = GEN_INT (pic_labelno++);
5849 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5850 label = gen_rtx_CONST (VOIDmode, label);
5851 sum = gen_rtx_UNSPEC (Pmode,
5852 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5853 GEN_INT (TARGET_ARM ? 8 : 4)),
5854 UNSPEC_TLS);
5855 reg = load_tls_operand (sum, reg);
5857 if (TARGET_ARM)
5858 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5859 else if (TARGET_THUMB2)
5860 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5861 else
5863 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5864 emit_move_insn (reg, gen_const_mem (SImode, reg));
5867 tp = arm_load_tp (NULL_RTX);
5869 return gen_rtx_PLUS (Pmode, tp, reg);
5871 case TLS_MODEL_LOCAL_EXEC:
5872 tp = arm_load_tp (NULL_RTX);
5874 reg = gen_rtx_UNSPEC (Pmode,
5875 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5876 UNSPEC_TLS);
5877 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5879 return gen_rtx_PLUS (Pmode, tp, reg);
5881 default:
5882 abort ();
5886 /* Try machine-dependent ways of modifying an illegitimate address
5887 to be legitimate. If we find one, return the new, valid address. */
5889 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5891 if (!TARGET_ARM)
5893 /* TODO: legitimize_address for Thumb2. */
5894 if (TARGET_THUMB2)
5895 return x;
5896 return thumb_legitimize_address (x, orig_x, mode);
5899 if (arm_tls_symbol_p (x))
5900 return legitimize_tls_address (x, NULL_RTX);
5902 if (GET_CODE (x) == PLUS)
5904 rtx xop0 = XEXP (x, 0);
5905 rtx xop1 = XEXP (x, 1);
5907 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5908 xop0 = force_reg (SImode, xop0);
5910 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5911 xop1 = force_reg (SImode, xop1);
5913 if (ARM_BASE_REGISTER_RTX_P (xop0)
5914 && GET_CODE (xop1) == CONST_INT)
5916 HOST_WIDE_INT n, low_n;
5917 rtx base_reg, val;
5918 n = INTVAL (xop1);
5920 /* VFP addressing modes actually allow greater offsets, but for
5921 now we just stick with the lowest common denominator. */
5922 if (mode == DImode
5923 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
5925 low_n = n & 0x0f;
5926 n &= ~0x0f;
5927 if (low_n > 4)
5929 n += 16;
5930 low_n -= 16;
5933 else
5935 low_n = ((mode) == TImode ? 0
5936 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
5937 n -= low_n;
5940 base_reg = gen_reg_rtx (SImode);
5941 val = force_operand (plus_constant (xop0, n), NULL_RTX);
5942 emit_move_insn (base_reg, val);
5943 x = plus_constant (base_reg, low_n);
5945 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5946 x = gen_rtx_PLUS (SImode, xop0, xop1);
5949 /* XXX We don't allow MINUS any more -- see comment in
5950 arm_legitimate_address_outer_p (). */
5951 else if (GET_CODE (x) == MINUS)
5953 rtx xop0 = XEXP (x, 0);
5954 rtx xop1 = XEXP (x, 1);
5956 if (CONSTANT_P (xop0))
5957 xop0 = force_reg (SImode, xop0);
5959 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
5960 xop1 = force_reg (SImode, xop1);
5962 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5963 x = gen_rtx_MINUS (SImode, xop0, xop1);
5966 /* Make sure to take full advantage of the pre-indexed addressing mode
5967 with absolute addresses, which often allows the base register to
5968 be factorized for multiple adjacent memory references, and might
5969 even allow the minipool to be avoided entirely. */
5970 else if (GET_CODE (x) == CONST_INT && optimize > 0)
5972 unsigned int bits;
5973 HOST_WIDE_INT mask, base, index;
5974 rtx base_reg;
5976 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
5977 use an 8-bit index. So let's use a 12-bit index for SImode only and
5978 hope that arm_gen_constant will enable ldrb to use more bits. */
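/* Worked example (illustrative): for a SImode access to absolute address
   0x12345, bits = 12 and mask = 0xfff, giving base = 0x12000 and
   index = 0x345; the base is loaded into a register once and the access
   becomes a reg+offset address with offset 0x345.  */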
5979 bits = (mode == SImode) ? 12 : 8;
5980 mask = (1 << bits) - 1;
5981 base = INTVAL (x) & ~mask;
5982 index = INTVAL (x) & mask;
5983 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
5985 /* It'll most probably be more efficient to generate the base
5986 with more bits set and use a negative index instead. */
5987 base |= mask;
5988 index -= mask;
5990 base_reg = force_reg (SImode, GEN_INT (base));
5991 x = plus_constant (base_reg, index);
5994 if (flag_pic)
5996 /* We need to find and carefully transform any SYMBOL and LABEL
5997 references; so go back to the original address expression. */
5998 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6000 if (new_x != orig_x)
6001 x = new_x;
6004 return x;
6008 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6009 to be legitimate. If we find one, return the new, valid address. */
6011 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6013 if (arm_tls_symbol_p (x))
6014 return legitimize_tls_address (x, NULL_RTX);
6016 if (GET_CODE (x) == PLUS
6017 && GET_CODE (XEXP (x, 1)) == CONST_INT
6018 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6019 || INTVAL (XEXP (x, 1)) < 0))
6021 rtx xop0 = XEXP (x, 0);
6022 rtx xop1 = XEXP (x, 1);
6023 HOST_WIDE_INT offset = INTVAL (xop1);
6025 /* Try to fold the offset into a biasing of the base register and
6026 then offsetting that. Don't do this when optimizing for space
6027 since it can cause too many CSEs. */
6028 if (optimize_size && offset >= 0
6029 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6031 HOST_WIDE_INT delta;
6033 if (offset >= 256)
6034 delta = offset - (256 - GET_MODE_SIZE (mode));
6035 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6036 delta = 31 * GET_MODE_SIZE (mode);
6037 else
6038 delta = offset & (~31 * GET_MODE_SIZE (mode));
6040 xop0 = force_operand (plus_constant (xop0, offset - delta),
6041 NULL_RTX);
6042 x = plus_constant (xop0, delta);
6044 else if (offset < 0 && offset > -256)
6045 /* Small negative offsets are best done with a subtract before the
6046 dereference, since forcing these into a register normally takes two
6047 instructions. */
6048 x = force_operand (x, NULL_RTX);
6049 else
6051 /* For the remaining cases, force the constant into a register. */
6052 xop1 = force_reg (SImode, xop1);
6053 x = gen_rtx_PLUS (SImode, xop0, xop1);
6056 else if (GET_CODE (x) == PLUS
6057 && s_register_operand (XEXP (x, 1), SImode)
6058 && !s_register_operand (XEXP (x, 0), SImode))
6060 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6062 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6065 if (flag_pic)
6067 /* We need to find and carefully transform any SYMBOL and LABEL
6068 references; so go back to the original address expression. */
6069 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6071 if (new_x != orig_x)
6072 x = new_x;
6075 return x;
6079 thumb_legitimize_reload_address (rtx *x_p,
6080 enum machine_mode mode,
6081 int opnum, int type,
6082 int ind_levels ATTRIBUTE_UNUSED)
6084 rtx x = *x_p;
6086 if (GET_CODE (x) == PLUS
6087 && GET_MODE_SIZE (mode) < 4
6088 && REG_P (XEXP (x, 0))
6089 && XEXP (x, 0) == stack_pointer_rtx
6090 && GET_CODE (XEXP (x, 1)) == CONST_INT
6091 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6093 rtx orig_x = x;
6095 x = copy_rtx (x);
6096 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6097 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6098 return x;
6101 /* If both registers are hi-regs, then it's better to reload the
6102 entire expression rather than each register individually. That
6103 only requires one reload register rather than two. */
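/* For example (illustrative): in (plus (reg r8) (reg r9)) neither high
   register is a valid Thumb-1 base register, so reloading the whole sum
   into a single low register is cheaper than reloading r8 and r9
   separately.  */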
6104 if (GET_CODE (x) == PLUS
6105 && REG_P (XEXP (x, 0))
6106 && REG_P (XEXP (x, 1))
6107 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6108 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6110 rtx orig_x = x;
6112 x = copy_rtx (x);
6113 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6114 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6115 return x;
6118 return NULL;
6121 /* Test for various thread-local symbols. */
6123 /* Return TRUE if X is a thread-local symbol. */
6125 static bool
6126 arm_tls_symbol_p (rtx x)
6128 if (! TARGET_HAVE_TLS)
6129 return false;
6131 if (GET_CODE (x) != SYMBOL_REF)
6132 return false;
6134 return SYMBOL_REF_TLS_MODEL (x) != 0;
6137 /* Helper for arm_tls_referenced_p. */
6139 static int
6140 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6142 if (GET_CODE (*x) == SYMBOL_REF)
6143 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6145 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6146 TLS offsets, not real symbol references. */
6147 if (GET_CODE (*x) == UNSPEC
6148 && XINT (*x, 1) == UNSPEC_TLS)
6149 return -1;
6151 return 0;
6154 /* Return TRUE if X contains any TLS symbol references. */
6156 bool
6157 arm_tls_referenced_p (rtx x)
6159 if (! TARGET_HAVE_TLS)
6160 return false;
6162 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6165 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6167 bool
6168 arm_cannot_force_const_mem (rtx x)
6170 rtx base, offset;
6172 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6174 split_const (x, &base, &offset);
6175 if (GET_CODE (base) == SYMBOL_REF
6176 && !offset_within_block_p (base, INTVAL (offset)))
6177 return true;
6179 return arm_tls_referenced_p (x);
6182 #define REG_OR_SUBREG_REG(X) \
6183 (GET_CODE (X) == REG \
6184 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6186 #define REG_OR_SUBREG_RTX(X) \
6187 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6189 #ifndef COSTS_N_INSNS
6190 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
6191 #endif
6192 static inline int
6193 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6195 enum machine_mode mode = GET_MODE (x);
6197 switch (code)
6199 case ASHIFT:
6200 case ASHIFTRT:
6201 case LSHIFTRT:
6202 case ROTATERT:
6203 case PLUS:
6204 case MINUS:
6205 case COMPARE:
6206 case NEG:
6207 case NOT:
6208 return COSTS_N_INSNS (1);
6210 case MULT:
6211 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6213 int cycles = 0;
6214 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6216 while (i)
6218 i >>= 2;
6219 cycles++;
6221 return COSTS_N_INSNS (2) + cycles;
6223 return COSTS_N_INSNS (1) + 16;
6225 case SET:
6226 return (COSTS_N_INSNS (1)
6227 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6228 + GET_CODE (SET_DEST (x)) == MEM));
6230 case CONST_INT:
6231 if (outer == SET)
6233 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6234 return 0;
6235 if (thumb_shiftable_const (INTVAL (x)))
6236 return COSTS_N_INSNS (2);
6237 return COSTS_N_INSNS (3);
6239 else if ((outer == PLUS || outer == COMPARE)
6240 && INTVAL (x) < 256 && INTVAL (x) > -256)
6241 return 0;
6242 else if ((outer == IOR || outer == XOR || outer == AND)
6243 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6244 return COSTS_N_INSNS (1);
6245 else if (outer == AND)
6247 int i;
6248 /* This duplicates the tests in the andsi3 expander. */
6249 for (i = 9; i <= 31; i++)
6250 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6251 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6252 return COSTS_N_INSNS (2);
6254 else if (outer == ASHIFT || outer == ASHIFTRT
6255 || outer == LSHIFTRT)
6256 return 0;
6257 return COSTS_N_INSNS (2);
6259 case CONST:
6260 case CONST_DOUBLE:
6261 case LABEL_REF:
6262 case SYMBOL_REF:
6263 return COSTS_N_INSNS (3);
6265 case UDIV:
6266 case UMOD:
6267 case DIV:
6268 case MOD:
6269 return 100;
6271 case TRUNCATE:
6272 return 99;
6274 case AND:
6275 case XOR:
6276 case IOR:
6277 /* XXX guess. */
6278 return 8;
6280 case MEM:
6281 /* XXX another guess. */
6282 /* Memory costs quite a lot for the first word, but subsequent words
6283 load at the equivalent of a single insn each. */
6284 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6285 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6286 ? 4 : 0));
6288 case IF_THEN_ELSE:
6289 /* XXX a guess. */
6290 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6291 return 14;
6292 return 2;
6294 case ZERO_EXTEND:
6295 /* XXX still guessing. */
6296 switch (GET_MODE (XEXP (x, 0)))
6298 case QImode:
6299 return (1 + (mode == DImode ? 4 : 0)
6300 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6302 case HImode:
6303 return (4 + (mode == DImode ? 4 : 0)
6304 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6306 case SImode:
6307 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6309 default:
6310 return 99;
6313 default:
6314 return 99;
6318 static inline bool
6319 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6321 enum machine_mode mode = GET_MODE (x);
6322 enum rtx_code subcode;
6323 rtx operand;
6324 enum rtx_code code = GET_CODE (x);
6325 *total = 0;
6327 switch (code)
6329 case MEM:
6330 /* Memory costs quite a lot for the first word, but subsequent words
6331 load at the equivalent of a single insn each. */
6332 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6333 return true;
6335 case DIV:
6336 case MOD:
6337 case UDIV:
6338 case UMOD:
6339 if (TARGET_HARD_FLOAT && mode == SFmode)
6340 *total = COSTS_N_INSNS (2);
6341 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6342 *total = COSTS_N_INSNS (4);
6343 else
6344 *total = COSTS_N_INSNS (20);
6345 return false;
6347 case ROTATE:
6348 if (GET_CODE (XEXP (x, 1)) == REG)
6349 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6350 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6351 *total = rtx_cost (XEXP (x, 1), code, speed);
6353 /* Fall through */
6354 case ROTATERT:
6355 if (mode != SImode)
6357 *total += COSTS_N_INSNS (4);
6358 return true;
6361 /* Fall through */
6362 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6363 *total += rtx_cost (XEXP (x, 0), code, speed);
6364 if (mode == DImode)
6366 *total += COSTS_N_INSNS (3);
6367 return true;
6370 *total += COSTS_N_INSNS (1);
6371 /* Increase the cost of complex shifts because they aren't any faster,
6372 and reduce dual issue opportunities. */
6373 if (arm_tune_cortex_a9
6374 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6375 ++*total;
6377 return true;
6379 case MINUS:
6380 if (TARGET_THUMB2)
6382 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6384 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6385 *total = COSTS_N_INSNS (1);
6386 else
6387 *total = COSTS_N_INSNS (20);
6389 else
6390 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6391 /* Thumb2 does not have RSB, so all arguments must be
6392 registers (subtracting a constant is canonicalized as
6393 addition of the negated constant). */
6394 return false;
6397 if (mode == DImode)
6399 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6400 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6401 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6403 *total += rtx_cost (XEXP (x, 1), code, speed);
6404 return true;
6407 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6408 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6410 *total += rtx_cost (XEXP (x, 0), code, speed);
6411 return true;
6414 return false;
6417 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6419 if (TARGET_HARD_FLOAT
6420 && (mode == SFmode
6421 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6423 *total = COSTS_N_INSNS (1);
6424 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6425 && arm_const_double_rtx (XEXP (x, 0)))
6427 *total += rtx_cost (XEXP (x, 1), code, speed);
6428 return true;
6431 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6432 && arm_const_double_rtx (XEXP (x, 1)))
6434 *total += rtx_cost (XEXP (x, 0), code, speed);
6435 return true;
6438 return false;
6440 *total = COSTS_N_INSNS (20);
6441 return false;
6444 *total = COSTS_N_INSNS (1);
6445 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6446 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6448 *total += rtx_cost (XEXP (x, 1), code, speed);
6449 return true;
6452 subcode = GET_CODE (XEXP (x, 1));
6453 if (subcode == ASHIFT || subcode == ASHIFTRT
6454 || subcode == LSHIFTRT
6455 || subcode == ROTATE || subcode == ROTATERT)
6457 *total += rtx_cost (XEXP (x, 0), code, speed);
6458 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6459 return true;
6462 /* A shift as a part of RSB costs no more than RSB itself. */
6463 if (GET_CODE (XEXP (x, 0)) == MULT
6464 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6466 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6467 *total += rtx_cost (XEXP (x, 1), code, speed);
6468 return true;
6471 if (subcode == MULT
6472 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6474 *total += rtx_cost (XEXP (x, 0), code, speed);
6475 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6476 return true;
6479 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6480 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6482 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6483 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6484 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6485 *total += COSTS_N_INSNS (1);
6487 return true;
6490 /* Fall through */
6492 case PLUS:
6493 if (code == PLUS && arm_arch6 && mode == SImode
6494 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6495 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6497 *total = COSTS_N_INSNS (1);
6498 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6499 speed);
6500 *total += rtx_cost (XEXP (x, 1), code, speed);
6501 return true;
6504 /* MLA: All arguments must be registers. We filter out
6505 multiplication by a power of two, so that we fall down into
6506 the code below. */
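/* E.g. (plus (mult a b) c) is costed as the multiply (an MLA), whereas
   (plus (mult a (const_int 4)) c) is really an add with a shifted operand
   and falls through to the generic PLUS handling below (illustrative).  */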
6507 if (GET_CODE (XEXP (x, 0)) == MULT
6508 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6510 /* The cost comes from the cost of the multiply. */
6511 return false;
6514 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6516 if (TARGET_HARD_FLOAT
6517 && (mode == SFmode
6518 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6520 *total = COSTS_N_INSNS (1);
6521 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6522 && arm_const_double_rtx (XEXP (x, 1)))
6524 *total += rtx_cost (XEXP (x, 0), code, speed);
6525 return true;
6528 return false;
6531 *total = COSTS_N_INSNS (20);
6532 return false;
6535 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6536 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6538 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6539 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6540 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6541 *total += COSTS_N_INSNS (1);
6542 return true;
6545 /* Fall through */
6547 case AND: case XOR: case IOR:
6549 /* Normally the frame registers will be split into reg+const during
6550 reload, so it is a bad idea to combine them with other instructions,
6551 since then they might not be moved outside of loops. As a compromise
6552 we allow integration with ops that have a constant as their second
6553 operand. */
6554 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6555 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6556 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6557 || (REG_OR_SUBREG_REG (XEXP (x, 0))
6558 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
6559 *total = 4;
6561 if (mode == DImode)
6563 *total += COSTS_N_INSNS (2);
6564 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6565 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6567 *total += rtx_cost (XEXP (x, 0), code, speed);
6568 return true;
6571 return false;
6574 *total += COSTS_N_INSNS (1);
6575 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6576 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6578 *total += rtx_cost (XEXP (x, 0), code, speed);
6579 return true;
6581 subcode = GET_CODE (XEXP (x, 0));
6582 if (subcode == ASHIFT || subcode == ASHIFTRT
6583 || subcode == LSHIFTRT
6584 || subcode == ROTATE || subcode == ROTATERT)
6586 *total += rtx_cost (XEXP (x, 1), code, speed);
6587 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6588 return true;
6591 if (subcode == MULT
6592 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6594 *total += rtx_cost (XEXP (x, 1), code, speed);
6595 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6596 return true;
6599 if (subcode == UMIN || subcode == UMAX
6600 || subcode == SMIN || subcode == SMAX)
6602 *total = COSTS_N_INSNS (3);
6603 return true;
6606 return false;
6608 case MULT:
6609 /* This should have been handled by the CPU specific routines. */
6610 gcc_unreachable ();
6612 case TRUNCATE:
6613 if (arm_arch3m && mode == SImode
6614 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6615 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6616 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6617 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6618 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6619 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6621 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6622 return true;
6624 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6625 return false;
6627 case NEG:
6628 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6630 if (TARGET_HARD_FLOAT
6631 && (mode == SFmode
6632 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6634 *total = COSTS_N_INSNS (1);
6635 return false;
6637 *total = COSTS_N_INSNS (2);
6638 return false;
6641 /* Fall through */
6642 case NOT:
6643 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6644 if (mode == SImode && code == NOT)
6646 subcode = GET_CODE (XEXP (x, 0));
6647 if (subcode == ASHIFT || subcode == ASHIFTRT
6648 || subcode == LSHIFTRT
6649 || subcode == ROTATE || subcode == ROTATERT
6650 || (subcode == MULT
6651 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6653 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6654 /* Register shifts cost an extra cycle. */
6655 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6656 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6657 subcode, speed);
6658 return true;
6662 return false;
6664 case IF_THEN_ELSE:
6665 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6667 *total = COSTS_N_INSNS (4);
6668 return true;
6671 operand = XEXP (x, 0);
6673 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6674 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6675 && GET_CODE (XEXP (operand, 0)) == REG
6676 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6677 *total += COSTS_N_INSNS (1);
6678 *total += (rtx_cost (XEXP (x, 1), code, speed)
6679 + rtx_cost (XEXP (x, 2), code, speed));
6680 return true;
6682 case NE:
6683 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6685 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6686 return true;
6688 goto scc_insn;
6690 case GE:
6691 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6692 && mode == SImode && XEXP (x, 1) == const0_rtx)
6694 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6695 return true;
6697 goto scc_insn;
6699 case LT:
6700 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6701 && mode == SImode && XEXP (x, 1) == const0_rtx)
6703 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6704 return true;
6706 goto scc_insn;
6708 case EQ:
6709 case GT:
6710 case LE:
6711 case GEU:
6712 case LTU:
6713 case GTU:
6714 case LEU:
6715 case UNORDERED:
6716 case ORDERED:
6717 case UNEQ:
6718 case UNGE:
6719 case UNLT:
6720 case UNGT:
6721 case UNLE:
6722 scc_insn:
6723 /* SCC insns. In the case where the comparison has already been
6724 performed, then they cost 2 instructions. Otherwise they need
6725 an additional comparison before them. */
6726 *total = COSTS_N_INSNS (2);
6727 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6729 return true;
6732 /* Fall through */
6733 case COMPARE:
6734 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6736 *total = 0;
6737 return true;
6740 *total += COSTS_N_INSNS (1);
6741 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6742 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6744 *total += rtx_cost (XEXP (x, 0), code, speed);
6745 return true;
6748 subcode = GET_CODE (XEXP (x, 0));
6749 if (subcode == ASHIFT || subcode == ASHIFTRT
6750 || subcode == LSHIFTRT
6751 || subcode == ROTATE || subcode == ROTATERT)
6753 *total += rtx_cost (XEXP (x, 1), code, speed);
6754 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6755 return true;
6758 if (subcode == MULT
6759 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6761 *total += rtx_cost (XEXP (x, 1), code, speed);
6762 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6763 return true;
6766 return false;
6768 case UMIN:
6769 case UMAX:
6770 case SMIN:
6771 case SMAX:
6772 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6773 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6774 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6775 *total += rtx_cost (XEXP (x, 1), code, speed);
6776 return true;
6778 case ABS:
6779 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6781 if (TARGET_HARD_FLOAT
6782 && (mode == SFmode
6783 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6785 *total = COSTS_N_INSNS (1);
6786 return false;
6788 *total = COSTS_N_INSNS (20);
6789 return false;
6791 *total = COSTS_N_INSNS (1);
6792 if (mode == DImode)
6793 *total += COSTS_N_INSNS (3);
6794 return false;
6796 case SIGN_EXTEND:
6797 if (GET_MODE_CLASS (mode) == MODE_INT)
6799 *total = 0;
6800 if (mode == DImode)
6801 *total += COSTS_N_INSNS (1);
6803 if (GET_MODE (XEXP (x, 0)) != SImode)
6805 if (arm_arch6)
6807 if (GET_CODE (XEXP (x, 0)) != MEM)
6808 *total += COSTS_N_INSNS (1);
6810 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6811 *total += COSTS_N_INSNS (2);
6814 return false;
6817 /* Fall through */
6818 case ZERO_EXTEND:
6819 *total = 0;
6820 if (GET_MODE_CLASS (mode) == MODE_INT)
6822 if (mode == DImode)
6823 *total += COSTS_N_INSNS (1);
6825 if (GET_MODE (XEXP (x, 0)) != SImode)
6827 if (arm_arch6)
6829 if (GET_CODE (XEXP (x, 0)) != MEM)
6830 *total += COSTS_N_INSNS (1);
6832 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6833 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
6834 1 : 2);
6837 return false;
6840 switch (GET_MODE (XEXP (x, 0)))
6842 case V8QImode:
6843 case V4HImode:
6844 case V2SImode:
6845 case V4QImode:
6846 case V2HImode:
6847 *total = COSTS_N_INSNS (1);
6848 return false;
6850 default:
6851 gcc_unreachable ();
6853 gcc_unreachable ();
6855 case ZERO_EXTRACT:
6856 case SIGN_EXTRACT:
6857 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6858 return true;
6860 case CONST_INT:
6861 if (const_ok_for_arm (INTVAL (x))
6862 || const_ok_for_arm (~INTVAL (x)))
6863 *total = COSTS_N_INSNS (1);
6864 else
6865 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6866 INTVAL (x), NULL_RTX,
6867 NULL_RTX, 0, 0));
6868 return true;
6870 case CONST:
6871 case LABEL_REF:
6872 case SYMBOL_REF:
6873 *total = COSTS_N_INSNS (3);
6874 return true;
6876 case HIGH:
6877 *total = COSTS_N_INSNS (1);
6878 return true;
6880 case LO_SUM:
6881 *total = COSTS_N_INSNS (1);
6882 *total += rtx_cost (XEXP (x, 0), code, speed);
6883 return true;
6885 case CONST_DOUBLE:
6886 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
6887 && (mode == SFmode || !TARGET_VFP_SINGLE))
6888 *total = COSTS_N_INSNS (1);
6889 else
6890 *total = COSTS_N_INSNS (4);
6891 return true;
6893 default:
6894 *total = COSTS_N_INSNS (4);
6895 return false;
6899 /* Estimates the size cost of thumb1 instructions.
6900 For now most of the code is copied from thumb1_rtx_costs. We need more
6901 fine-grained tuning when we have more related test cases. */
6902 static inline int
6903 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6905 enum machine_mode mode = GET_MODE (x);
6907 switch (code)
6909 case ASHIFT:
6910 case ASHIFTRT:
6911 case LSHIFTRT:
6912 case ROTATERT:
6913 case PLUS:
6914 case MINUS:
6915 case COMPARE:
6916 case NEG:
6917 case NOT:
6918 return COSTS_N_INSNS (1);
6920 case MULT:
6921 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6923 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
6924 into a register first. */
6925 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
6926 return COSTS_N_INSNS (1) + const_size;
6928 return COSTS_N_INSNS (1);
6930 case SET:
6931 return (COSTS_N_INSNS (1)
6932 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6933 + GET_CODE (SET_DEST (x)) == MEM));
6935 case CONST_INT:
6936 if (outer == SET)
6938 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6939 return 0;
6940 if (thumb_shiftable_const (INTVAL (x)))
6941 return COSTS_N_INSNS (2);
6942 return COSTS_N_INSNS (3);
6944 else if ((outer == PLUS || outer == COMPARE)
6945 && INTVAL (x) < 256 && INTVAL (x) > -256)
6946 return 0;
6947 else if ((outer == IOR || outer == XOR || outer == AND)
6948 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6949 return COSTS_N_INSNS (1);
6950 else if (outer == AND)
6952 int i;
6953 /* This duplicates the tests in the andsi3 expander. */
6954 for (i = 9; i <= 31; i++)
6955 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6956 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6957 return COSTS_N_INSNS (2);
6959 else if (outer == ASHIFT || outer == ASHIFTRT
6960 || outer == LSHIFTRT)
6961 return 0;
6962 return COSTS_N_INSNS (2);
6964 case CONST:
6965 case CONST_DOUBLE:
6966 case LABEL_REF:
6967 case SYMBOL_REF:
6968 return COSTS_N_INSNS (3);
6970 case UDIV:
6971 case UMOD:
6972 case DIV:
6973 case MOD:
6974 return 100;
6976 case TRUNCATE:
6977 return 99;
6979 case AND:
6980 case XOR:
6981 case IOR:
6982 /* XXX guess. */
6983 return 8;
6985 case MEM:
6986 /* XXX another guess. */
6987 /* Memory costs quite a lot for the first word, but subsequent words
6988 load at the equivalent of a single insn each. */
6989 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6990 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6991 ? 4 : 0));
6993 case IF_THEN_ELSE:
6994 /* XXX a guess. */
6995 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6996 return 14;
6997 return 2;
6999 case ZERO_EXTEND:
7000 /* XXX still guessing. */
7001 switch (GET_MODE (XEXP (x, 0)))
7003 case QImode:
7004 return (1 + (mode == DImode ? 4 : 0)
7005 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7007 case HImode:
7008 return (4 + (mode == DImode ? 4 : 0)
7009 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7011 case SImode:
7012 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7014 default:
7015 return 99;
7018 default:
7019 return 99;
7023 /* RTX costs when optimizing for size. */
7024 static bool
7025 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7026 int *total)
7028 enum machine_mode mode = GET_MODE (x);
7029 if (TARGET_THUMB1)
7031 *total = thumb1_size_rtx_costs (x, code, outer_code);
7032 return true;
7035 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7036 switch (code)
7038 case MEM:
7039 /* A memory access costs 1 insn if the mode is small or the address is
7040 a single register; otherwise it costs one insn per word. */
7041 if (REG_P (XEXP (x, 0)))
7042 *total = COSTS_N_INSNS (1);
7043 else
7044 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7045 return true;
7047 case DIV:
7048 case MOD:
7049 case UDIV:
7050 case UMOD:
7051 /* Needs a libcall, so it costs about this. */
7052 *total = COSTS_N_INSNS (2);
7053 return false;
7055 case ROTATE:
7056 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7058 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7059 return true;
7061 /* Fall through */
7062 case ROTATERT:
7063 case ASHIFT:
7064 case LSHIFTRT:
7065 case ASHIFTRT:
7066 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7068 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7069 return true;
7071 else if (mode == SImode)
7073 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7074 /* Slightly disparage register shifts, but not by much. */
7075 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7076 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7077 return true;
7080 /* Needs a libcall. */
7081 *total = COSTS_N_INSNS (2);
7082 return false;
7084 case MINUS:
7085 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7086 && (mode == SFmode || !TARGET_VFP_SINGLE))
7088 *total = COSTS_N_INSNS (1);
7089 return false;
7092 if (mode == SImode)
7094 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7095 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7097 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7098 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7099 || subcode1 == ROTATE || subcode1 == ROTATERT
7100 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7101 || subcode1 == ASHIFTRT)
7103 /* It's just the cost of the two operands. */
7104 *total = 0;
7105 return false;
7108 *total = COSTS_N_INSNS (1);
7109 return false;
7112 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7113 return false;
7115 case PLUS:
7116 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7117 && (mode == SFmode || !TARGET_VFP_SINGLE))
7119 *total = COSTS_N_INSNS (1);
7120 return false;
7123 /* A shift as a part of ADD costs nothing. */
7124 if (GET_CODE (XEXP (x, 0)) == MULT
7125 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7127 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7128 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7129 *total += rtx_cost (XEXP (x, 1), code, false);
7130 return true;
7133 /* Fall through */
7134 case AND: case XOR: case IOR:
7135 if (mode == SImode)
7137 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7139 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7140 || subcode == LSHIFTRT || subcode == ASHIFTRT
7141 || (code == AND && subcode == NOT))
7143 /* It's just the cost of the two operands. */
7144 *total = 0;
7145 return false;
7149 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7150 return false;
7152 case MULT:
7153 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7154 return false;
7156 case NEG:
7157 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7158 && (mode == SFmode || !TARGET_VFP_SINGLE))
7160 *total = COSTS_N_INSNS (1);
7161 return false;
7164 /* Fall through */
7165 case NOT:
7166 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7168 return false;
7170 case IF_THEN_ELSE:
7171 *total = 0;
7172 return false;
7174 case COMPARE:
7175 if (cc_register (XEXP (x, 0), VOIDmode))
7176 * total = 0;
7177 else
7178 *total = COSTS_N_INSNS (1);
7179 return false;
7181 case ABS:
7182 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7183 && (mode == SFmode || !TARGET_VFP_SINGLE))
7184 *total = COSTS_N_INSNS (1);
7185 else
7186 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7187 return false;
7189 case SIGN_EXTEND:
7190 *total = 0;
7191 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
7193 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7194 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7196 if (mode == DImode)
7197 *total += COSTS_N_INSNS (1);
7198 return false;
7200 case ZERO_EXTEND:
7201 *total = 0;
7202 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7204 switch (GET_MODE (XEXP (x, 0)))
7206 case QImode:
7207 *total += COSTS_N_INSNS (1);
7208 break;
7210 case HImode:
7211 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7213 case SImode:
7214 break;
7216 default:
7217 *total += COSTS_N_INSNS (2);
7221 if (mode == DImode)
7222 *total += COSTS_N_INSNS (1);
7224 return false;
7226 case CONST_INT:
7227 if (const_ok_for_arm (INTVAL (x)))
7228 /* A multiplication by a constant requires another instruction
7229 to load the constant to a register. */
7230 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7231 ? 1 : 0);
7232 else if (const_ok_for_arm (~INTVAL (x)))
7233 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7234 else if (const_ok_for_arm (-INTVAL (x)))
7236 if (outer_code == COMPARE || outer_code == PLUS
7237 || outer_code == MINUS)
7238 *total = 0;
7239 else
7240 *total = COSTS_N_INSNS (1);
7242 else
7243 *total = COSTS_N_INSNS (2);
7244 return true;
7246 case CONST:
7247 case LABEL_REF:
7248 case SYMBOL_REF:
7249 *total = COSTS_N_INSNS (2);
7250 return true;
7252 case CONST_DOUBLE:
7253 *total = COSTS_N_INSNS (4);
7254 return true;
7256 case HIGH:
7257 case LO_SUM:
7258 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7259 cost of these slightly. */
7260 *total = COSTS_N_INSNS (1) + 1;
7261 return true;
7263 default:
7264 if (mode != VOIDmode)
7265 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7266 else
7267 *total = COSTS_N_INSNS (4); /* Who knows? */
7268 return false;
7272 /* RTX costs. Dispatch to the size- or speed-tuned cost functions as appropriate. */
7273 static bool
7274 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7275 bool speed)
7277 if (!speed)
7278 return arm_size_rtx_costs (x, (enum rtx_code) code,
7279 (enum rtx_code) outer_code, total);
7280 else
7281 return current_tune->rtx_costs (x, (enum rtx_code) code,
7282 (enum rtx_code) outer_code,
7283 total, speed);
7286 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7287 supported on any "slowmul" cores, so it can be ignored. */
7289 static bool
7290 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7291 int *total, bool speed)
7293 enum machine_mode mode = GET_MODE (x);
7295 if (TARGET_THUMB)
7297 *total = thumb1_rtx_costs (x, code, outer_code);
7298 return true;
7301 switch (code)
7303 case MULT:
7304 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7305 || mode == DImode)
7307 *total = COSTS_N_INSNS (20);
7308 return false;
7311 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7313 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7314 & (unsigned HOST_WIDE_INT) 0xffffffff);
7315 int cost, const_ok = const_ok_for_arm (i);
7316 int j, booth_unit_size;
7318 /* Tune as appropriate. */
7319 cost = const_ok ? 4 : 8;
7320 booth_unit_size = 2;
7321 for (j = 0; i && j < 32; j += booth_unit_size)
7323 i >>= booth_unit_size;
7324 cost++;
7327 *total = COSTS_N_INSNS (cost);
7328 *total += rtx_cost (XEXP (x, 0), code, speed);
7329 return true;
7332 *total = COSTS_N_INSNS (20);
7333 return false;
7335 default:
7336 return arm_rtx_costs_1 (x, outer_code, total, speed);
7341 /* RTX cost for cores with a fast multiply unit (M variants). */
7343 static bool
7344 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7345 int *total, bool speed)
7347 enum machine_mode mode = GET_MODE (x);
7349 if (TARGET_THUMB1)
7351 *total = thumb1_rtx_costs (x, code, outer_code);
7352 return true;
7355 /* ??? should thumb2 use different costs? */
7356 switch (code)
7358 case MULT:
7359 /* There is no point basing this on the tuning, since it is always the
7360 fast variant if it exists at all. */
7361 if (mode == DImode
7362 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7363 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7364 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7366 *total = COSTS_N_INSNS (2);
7367 return false;
7371 if (mode == DImode)
7373 *total = COSTS_N_INSNS (5);
7374 return false;
7377 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7379 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7380 & (unsigned HOST_WIDE_INT) 0xffffffff);
7381 int cost, const_ok = const_ok_for_arm (i);
7382 int j, booth_unit_size;
7384 /* Tune as appropriate. */
7385 cost = const_ok ? 4 : 8;
7386 booth_unit_size = 8;
7387 for (j = 0; i && j < 32; j += booth_unit_size)
7389 i >>= booth_unit_size;
7390 cost++;
7393 *total = COSTS_N_INSNS (cost);
7394 return false;
7397 if (mode == SImode)
7399 *total = COSTS_N_INSNS (4);
7400 return false;
7403 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7405 if (TARGET_HARD_FLOAT
7406 && (mode == SFmode
7407 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7409 *total = COSTS_N_INSNS (1);
7410 return false;
7414 /* Requires a lib call */
7415 *total = COSTS_N_INSNS (20);
7416 return false;
7418 default:
7419 return arm_rtx_costs_1 (x, outer_code, total, speed);
7424 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any XScale cores,
7425 so it can be ignored. */
7427 static bool
7428 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7429 int *total, bool speed)
7431 enum machine_mode mode = GET_MODE (x);
7433 if (TARGET_THUMB)
7435 *total = thumb1_rtx_costs (x, code, outer_code);
7436 return true;
7439 switch (code)
7441 case COMPARE:
7442 if (GET_CODE (XEXP (x, 0)) != MULT)
7443 return arm_rtx_costs_1 (x, outer_code, total, speed);
7445 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7446 will stall until the multiplication is complete. */
7447 *total = COSTS_N_INSNS (3);
7448 return false;
7450 case MULT:
7451 /* There is no point basing this on the tuning, since it is always the
7452 fast variant if it exists at all. */
7453 if (mode == DImode
7454 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7455 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7456 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7458 *total = COSTS_N_INSNS (2);
7459 return false;
7463 if (mode == DImode)
7465 *total = COSTS_N_INSNS (5);
7466 return false;
7469 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7471 /* If operand 1 is a constant we can more accurately
7472 calculate the cost of the multiply. The multiplier can
7473 retire 15 bits on the first cycle and a further 12 on the
7474 second. We do, of course, have to load the constant into
7475 a register first. */
7476 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7477 /* There's a general overhead of one cycle. */
7478 int cost = 1;
7479 unsigned HOST_WIDE_INT masked_const;
7481 if (i & 0x80000000)
7482 i = ~i;
7484 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7486 masked_const = i & 0xffff8000;
7487 if (masked_const != 0)
7489 cost++;
7490 masked_const = i & 0xf8000000;
7491 if (masked_const != 0)
7492 cost++;
7494 *total = COSTS_N_INSNS (cost);
7495 return false;
7498 if (mode == SImode)
7500 *total = COSTS_N_INSNS (3);
7501 return false;
7504 /* Requires a lib call */
7505 *total = COSTS_N_INSNS (20);
7506 return false;
7508 default:
7509 return arm_rtx_costs_1 (x, outer_code, total, speed);
7514 /* RTX costs for 9e (and later) cores. */
7516 static bool
7517 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7518 int *total, bool speed)
7520 enum machine_mode mode = GET_MODE (x);
7522 if (TARGET_THUMB1)
7524 switch (code)
7526 case MULT:
7527 *total = COSTS_N_INSNS (3);
7528 return true;
7530 default:
7531 *total = thumb1_rtx_costs (x, code, outer_code);
7532 return true;
7536 switch (code)
7538 case MULT:
7539 /* There is no point basing this on the tuning, since it is always the
7540 fast variant if it exists at all. */
7541 if (mode == DImode
7542 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7543 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7544 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7546 *total = COSTS_N_INSNS (2);
7547 return false;
7551 if (mode == DImode)
7553 *total = COSTS_N_INSNS (5);
7554 return false;
7557 if (mode == SImode)
7559 *total = COSTS_N_INSNS (2);
7560 return false;
7563 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7565 if (TARGET_HARD_FLOAT
7566 && (mode == SFmode
7567 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7569 *total = COSTS_N_INSNS (1);
7570 return false;
7574 *total = COSTS_N_INSNS (20);
7575 return false;
7577 default:
7578 return arm_rtx_costs_1 (x, outer_code, total, speed);
7581 /* All address computations that can be done are free, but rtx cost returns
7582 the same for practically all of them. So we weight the different types
7583 of address here in the order (most preferred first):
7584 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
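/* Illustrative summary of the costs returned below: pre/post inc/dec
   forms cost 0, reg+const costs 2, reg plus an arithmetic (e.g. shifted)
   term costs 3, other reg+reg sums cost 4, a bare register costs 6, and
   a MEM/LABEL/SYMBOL costs 10.  */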
7585 static inline int
7586 arm_arm_address_cost (rtx x)
7588 enum rtx_code c = GET_CODE (x);
7590 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7591 return 0;
7592 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7593 return 10;
7595 if (c == PLUS)
7597 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7598 return 2;
7600 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7601 return 3;
7603 return 4;
7606 return 6;
7609 static inline int
7610 arm_thumb_address_cost (rtx x)
7612 enum rtx_code c = GET_CODE (x);
7614 if (c == REG)
7615 return 1;
7616 if (c == PLUS
7617 && GET_CODE (XEXP (x, 0)) == REG
7618 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7619 return 1;
7621 return 2;
7624 static int
7625 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7627 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7630 static int
7631 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7633 rtx i_pat, d_pat;
7635 /* Some true dependencies can have a higher cost depending
7636 on precisely how certain input operands are used. */
7637 if (arm_tune_xscale
7638 && REG_NOTE_KIND (link) == 0
7639 && recog_memoized (insn) >= 0
7640 && recog_memoized (dep) >= 0)
7642 int shift_opnum = get_attr_shift (insn);
7643 enum attr_type attr_type = get_attr_type (dep);
7645 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7646 operand for INSN. If we have a shifted input operand and the
7647 instruction we depend on is another ALU instruction, then we may
7648 have to account for an additional stall. */
7649 if (shift_opnum != 0
7650 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7652 rtx shifted_operand;
7653 int opno;
7655 /* Get the shifted operand. */
7656 extract_insn (insn);
7657 shifted_operand = recog_data.operand[shift_opnum];
7659 /* Iterate over all the operands in DEP. If we write an operand
7660 that overlaps with SHIFTED_OPERAND, then we have to increase the
7661 cost of this dependency. */
7662 extract_insn (dep);
7663 preprocess_constraints ();
7664 for (opno = 0; opno < recog_data.n_operands; opno++)
7666 /* We can ignore strict inputs. */
7667 if (recog_data.operand_type[opno] == OP_IN)
7668 continue;
7670 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7671 shifted_operand))
7672 return 2;
7677 /* XXX This is not strictly true for the FPA. */
7678 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7679 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7680 return 0;
7682 /* Call insns don't incur a stall, even if they follow a load. */
7683 if (REG_NOTE_KIND (link) == 0
7684 && GET_CODE (insn) == CALL_INSN)
7685 return 1;
7687 if ((i_pat = single_set (insn)) != NULL
7688 && GET_CODE (SET_SRC (i_pat)) == MEM
7689 && (d_pat = single_set (dep)) != NULL
7690 && GET_CODE (SET_DEST (d_pat)) == MEM)
7692 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7693 /* This is a load after a store; there is no conflict if the load reads
7694 from a cached area. Assume that loads from the stack and from the
7695 constant pool are cached, and that others will miss. This is a
7696 hack. */
7698 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7699 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7700 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7701 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7702 return 1;
7705 return cost;
7708 static int fp_consts_inited = 0;
7710 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7711 static const char * const strings_fp[8] =
7713 "0", "1", "2", "3",
7714 "4", "5", "0.5", "10"
7717 static REAL_VALUE_TYPE values_fp[8];
7719 static void
7720 init_fp_table (void)
7722 int i;
7723 REAL_VALUE_TYPE r;
7725 if (TARGET_VFP)
7726 fp_consts_inited = 1;
7727 else
7728 fp_consts_inited = 8;
7730 for (i = 0; i < fp_consts_inited; i++)
7732 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7733 values_fp[i] = r;
7737 /* Return TRUE if rtx X is a valid immediate FP constant. */
7739 arm_const_double_rtx (rtx x)
7741 REAL_VALUE_TYPE r;
7742 int i;
7744 if (!fp_consts_inited)
7745 init_fp_table ();
7747 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7748 if (REAL_VALUE_MINUS_ZERO (r))
7749 return 0;
7751 for (i = 0; i < fp_consts_inited; i++)
7752 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7753 return 1;
7755 return 0;
7758 /* Return TRUE if rtx X is a valid immediate FPA constant. */
7760 neg_const_double_rtx_ok_for_fpa (rtx x)
7762 REAL_VALUE_TYPE r;
7763 int i;
7765 if (!fp_consts_inited)
7766 init_fp_table ();
7768 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7769 r = real_value_negate (&r);
7770 if (REAL_VALUE_MINUS_ZERO (r))
7771 return 0;
7773 for (i = 0; i < 8; i++)
7774 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7775 return 1;
7777 return 0;
7781 /* VFPv3 has a fairly wide range of representable immediates, formed from
7782 "quarter-precision" floating-point values. These can be evaluated using this
7783 formula (with ^ for exponentiation):
7785 -1^s * n * 2^-r
7787 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7788 16 <= n <= 31 and 0 <= r <= 7.
7790 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7792 - A (most-significant) is the sign bit.
7793 - BCD are the exponent (encoded as r XOR 3).
7794 - EFGH are the mantissa (encoded as n - 16).
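/* Worked example (illustrative, derived from the formula above): 1.0 can
   be written as 16 * 2^-4, so s = 0, n = 16, r = 4; this encodes as
   A = 0, BCD = 4 ^ 3 = 7, EFGH = 16 - 16 = 0, i.e. the 8-bit
   immediate 0x70.  */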
7797 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7798 fconst[sd] instruction, or -1 if X isn't suitable. */
7799 static int
7800 vfp3_const_double_index (rtx x)
7802 REAL_VALUE_TYPE r, m;
7803 int sign, exponent;
7804 unsigned HOST_WIDE_INT mantissa, mant_hi;
7805 unsigned HOST_WIDE_INT mask;
7806 HOST_WIDE_INT m1, m2;
7807 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7809 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7810 return -1;
7812 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7814 /* We can't represent these things, so detect them first. */
7815 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7816 return -1;
7818 /* Extract sign, exponent and mantissa. */
7819 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7820 r = real_value_abs (&r);
7821 exponent = REAL_EXP (&r);
7822 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7823 highest (sign) bit, with a fixed binary point at bit point_pos.
7824 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7825 bits for the mantissa, this may fail (low bits would be lost). */
7826 real_ldexp (&m, &r, point_pos - exponent);
7827 REAL_VALUE_TO_INT (&m1, &m2, m);
7828 mantissa = m1;
7829 mant_hi = m2;
7831 /* If there are bits set in the low part of the mantissa, we can't
7832 represent this value. */
7833 if (mantissa != 0)
7834 return -1;
7836 /* Now make it so that mantissa contains the most-significant bits, and move
7837 the point_pos to indicate that the least-significant bits have been
7838 discarded. */
7839 point_pos -= HOST_BITS_PER_WIDE_INT;
7840 mantissa = mant_hi;
7842 /* We can permit four significant bits of mantissa only, plus a high bit
7843 which is always 1. */
7844 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7845 if ((mantissa & mask) != 0)
7846 return -1;
7848 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7849 mantissa >>= point_pos - 5;
7851 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7852 floating-point immediate zero with Neon using an integer-zero load, but
7853 that case is handled elsewhere.) */
7854 if (mantissa == 0)
7855 return -1;
7857 gcc_assert (mantissa >= 16 && mantissa <= 31);
7859 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7860 normalized significands are in the range [1, 2). (Our mantissa is shifted
7861 left 4 places at this point relative to normalized IEEE754 values). GCC
7862 internally uses [0.5, 1) (see real.c), so the exponent returned from
7863 REAL_EXP must be altered. */
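/* For instance (illustrative): for 1.0, REAL_EXP returns 1 (GCC represents
   1.0 as 0.5 * 2^1), so this yields exponent = 5 - 1 = 4, matching the
   encoded r = 4 for 1.0 in the formula above.  */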
7864 exponent = 5 - exponent;
7866 if (exponent < 0 || exponent > 7)
7867 return -1;
7869 /* Sign, mantissa and exponent are now in the correct form to plug into the
7870 formula described in the comment above. */
7871 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
7874 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
7876 vfp3_const_double_rtx (rtx x)
7878 if (!TARGET_VFP3)
7879 return 0;
7881 return vfp3_const_double_index (x) != -1;
7884 /* Recognize immediates which can be used in various Neon instructions. Legal
7885 immediates are described by the following table (for VMVN variants, the
7886 bitwise inverse of the constant shown is recognized. In either case, VMOV
7887 is output and the correct instruction to use for a given constant is chosen
7888 by the assembler). The constant shown is replicated across all elements of
7889 the destination vector.
7891 insn elems variant constant (binary)
7892 ---- ----- ------- -----------------
7893 vmov i32 0 00000000 00000000 00000000 abcdefgh
7894 vmov i32 1 00000000 00000000 abcdefgh 00000000
7895 vmov i32 2 00000000 abcdefgh 00000000 00000000
7896 vmov i32 3 abcdefgh 00000000 00000000 00000000
7897 vmov i16 4 00000000 abcdefgh
7898 vmov i16 5 abcdefgh 00000000
7899 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7900 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7901 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7902 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7903 vmvn i16 10 00000000 abcdefgh
7904 vmvn i16 11 abcdefgh 00000000
7905 vmov i32 12 00000000 00000000 abcdefgh 11111111
7906 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7907 vmov i32 14 00000000 abcdefgh 11111111 11111111
7908 vmvn i32 15 00000000 abcdefgh 11111111 11111111
7909 vmov i8 16 abcdefgh
7910 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7911 eeeeeeee ffffffff gggggggg hhhhhhhh
7912 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
7914 For case 18, B = !b. Representable values are exactly those accepted by
7915 vfp3_const_double_index, but are output as floating-point numbers rather
7916 than indices.
7918 Variants 0-5 (inclusive) may also be used as immediates for the second
7919 operand of VORR/VBIC instructions.
7921 The INVERSE argument causes the bitwise inverse of the given operand to be
7922 recognized instead (used for recognizing legal immediates for the VAND/VORN
7923 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7924 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7925 output, rather than the real insns vbic/vorr).
7927 INVERSE makes no difference to the recognition of float vectors.
7929 The return value is the variant of immediate as shown in the above table, or
7930 -1 if the given value doesn't match any of the listed patterns.
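/* Editorial example (added commentary, not in the original source): a
   V4SImode CONST_VECTOR whose four elements are all 0x0000ab00 matches
   variant 1 above (abcdefgh = 0xab in the second byte); the function
   returns 1, stores 32 in *ELEMENTWIDTH and (const_int 0xab00) in
   *MODCONST.  */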
7932 static int
7933 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7934 rtx *modconst, int *elementwidth)
7936 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
7937 matches = 1; \
7938 for (i = 0; i < idx; i += (STRIDE)) \
7939 if (!(TEST)) \
7940 matches = 0; \
7941 if (matches) \
7943 immtype = (CLASS); \
7944 elsize = (ELSIZE); \
7945 break; \
7948 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7949 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7950 unsigned char bytes[16];
7951 int immtype = -1, matches;
7952 unsigned int invmask = inverse ? 0xff : 0;
7954 /* Vectors of float constants. */
7955 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7957 rtx el0 = CONST_VECTOR_ELT (op, 0);
7958 REAL_VALUE_TYPE r0;
7960 if (!vfp3_const_double_rtx (el0))
7961 return -1;
7963 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
7965 for (i = 1; i < n_elts; i++)
7967 rtx elt = CONST_VECTOR_ELT (op, i);
7968 REAL_VALUE_TYPE re;
7970 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
7972 if (!REAL_VALUES_EQUAL (r0, re))
7973 return -1;
7976 if (modconst)
7977 *modconst = CONST_VECTOR_ELT (op, 0);
7979 if (elementwidth)
7980 *elementwidth = 0;
7982 return 18;
7985 /* Splat vector constant out into a byte vector. */
7986 for (i = 0; i < n_elts; i++)
7988 rtx el = CONST_VECTOR_ELT (op, i);
7989 unsigned HOST_WIDE_INT elpart;
7990 unsigned int part, parts;
7992 if (GET_CODE (el) == CONST_INT)
7994 elpart = INTVAL (el);
7995 parts = 1;
7997 else if (GET_CODE (el) == CONST_DOUBLE)
7999 elpart = CONST_DOUBLE_LOW (el);
8000 parts = 2;
8002 else
8003 gcc_unreachable ();
8005 for (part = 0; part < parts; part++)
8007 unsigned int byte;
8008 for (byte = 0; byte < innersize; byte++)
8010 bytes[idx++] = (elpart & 0xff) ^ invmask;
8011 elpart >>= BITS_PER_UNIT;
8013 if (GET_CODE (el) == CONST_DOUBLE)
8014 elpart = CONST_DOUBLE_HIGH (el);
8018 /* Sanity check. */
8019 gcc_assert (idx == GET_MODE_SIZE (mode));
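/* Editorial note (added commentary, not in the original source): each CHECK
   below tests one row of the table in the comment above against the byte
   image just built, examining the bytes of each STRIDE-byte element; the
   first row that matches records its variant number in immtype and its
   element size in elsize, and the break leaves the enclosing
   do { ... } while (0) block.  */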
8023 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8024 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8026 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8027 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8029 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8030 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8032 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8033 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8035 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8037 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8039 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8040 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8042 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8043 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8045 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8046 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8048 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8049 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8051 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8053 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8055 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8056 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8058 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8059 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8061 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8062 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8064 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8065 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8067 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8069 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8070 && bytes[i] == bytes[(i + 8) % idx]);
8072 while (0);
8074 if (immtype == -1)
8075 return -1;
8077 if (elementwidth)
8078 *elementwidth = elsize;
8080 if (modconst)
8082 unsigned HOST_WIDE_INT imm = 0;
8084 /* Un-invert bytes of recognized vector, if necessary. */
8085 if (invmask != 0)
8086 for (i = 0; i < idx; i++)
8087 bytes[i] ^= invmask;
8089 if (immtype == 17)
8091 /* FIXME: Broken on 32-bit H_W_I hosts. */
8092 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8094 for (i = 0; i < 8; i++)
8095 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8096 << (i * BITS_PER_UNIT);
8098 *modconst = GEN_INT (imm);
8100 else
8102 unsigned HOST_WIDE_INT imm = 0;
8104 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8105 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8107 *modconst = GEN_INT (imm);
8111 return immtype;
8112 #undef CHECK
8115 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8116 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8117 float elements), and a modified constant (whatever should be output for a
8118 VMOV) in *MODCONST. */
8121 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8122 rtx *modconst, int *elementwidth)
8124 rtx tmpconst;
8125 int tmpwidth;
8126 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8128 if (retval == -1)
8129 return 0;
8131 if (modconst)
8132 *modconst = tmpconst;
8134 if (elementwidth)
8135 *elementwidth = tmpwidth;
8137 return 1;
8140 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8141 the immediate is valid, write a constant suitable for using as an operand
8142 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8143 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8146 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8147 rtx *modconst, int *elementwidth)
8149 rtx tmpconst;
8150 int tmpwidth;
8151 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8153 if (retval < 0 || retval > 5)
8154 return 0;
8156 if (modconst)
8157 *modconst = tmpconst;
8159 if (elementwidth)
8160 *elementwidth = tmpwidth;
8162 return 1;
8165 /* Return a string suitable for output of Neon immediate logic operation
8166 MNEM. */
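/* Editorial example (illustrative operands, not from the original source):
   with MNEM "vorr", a recognized 32-bit immediate and QUAD set, the string
   returned below is "vorr.i32\t%q0, %2"; %q0 and %P0 select the quad and
   double forms of the destination register respectively.  */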
8168 char *
8169 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8170 int inverse, int quad)
8172 int width, is_valid;
8173 static char templ[40];
8175 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8177 gcc_assert (is_valid != 0);
8179 if (quad)
8180 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8181 else
8182 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8184 return templ;
8187 /* Output a sequence of pairwise operations to implement a reduction.
8188 NOTE: We do "too much work" here, because pairwise operations work on two
8189 registers-worth of operands in one go. Unfortunately we don't think we can
8190 exploit those extra calculations to do the full operation in fewer steps.
8191 Although all vector elements of the result but the first are ignored, we
8192 actually calculate the same result in each of the elements. An alternative
8193 such as initially loading a vector with zero to use as each of the second
8194 operands would use up an additional register and take an extra instruction,
8195 for no particular gain. */
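/* Editorial example (added commentary, not in the original source): for a
   V4SFmode sum with a vpadd-style REDUC, PARTS is 4, so the loop below runs
   with i = 2 and then i = 1, emitting two pairwise additions; after the
   second one every lane of OP0 holds the full four-element result, as
   described above.  */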
8197 void
8198 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8199 rtx (*reduc) (rtx, rtx, rtx))
8201 enum machine_mode inner = GET_MODE_INNER (mode);
8202 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8203 rtx tmpsum = op1;
8205 for (i = parts / 2; i >= 1; i /= 2)
8207 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8208 emit_insn (reduc (dest, tmpsum, tmpsum));
8209 tmpsum = dest;
8213 /* If VALS is a vector constant that can be loaded into a register
8214 using VDUP, generate instructions to do so and return an RTX to
8215 assign to the register. Otherwise return NULL_RTX. */
8217 static rtx
8218 neon_vdup_constant (rtx vals)
8220 enum machine_mode mode = GET_MODE (vals);
8221 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8222 int n_elts = GET_MODE_NUNITS (mode);
8223 bool all_same = true;
8224 rtx x;
8225 int i;
8227 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8228 return NULL_RTX;
8230 for (i = 0; i < n_elts; ++i)
8232 x = XVECEXP (vals, 0, i);
8233 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8234 all_same = false;
8237 if (!all_same)
8238 /* The elements are not all the same. We could handle repeating
8239 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8240 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8241 vdup.i16). */
8242 return NULL_RTX;
8244 /* We can load this constant by using VDUP and a constant in a
8245 single ARM register. This will be cheaper than a vector
8246 load. */
8248 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8249 return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8250 UNSPEC_VDUP_N);
8253 /* Generate code to load VALS, which is a PARALLEL containing only
8254 constants (for vec_init) or CONST_VECTOR, efficiently into a
8255 register. Returns an RTX to copy into the register, or NULL_RTX
8256 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
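/* Editorial summary of the cases handled below (added commentary, not in
   the original source): a CONST_VECTOR that is a valid VMOV immediate is
   returned as-is; otherwise, if the elements are identical and narrow
   enough, neon_vdup_constant supplies a VDUP form; otherwise a CONST_VECTOR
   is left to be loaded from the constant pool; and a PARALLEL containing
   anything other than CONST_INT/CONST_DOUBLE elements yields NULL_RTX.  */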
8259 neon_make_constant (rtx vals)
8261 enum machine_mode mode = GET_MODE (vals);
8262 rtx target;
8263 rtx const_vec = NULL_RTX;
8264 int n_elts = GET_MODE_NUNITS (mode);
8265 int n_const = 0;
8266 int i;
8268 if (GET_CODE (vals) == CONST_VECTOR)
8269 const_vec = vals;
8270 else if (GET_CODE (vals) == PARALLEL)
8272 /* A CONST_VECTOR must contain only CONST_INTs and
8273 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8274 Only store valid constants in a CONST_VECTOR. */
8275 for (i = 0; i < n_elts; ++i)
8277 rtx x = XVECEXP (vals, 0, i);
8278 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8279 n_const++;
8281 if (n_const == n_elts)
8282 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8284 else
8285 gcc_unreachable ();
8287 if (const_vec != NULL
8288 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8289 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8290 return const_vec;
8291 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8292 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8293 pipeline cycle; creating the constant takes one or two ARM
8294 pipeline cycles. */
8295 return target;
8296 else if (const_vec != NULL_RTX)
8297 /* Load from constant pool. On Cortex-A8 this takes two cycles
8298 (for either double or quad vectors). We cannot take advantage
8299 of single-cycle VLD1 because we need a PC-relative addressing
8300 mode. */
8301 return const_vec;
8302 else
8303 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8304 We cannot construct an initializer. */
8305 return NULL_RTX;
8308 /* Initialize vector TARGET to VALS. */
8310 void
8311 neon_expand_vector_init (rtx target, rtx vals)
8313 enum machine_mode mode = GET_MODE (target);
8314 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8315 int n_elts = GET_MODE_NUNITS (mode);
8316 int n_var = 0, one_var = -1;
8317 bool all_same = true;
8318 rtx x, mem;
8319 int i;
8321 for (i = 0; i < n_elts; ++i)
8323 x = XVECEXP (vals, 0, i);
8324 if (!CONSTANT_P (x))
8325 ++n_var, one_var = i;
8327 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8328 all_same = false;
8331 if (n_var == 0)
8333 rtx constant = neon_make_constant (vals);
8334 if (constant != NULL_RTX)
8336 emit_move_insn (target, constant);
8337 return;
8341 /* Splat a single non-constant element if we can. */
8342 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8344 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8345 emit_insn (gen_rtx_SET (VOIDmode, target,
8346 gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8347 UNSPEC_VDUP_N)));
8348 return;
8351 /* One field is non-constant. Load constant then overwrite varying
8352 field. This is more efficient than using the stack. */
8353 if (n_var == 1)
8355 rtx copy = copy_rtx (vals);
8356 rtvec ops;
8358 /* Load constant part of vector, substitute neighboring value for
8359 varying element. */
8360 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8361 neon_expand_vector_init (target, copy);
8363 /* Insert variable. */
8364 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8365 ops = gen_rtvec (3, x, target, GEN_INT (one_var));
8366 emit_insn (gen_rtx_SET (VOIDmode, target,
8367 gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
8368 return;
8371 /* Construct the vector in memory one field at a time
8372 and load the whole vector. */
8373 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8374 for (i = 0; i < n_elts; i++)
8375 emit_move_insn (adjust_address_nv (mem, inner_mode,
8376 i * GET_MODE_SIZE (inner_mode)),
8377 XVECEXP (vals, 0, i));
8378 emit_move_insn (target, mem);
8381 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8382 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8383 reported source locations are bogus. */
8385 static void
8386 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8387 const char *err)
8389 HOST_WIDE_INT lane;
8391 gcc_assert (GET_CODE (operand) == CONST_INT);
8393 lane = INTVAL (operand);
8395 if (lane < low || lane >= high)
8396 error (err);
8399 /* Bounds-check lanes. */
8401 void
8402 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8404 bounds_check (operand, low, high, "lane out of range");
8407 /* Bounds-check constants. */
8409 void
8410 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8412 bounds_check (operand, low, high, "constant out of range");
8415 HOST_WIDE_INT
8416 neon_element_bits (enum machine_mode mode)
8418 if (mode == DImode)
8419 return GET_MODE_BITSIZE (mode);
8420 else
8421 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8425 /* Predicates for `match_operand' and `match_operator'. */
8427 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8429 cirrus_memory_offset (rtx op)
8431 /* Reject eliminable registers. */
8432 if (! (reload_in_progress || reload_completed)
8433 && ( reg_mentioned_p (frame_pointer_rtx, op)
8434 || reg_mentioned_p (arg_pointer_rtx, op)
8435 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8436 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8437 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8438 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8439 return 0;
8441 if (GET_CODE (op) == MEM)
8443 rtx ind;
8445 ind = XEXP (op, 0);
8447 /* Match: (mem (reg)). */
8448 if (GET_CODE (ind) == REG)
8449 return 1;
8451 /* Match:
8452 (mem (plus (reg)
8453 (const))). */
8454 if (GET_CODE (ind) == PLUS
8455 && GET_CODE (XEXP (ind, 0)) == REG
8456 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8457 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8458 return 1;
8461 return 0;
8464 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8465 WB is true if full writeback address modes are allowed and is false
8466 if limited writeback address modes (POST_INC and PRE_DEC) are
8467 allowed. */
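/* Editorial examples (added commentary, not in the original source):
   (mem (reg)), (mem (post_inc (reg))) and (mem (plus (reg) (const_int 8)))
   are accepted; (mem (plus (reg) (const_int 1024))) is not, because the
   offset must lie strictly between -1024 and 1024 and be a multiple of 4.
   PRE_INC and POST_DEC addresses are accepted only when WB is true.  */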
8470 arm_coproc_mem_operand (rtx op, bool wb)
8472 rtx ind;
8474 /* Reject eliminable registers. */
8475 if (! (reload_in_progress || reload_completed)
8476 && ( reg_mentioned_p (frame_pointer_rtx, op)
8477 || reg_mentioned_p (arg_pointer_rtx, op)
8478 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8479 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8480 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8481 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8482 return FALSE;
8484 /* Constants are converted into offsets from labels. */
8485 if (GET_CODE (op) != MEM)
8486 return FALSE;
8488 ind = XEXP (op, 0);
8490 if (reload_completed
8491 && (GET_CODE (ind) == LABEL_REF
8492 || (GET_CODE (ind) == CONST
8493 && GET_CODE (XEXP (ind, 0)) == PLUS
8494 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8495 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8496 return TRUE;
8498 /* Match: (mem (reg)). */
8499 if (GET_CODE (ind) == REG)
8500 return arm_address_register_rtx_p (ind, 0);
8502 /* Auto-increment addressing modes. POST_INC and PRE_DEC are
8503 acceptable in any case (subject to verification by
8504 arm_address_register_rtx_p). We need WB to be true to accept
8505 PRE_INC and POST_DEC. */
8506 if (GET_CODE (ind) == POST_INC
8507 || GET_CODE (ind) == PRE_DEC
8508 || (wb
8509 && (GET_CODE (ind) == PRE_INC
8510 || GET_CODE (ind) == POST_DEC)))
8511 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8513 if (wb
8514 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8515 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8516 && GET_CODE (XEXP (ind, 1)) == PLUS
8517 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8518 ind = XEXP (ind, 1);
8520 /* Match:
8521 (plus (reg)
8522 (const)). */
8523 if (GET_CODE (ind) == PLUS
8524 && GET_CODE (XEXP (ind, 0)) == REG
8525 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8526 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8527 && INTVAL (XEXP (ind, 1)) > -1024
8528 && INTVAL (XEXP (ind, 1)) < 1024
8529 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8530 return TRUE;
8532 return FALSE;
8535 /* Return TRUE if OP is a memory operand which we can load or store a vector
8536 to/from. TYPE is one of the following values:
8537 0 - Vector load/store (vldr)
8538 1 - Core registers (ldm)
8539 2 - Element/structure loads (vld1)
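/* Editorial note (added commentary, not in the original source): for TYPE 0
   a (reg + const) address is also accepted when the offset lies strictly
   between -1024 and 1016 and is a multiple of 4; POST_INC and PRE_DEC
   addresses are accepted for types 0 and 2 but not for type 1, which must
   remain usable by core-register ldm/stm.  */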
8542 neon_vector_mem_operand (rtx op, int type)
8544 rtx ind;
8546 /* Reject eliminable registers. */
8547 if (! (reload_in_progress || reload_completed)
8548 && ( reg_mentioned_p (frame_pointer_rtx, op)
8549 || reg_mentioned_p (arg_pointer_rtx, op)
8550 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8551 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8552 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8553 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8554 return FALSE;
8556 /* Constants are converted into offsets from labels. */
8557 if (GET_CODE (op) != MEM)
8558 return FALSE;
8560 ind = XEXP (op, 0);
8562 if (reload_completed
8563 && (GET_CODE (ind) == LABEL_REF
8564 || (GET_CODE (ind) == CONST
8565 && GET_CODE (XEXP (ind, 0)) == PLUS
8566 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8567 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8568 return TRUE;
8570 /* Match: (mem (reg)). */
8571 if (GET_CODE (ind) == REG)
8572 return arm_address_register_rtx_p (ind, 0);
8574 /* Allow post-increment with Neon registers. */
8575 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8576 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8578 /* FIXME: vld1 allows register post-modify. */
8580 /* Match:
8581 (plus (reg)
8582 (const)). */
8583 if (type == 0
8584 && GET_CODE (ind) == PLUS
8585 && GET_CODE (XEXP (ind, 0)) == REG
8586 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8587 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8588 && INTVAL (XEXP (ind, 1)) > -1024
8589 && INTVAL (XEXP (ind, 1)) < 1016
8590 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8591 return TRUE;
8593 return FALSE;
8596 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8597 type. */
8599 neon_struct_mem_operand (rtx op)
8601 rtx ind;
8603 /* Reject eliminable registers. */
8604 if (! (reload_in_progress || reload_completed)
8605 && ( reg_mentioned_p (frame_pointer_rtx, op)
8606 || reg_mentioned_p (arg_pointer_rtx, op)
8607 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8608 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8609 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8610 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8611 return FALSE;
8613 /* Constants are converted into offsets from labels. */
8614 if (GET_CODE (op) != MEM)
8615 return FALSE;
8617 ind = XEXP (op, 0);
8619 if (reload_completed
8620 && (GET_CODE (ind) == LABEL_REF
8621 || (GET_CODE (ind) == CONST
8622 && GET_CODE (XEXP (ind, 0)) == PLUS
8623 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8624 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8625 return TRUE;
8627 /* Match: (mem (reg)). */
8628 if (GET_CODE (ind) == REG)
8629 return arm_address_register_rtx_p (ind, 0);
8631 return FALSE;
8634 /* Return true if X is a register that will be eliminated later on. */
8636 arm_eliminable_register (rtx x)
8638 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8639 || REGNO (x) == ARG_POINTER_REGNUM
8640 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8641 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8644 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
8645 coprocessor registers. Otherwise return NO_REGS. */
8647 enum reg_class
8648 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8650 if (mode == HFmode)
8652 if (!TARGET_NEON_FP16)
8653 return GENERAL_REGS;
8654 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8655 return NO_REGS;
8656 return GENERAL_REGS;
8659 if (TARGET_NEON
8660 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8661 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8662 && neon_vector_mem_operand (x, 0))
8663 return NO_REGS;
8665 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8666 return NO_REGS;
8668 return GENERAL_REGS;
8671 /* Values which must be returned in the most-significant end of the return
8672 register. */
8674 static bool
8675 arm_return_in_msb (const_tree valtype)
8677 return (TARGET_AAPCS_BASED
8678 && BYTES_BIG_ENDIAN
8679 && (AGGREGATE_TYPE_P (valtype)
8680 || TREE_CODE (valtype) == COMPLEX_TYPE));
8683 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8684 Used by the Cirrus Maverick code, which has to work around
8685 a hardware bug triggered by such instructions. */
8686 static bool
8687 arm_memory_load_p (rtx insn)
8689 rtx body, lhs, rhs;
8691 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8692 return false;
8694 body = PATTERN (insn);
8696 if (GET_CODE (body) != SET)
8697 return false;
8699 lhs = XEXP (body, 0);
8700 rhs = XEXP (body, 1);
8702 lhs = REG_OR_SUBREG_RTX (lhs);
8704 /* If the destination is not a general purpose
8705 register we do not have to worry. */
8706 if (GET_CODE (lhs) != REG
8707 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8708 return false;
8710 /* As well as loads from memory we also have to react
8711 to loads of invalid constants which will be turned
8712 into loads from the minipool. */
8713 return (GET_CODE (rhs) == MEM
8714 || GET_CODE (rhs) == SYMBOL_REF
8715 || note_invalid_constants (insn, -1, false));
8718 /* Return TRUE if INSN is a Cirrus instruction. */
8719 static bool
8720 arm_cirrus_insn_p (rtx insn)
8722 enum attr_cirrus attr;
8724 /* get_attr cannot accept USE or CLOBBER. */
8725 if (!insn
8726 || GET_CODE (insn) != INSN
8727 || GET_CODE (PATTERN (insn)) == USE
8728 || GET_CODE (PATTERN (insn)) == CLOBBER)
8729 return 0;
8731 attr = get_attr_cirrus (insn);
8733 return attr != CIRRUS_NOT;
8736 /* Cirrus reorg for invalid instruction combinations. */
8737 static void
8738 cirrus_reorg (rtx first)
8740 enum attr_cirrus attr;
8741 rtx body = PATTERN (first);
8742 rtx t;
8743 int nops;
8745 /* Any branch must be followed by 2 non Cirrus instructions. */
8746 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8748 nops = 0;
8749 t = next_nonnote_insn (first);
8751 if (arm_cirrus_insn_p (t))
8752 ++ nops;
8754 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8755 ++ nops;
8757 while (nops --)
8758 emit_insn_after (gen_nop (), first);
8760 return;
8763 /* (float (blah)) is in parallel with a clobber. */
8764 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8765 body = XVECEXP (body, 0, 0);
8767 if (GET_CODE (body) == SET)
8769 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8771 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8772 be followed by a non Cirrus insn. */
8773 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8775 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8776 emit_insn_after (gen_nop (), first);
8778 return;
8780 else if (arm_memory_load_p (first))
8782 unsigned int arm_regno;
8784 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8785 ldr/cfmv64hr combination where the Rd field is the same
8786 in both instructions must be split with a non Cirrus
8787 insn. Example:
8789 ldr r0, blah
8791 cfmvsr mvf0, r0. */
8793 /* Get Arm register number for ldr insn. */
8794 if (GET_CODE (lhs) == REG)
8795 arm_regno = REGNO (lhs);
8796 else
8798 gcc_assert (GET_CODE (rhs) == REG);
8799 arm_regno = REGNO (rhs);
8802 /* Next insn. */
8803 first = next_nonnote_insn (first);
8805 if (! arm_cirrus_insn_p (first))
8806 return;
8808 body = PATTERN (first);
8810 /* (float (blah)) is in parallel with a clobber. */
8811 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8812 body = XVECEXP (body, 0, 0);
8814 if (GET_CODE (body) == FLOAT)
8815 body = XEXP (body, 0);
8817 if (get_attr_cirrus (first) == CIRRUS_MOVE
8818 && GET_CODE (XEXP (body, 1)) == REG
8819 && arm_regno == REGNO (XEXP (body, 1)))
8820 emit_insn_after (gen_nop (), first);
8822 return;
8826 /* get_attr cannot accept USE or CLOBBER. */
8827 if (!first
8828 || GET_CODE (first) != INSN
8829 || GET_CODE (PATTERN (first)) == USE
8830 || GET_CODE (PATTERN (first)) == CLOBBER)
8831 return;
8833 attr = get_attr_cirrus (first);
8835 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8836 must be followed by a non-coprocessor instruction. */
8837 if (attr == CIRRUS_COMPARE)
8839 nops = 0;
8841 t = next_nonnote_insn (first);
8843 if (arm_cirrus_insn_p (t))
8844 ++ nops;
8846 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8847 ++ nops;
8849 while (nops --)
8850 emit_insn_after (gen_nop (), first);
8852 return;
8856 /* Return TRUE if X references a SYMBOL_REF. */
8858 symbol_mentioned_p (rtx x)
8860 const char * fmt;
8861 int i;
8863 if (GET_CODE (x) == SYMBOL_REF)
8864 return 1;
8866 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8867 are constant offsets, not symbols. */
8868 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8869 return 0;
8871 fmt = GET_RTX_FORMAT (GET_CODE (x));
8873 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8875 if (fmt[i] == 'E')
8877 int j;
8879 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8880 if (symbol_mentioned_p (XVECEXP (x, i, j)))
8881 return 1;
8883 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8884 return 1;
8887 return 0;
8890 /* Return TRUE if X references a LABEL_REF. */
8892 label_mentioned_p (rtx x)
8894 const char * fmt;
8895 int i;
8897 if (GET_CODE (x) == LABEL_REF)
8898 return 1;
8900 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8901 instruction, but they are constant offsets, not symbols. */
8902 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8903 return 0;
8905 fmt = GET_RTX_FORMAT (GET_CODE (x));
8906 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8908 if (fmt[i] == 'E')
8910 int j;
8912 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8913 if (label_mentioned_p (XVECEXP (x, i, j)))
8914 return 1;
8916 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
8917 return 1;
8920 return 0;
8924 tls_mentioned_p (rtx x)
8926 switch (GET_CODE (x))
8928 case CONST:
8929 return tls_mentioned_p (XEXP (x, 0));
8931 case UNSPEC:
8932 if (XINT (x, 1) == UNSPEC_TLS)
8933 return 1;
8935 default:
8936 return 0;
8940 /* Must not copy any rtx that uses a pc-relative address. */
8942 static int
8943 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
8945 if (GET_CODE (*x) == UNSPEC
8946 && XINT (*x, 1) == UNSPEC_PIC_BASE)
8947 return 1;
8948 return 0;
8951 static bool
8952 arm_cannot_copy_insn_p (rtx insn)
8954 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
8957 enum rtx_code
8958 minmax_code (rtx x)
8960 enum rtx_code code = GET_CODE (x);
8962 switch (code)
8964 case SMAX:
8965 return GE;
8966 case SMIN:
8967 return LE;
8968 case UMIN:
8969 return LEU;
8970 case UMAX:
8971 return GEU;
8972 default:
8973 gcc_unreachable ();
8977 /* Return 1 if memory locations are adjacent. */
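/* Editorial example (added commentary, not in the original source): two
   MEMs based on the same non-eliminable register whose constant offsets
   differ by exactly 4 (e.g. offsets 0 and 4) qualify; on cores with load
   delay slots (arm_ld_sched) such a pair is only treated as adjacent when
   optimizing for size and one of the offsets is 0 or 4.  */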
8979 adjacent_mem_locations (rtx a, rtx b)
8981 /* We don't guarantee to preserve the order of these memory refs. */
8982 if (volatile_refs_p (a) || volatile_refs_p (b))
8983 return 0;
8985 if ((GET_CODE (XEXP (a, 0)) == REG
8986 || (GET_CODE (XEXP (a, 0)) == PLUS
8987 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
8988 && (GET_CODE (XEXP (b, 0)) == REG
8989 || (GET_CODE (XEXP (b, 0)) == PLUS
8990 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
8992 HOST_WIDE_INT val0 = 0, val1 = 0;
8993 rtx reg0, reg1;
8994 int val_diff;
8996 if (GET_CODE (XEXP (a, 0)) == PLUS)
8998 reg0 = XEXP (XEXP (a, 0), 0);
8999 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9001 else
9002 reg0 = XEXP (a, 0);
9004 if (GET_CODE (XEXP (b, 0)) == PLUS)
9006 reg1 = XEXP (XEXP (b, 0), 0);
9007 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9009 else
9010 reg1 = XEXP (b, 0);
9012 /* Don't accept any offset that will require multiple
9013 instructions to handle, since this would cause the
9014 arith_adjacentmem pattern to output an overlong sequence. */
9015 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9016 return 0;
9018 /* Don't allow an eliminable register: register elimination can make
9019 the offset too large. */
9020 if (arm_eliminable_register (reg0))
9021 return 0;
9023 val_diff = val1 - val0;
9025 if (arm_ld_sched)
9027 /* If the target has load delay slots, then there's no benefit
9028 to using an ldm instruction unless the offset is zero and
9029 we are optimizing for size. */
9030 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9031 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9032 && (val_diff == 4 || val_diff == -4));
9035 return ((REGNO (reg0) == REGNO (reg1))
9036 && (val_diff == 4 || val_diff == -4));
9039 return 0;
9042 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9043 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9044 instruction. ADD_OFFSET is nonzero if the base address register needs
9045 to be modified with an add instruction before we can use it. */
9047 static bool
9048 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9049 int nops, HOST_WIDE_INT add_offset)
9051 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9052 if the offset isn't small enough. The reason 2 ldrs are faster
9053 is because these ARMs are able to do more than one cache access
9054 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9055 whilst the ARM8 has a double bandwidth cache. This means that
9056 these cores can do both an instruction fetch and a data fetch in
9057 a single cycle, so the trick of calculating the address into a
9058 scratch register (one of the result regs) and then doing a load
9059 multiple actually becomes slower (and no smaller in code size).
9060 That is the transformation
9062 ldr rd1, [rbase + offset]
9063 ldr rd2, [rbase + offset + 4]
to
9067 add rd1, rbase, offset
9068 ldmia rd1, {rd1, rd2}
9070 produces worse code -- '3 cycles + any stalls on rd2' instead of
9071 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9072 access per cycle, the first sequence could never complete in less
9073 than 6 cycles, whereas the ldm sequence would only take 5 and
9074 would make better use of sequential accesses if not hitting the
9075 cache.
9077 We cheat here and test 'arm_ld_sched' which we currently know to
9078 only be true for the ARM8, ARM9 and StrongARM. If this ever
9079 changes, then the test below needs to be reworked. */
9080 if (nops == 2 && arm_ld_sched && add_offset != 0)
9081 return false;
9083 return true;
9086 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9087 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9088 an array ORDER which describes the sequence to use when accessing the
9089 offsets that produces an ascending order. In this sequence, each
9090 offset must be larger by exactly 4 than the previous one. ORDER[0]
9091 must have been filled in with the lowest offset by the caller.
9092 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9093 we use to verify that ORDER produces an ascending order of registers.
9094 Return true if it was possible to construct such an order, false if
9095 not. */
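/* Editorial worked example (added commentary, not in the original source):
   with NOPS = 4 and UNSORTED_OFFSETS = {8, 0, 4, 12}, the caller seeds
   ORDER[0] = 1 (the index of offset 0) and this function fills in
   ORDER = {1, 2, 0, 3}.  It fails if, at any step, there is not exactly one
   offset 4 larger than the previous one, or if UNSORTED_REGS is given and
   the register numbers do not strictly increase along ORDER.  */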
9097 static bool
9098 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9099 int *unsorted_regs)
9101 int i;
9102 for (i = 1; i < nops; i++)
9104 int j;
9106 order[i] = order[i - 1];
9107 for (j = 0; j < nops; j++)
9108 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9110 /* We must find exactly one offset that is higher than the
9111 previous one by 4. */
9112 if (order[i] != order[i - 1])
9113 return false;
9114 order[i] = j;
9116 if (order[i] == order[i - 1])
9117 return false;
9118 /* The register numbers must be ascending. */
9119 if (unsorted_regs != NULL
9120 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9121 return false;
9123 return true;
9127 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9128 HOST_WIDE_INT *load_offset)
9130 int unsorted_regs[MAX_LDM_STM_OPS];
9131 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9132 int order[MAX_LDM_STM_OPS];
9133 int base_reg = -1;
9134 int i, ldm_case;
9136 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9137 easily extended if required. */
9138 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9140 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9142 /* Loop over the operands and check that the memory references are
9143 suitable (i.e. immediate offsets from the same base register). At
9144 the same time, extract the target register, and the memory
9145 offsets. */
9146 for (i = 0; i < nops; i++)
9148 rtx reg;
9149 rtx offset;
9151 /* Convert a subreg of a mem into the mem itself. */
9152 if (GET_CODE (operands[nops + i]) == SUBREG)
9153 operands[nops + i] = alter_subreg (operands + (nops + i));
9155 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9157 /* Don't reorder volatile memory references; it doesn't seem worth
9158 looking for the case where the order is ok anyway. */
9159 if (MEM_VOLATILE_P (operands[nops + i]))
9160 return 0;
9162 offset = const0_rtx;
9164 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9165 || (GET_CODE (reg) == SUBREG
9166 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9167 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9168 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9169 == REG)
9170 || (GET_CODE (reg) == SUBREG
9171 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9172 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9173 == CONST_INT)))
9175 if (i == 0)
9176 base_reg = REGNO (reg);
9177 else
9179 if (base_reg != (int) REGNO (reg))
9180 /* Not addressed from the same base register. */
9181 return 0;
9183 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9184 ? REGNO (operands[i])
9185 : REGNO (SUBREG_REG (operands[i])));
9187 /* If it isn't an integer register, or if it overwrites the
9188 base register but isn't the last insn in the list, then
9189 we can't do this. */
9190 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
9191 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9192 return 0;
9194 unsorted_offsets[i] = INTVAL (offset);
9195 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9196 order[0] = i;
9198 else
9199 /* Not a suitable memory address. */
9200 return 0;
9203 /* All the useful information has now been extracted from the
9204 operands into unsorted_regs and unsorted_offsets; additionally,
9205 order[0] has been set to the lowest offset in the list. Sort
9206 the offsets into order, verifying that they are adjacent, and
9207 check that the register numbers are ascending. */
9208 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
9209 return 0;
9211 if (base)
9213 *base = base_reg;
9215 for (i = 0; i < nops; i++)
9216 regs[i] = unsorted_regs[order[i]];
9218 *load_offset = unsorted_offsets[order[0]];
9221 if (unsorted_offsets[order[0]] == 0)
9222 ldm_case = 1; /* ldmia */
9223 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9224 ldm_case = 2; /* ldmib */
9225 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9226 ldm_case = 3; /* ldmda */
9227 else if (unsorted_offsets[order[nops - 1]] == -4)
9228 ldm_case = 4; /* ldmdb */
9229 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9230 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9231 ldm_case = 5;
9232 else
9233 return 0;
9235 if (!multiple_operation_profitable_p (false, nops,
9236 ldm_case == 5
9237 ? unsorted_offsets[order[0]] : 0))
9238 return 0;
9240 return ldm_case;
9243 const char *
9244 emit_ldm_seq (rtx *operands, int nops)
9246 int regs[MAX_LDM_STM_OPS];
9247 int base_reg;
9248 HOST_WIDE_INT offset;
9249 char buf[100];
9250 int i;
9252 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9254 case 1:
9255 strcpy (buf, "ldm%(ia%)\t");
9256 break;
9258 case 2:
9259 strcpy (buf, "ldm%(ib%)\t");
9260 break;
9262 case 3:
9263 strcpy (buf, "ldm%(da%)\t");
9264 break;
9266 case 4:
9267 strcpy (buf, "ldm%(db%)\t");
9268 break;
9270 case 5:
9271 if (offset >= 0)
9272 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9273 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9274 (long) offset);
9275 else
9276 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9277 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9278 (long) -offset);
9279 output_asm_insn (buf, operands);
9280 base_reg = regs[0];
9281 strcpy (buf, "ldm%(ia%)\t");
9282 break;
9284 default:
9285 gcc_unreachable ();
9288 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9289 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9291 for (i = 1; i < nops; i++)
9292 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9293 reg_names[regs[i]]);
9295 strcat (buf, "}\t%@ phole ldm");
9297 output_asm_insn (buf, operands);
9298 return "";
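/* Editorial example (illustrative operands, not from the original source):
   loading r0, r1, r2 from [r3], [r3, #4] and [r3, #8] is ldm_case 1, so the
   routine above emits "ldm%(ia%)\tr3, {r0, r1, r2}\t%@ phole ldm" (shown
   with the default empty REGISTER_PREFIX).  */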
9302 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9303 HOST_WIDE_INT * load_offset)
9305 int unsorted_regs[MAX_LDM_STM_OPS];
9306 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9307 int order[MAX_LDM_STM_OPS];
9308 int base_reg = -1;
9309 int i, stm_case;
9311 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9312 easily extended if required. */
9313 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9315 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9317 /* Loop over the operands and check that the memory references are
9318 suitable (i.e. immediate offsets from the same base register). At
9319 the same time, extract the target register, and the memory
9320 offsets. */
9321 for (i = 0; i < nops; i++)
9323 rtx reg;
9324 rtx offset;
9326 /* Convert a subreg of a mem into the mem itself. */
9327 if (GET_CODE (operands[nops + i]) == SUBREG)
9328 operands[nops + i] = alter_subreg (operands + (nops + i));
9330 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9332 /* Don't reorder volatile memory references; it doesn't seem worth
9333 looking for the case where the order is ok anyway. */
9334 if (MEM_VOLATILE_P (operands[nops + i]))
9335 return 0;
9337 offset = const0_rtx;
9339 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9340 || (GET_CODE (reg) == SUBREG
9341 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9342 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9343 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9344 == REG)
9345 || (GET_CODE (reg) == SUBREG
9346 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9347 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9348 == CONST_INT)))
9350 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9351 ? REGNO (operands[i])
9352 : REGNO (SUBREG_REG (operands[i])));
9353 if (i == 0)
9354 base_reg = REGNO (reg);
9355 else if (base_reg != (int) REGNO (reg))
9356 /* Not addressed from the same base register. */
9357 return 0;
9359 /* If it isn't an integer register, then we can't do this. */
9360 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9361 return 0;
9363 unsorted_offsets[i] = INTVAL (offset);
9364 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9365 order[0] = i;
9367 else
9368 /* Not a suitable memory address. */
9369 return 0;
9372 /* All the useful information has now been extracted from the
9373 operands into unsorted_regs and unsorted_offsets; additionally,
9374 order[0] has been set to the lowest offset in the list. Sort
9375 the offsets into order, verifying that they are adjacent, and
9376 check that the register numbers are ascending. */
9377 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
9378 return 0;
9380 if (base)
9382 *base = base_reg;
9384 for (i = 0; i < nops; i++)
9385 regs[i] = unsorted_regs[order[i]];
9387 *load_offset = unsorted_offsets[order[0]];
9390 if (unsorted_offsets[order[0]] == 0)
9391 stm_case = 1; /* stmia */
9392 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9393 stm_case = 2; /* stmib */
9394 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9395 stm_case = 3; /* stmda */
9396 else if (unsorted_offsets[order[nops - 1]] == -4)
9397 stm_case = 4; /* stmdb */
9398 else
9399 return 0;
9401 if (!multiple_operation_profitable_p (false, nops, 0))
9402 return 0;
9404 return stm_case;
9407 const char *
9408 emit_stm_seq (rtx *operands, int nops)
9410 int regs[MAX_LDM_STM_OPS];
9411 int base_reg;
9412 HOST_WIDE_INT offset;
9413 char buf[100];
9414 int i;
9416 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9418 case 1:
9419 strcpy (buf, "stm%(ia%)\t");
9420 break;
9422 case 2:
9423 strcpy (buf, "stm%(ib%)\t");
9424 break;
9426 case 3:
9427 strcpy (buf, "stm%(da%)\t");
9428 break;
9430 case 4:
9431 strcpy (buf, "stm%(db%)\t");
9432 break;
9434 default:
9435 gcc_unreachable ();
9438 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9439 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9441 for (i = 1; i < nops; i++)
9442 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9443 reg_names[regs[i]]);
9445 strcat (buf, "}\t%@ phole stm");
9447 output_asm_insn (buf, operands);
9448 return "";
9451 /* Routines for use in generating RTL. */
9454 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9455 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9457 HOST_WIDE_INT offset = *offsetp;
9458 int i = 0, j;
9459 rtx result;
9460 int sign = up ? 1 : -1;
9461 rtx mem, addr;
9463 /* XScale has load-store double instructions, but they have stricter
9464 alignment requirements than load-store multiple, so we cannot
9465 use them.
9467 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9468 the pipeline until completion.
9470 NREGS CYCLES
1 3
2 4
3 5
4 6
9476 An ldr instruction takes 1-3 cycles, but does not block the
9477 pipeline.
9479 NREGS CYCLES
9480 1 1-3
9481 2 2-6
9482 3 3-9
9483 4 4-12
9485 Best case ldr will always win. However, the more ldr instructions
9486 we issue, the less likely we are to be able to schedule them well.
9487 Using ldr instructions also increases code size.
9489 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9490 for counts of 3 or 4 regs. */
9491 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9493 rtx seq;
9495 start_sequence ();
9497 for (i = 0; i < count; i++)
9499 addr = plus_constant (from, i * 4 * sign);
9500 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9501 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9502 offset += 4 * sign;
9505 if (write_back)
9507 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9508 *offsetp = offset;
9511 seq = get_insns ();
9512 end_sequence ();
9514 return seq;
9517 result = gen_rtx_PARALLEL (VOIDmode,
9518 rtvec_alloc (count + (write_back ? 1 : 0)));
9519 if (write_back)
9521 XVECEXP (result, 0, 0)
9522 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9523 i = 1;
9524 count++;
9527 for (j = 0; i < count; i++, j++)
9529 addr = plus_constant (from, j * 4 * sign);
9530 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9531 XVECEXP (result, 0, i)
9532 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
9533 offset += 4 * sign;
9536 if (write_back)
9537 *offsetp = offset;
9539 return result;
9543 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9544 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9546 HOST_WIDE_INT offset = *offsetp;
9547 int i = 0, j;
9548 rtx result;
9549 int sign = up ? 1 : -1;
9550 rtx mem, addr;
9552 /* See arm_gen_load_multiple for discussion of
9553 the pros/cons of ldm/stm usage for XScale. */
9554 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9556 rtx seq;
9558 start_sequence ();
9560 for (i = 0; i < count; i++)
9562 addr = plus_constant (to, i * 4 * sign);
9563 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9564 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9565 offset += 4 * sign;
9568 if (write_back)
9570 emit_move_insn (to, plus_constant (to, count * 4 * sign));
9571 *offsetp = offset;
9574 seq = get_insns ();
9575 end_sequence ();
9577 return seq;
9580 result = gen_rtx_PARALLEL (VOIDmode,
9581 rtvec_alloc (count + (write_back ? 1 : 0)));
9582 if (write_back)
9584 XVECEXP (result, 0, 0)
9585 = gen_rtx_SET (VOIDmode, to,
9586 plus_constant (to, count * 4 * sign));
9587 i = 1;
9588 count++;
9591 for (j = 0; i < count; i++, j++)
9593 addr = plus_constant (to, j * 4 * sign);
9594 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9595 XVECEXP (result, 0, i)
9596 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9597 offset += 4 * sign;
9600 if (write_back)
9601 *offsetp = offset;
9603 return result;
9607 arm_gen_movmemqi (rtx *operands)
9609 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9610 HOST_WIDE_INT srcoffset, dstoffset;
9611 int i;
9612 rtx src, dst, srcbase, dstbase;
9613 rtx part_bytes_reg = NULL;
9614 rtx mem;
9616 if (GET_CODE (operands[2]) != CONST_INT
9617 || GET_CODE (operands[3]) != CONST_INT
9618 || INTVAL (operands[2]) > 64
9619 || INTVAL (operands[3]) & 3)
9620 return 0;
9622 dstbase = operands[0];
9623 srcbase = operands[1];
9625 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9626 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9628 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9629 out_words_to_go = INTVAL (operands[2]) / 4;
9630 last_bytes = INTVAL (operands[2]) & 3;
9631 dstoffset = srcoffset = 0;
9633 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9634 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9636 for (i = 0; in_words_to_go >= 2; i+=4)
9638 if (in_words_to_go > 4)
9639 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9640 srcbase, &srcoffset));
9641 else
9642 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9643 FALSE, srcbase, &srcoffset));
9645 if (out_words_to_go)
9647 if (out_words_to_go > 4)
9648 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9649 dstbase, &dstoffset));
9650 else if (out_words_to_go != 1)
9651 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9652 dst, TRUE,
9653 (last_bytes == 0
9654 ? FALSE : TRUE),
9655 dstbase, &dstoffset));
9656 else
9658 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9659 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9660 if (last_bytes != 0)
9662 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9663 dstoffset += 4;
9668 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9669 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9672 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9673 if (out_words_to_go)
9675 rtx sreg;
9677 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9678 sreg = copy_to_reg (mem);
9680 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9681 emit_move_insn (mem, sreg);
9682 in_words_to_go--;
9684 gcc_assert (!in_words_to_go); /* Sanity check */
9687 if (in_words_to_go)
9689 gcc_assert (in_words_to_go > 0);
9691 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9692 part_bytes_reg = copy_to_mode_reg (SImode, mem);
9695 gcc_assert (!last_bytes || part_bytes_reg);
9697 if (BYTES_BIG_ENDIAN && last_bytes)
9699 rtx tmp = gen_reg_rtx (SImode);
9701 /* The bytes we want are in the top end of the word. */
9702 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9703 GEN_INT (8 * (4 - last_bytes))));
9704 part_bytes_reg = tmp;
9706 while (last_bytes)
9708 mem = adjust_automodify_address (dstbase, QImode,
9709 plus_constant (dst, last_bytes - 1),
9710 dstoffset + last_bytes - 1);
9711 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9713 if (--last_bytes)
9715 tmp = gen_reg_rtx (SImode);
9716 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9717 part_bytes_reg = tmp;
9722 else
9724 if (last_bytes > 1)
9726 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9727 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9728 last_bytes -= 2;
9729 if (last_bytes)
9731 rtx tmp = gen_reg_rtx (SImode);
9732 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9733 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9734 part_bytes_reg = tmp;
9735 dstoffset += 2;
9739 if (last_bytes)
9741 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9742 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9746 return 1;
9749 /* Select a dominance comparison mode if possible for a test of the general
9750 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9751 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9752 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9753 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9754 In all cases OP will be either EQ or NE, but we don't need to know which
9755 here. If we are unable to support a dominance comparison we return
9756 CC mode. This will then fail to match for the RTL expressions that
9757 generate this call. */
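/* Editorial example (added commentary, not in the original source): for
   X = (eq r0 r1), Y = (eq r2 r3) and COND_OR == DOM_CC_X_AND_Y the result
   is CC_DEQmode; if the two comparison codes are different and neither
   dominates the other (e.g. EQ and NE), CCmode is returned and the caller's
   pattern simply fails to match.  */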
9758 enum machine_mode
9759 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9761 enum rtx_code cond1, cond2;
9762 int swapped = 0;
9764 /* Currently we will probably get the wrong result if the individual
9765 comparisons are not simple. This also ensures that it is safe to
9766 reverse a comparison if necessary. */
9767 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
9768 != CCmode)
9769 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
9770 != CCmode))
9771 return CCmode;
9773 /* The if_then_else variant of this tests the second condition if the
9774 first passes, but is true if the first fails. Reverse the first
9775 condition to get a true "inclusive-or" expression. */
9776 if (cond_or == DOM_CC_NX_OR_Y)
9777 cond1 = reverse_condition (cond1);
9779 /* If the comparisons are not equal, and one doesn't dominate the other,
9780 then we can't do this. */
9781 if (cond1 != cond2
9782 && !comparison_dominates_p (cond1, cond2)
9783 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
9784 return CCmode;
9786 if (swapped)
9788 enum rtx_code temp = cond1;
9789 cond1 = cond2;
9790 cond2 = temp;
9793 switch (cond1)
9795 case EQ:
9796 if (cond_or == DOM_CC_X_AND_Y)
9797 return CC_DEQmode;
9799 switch (cond2)
9801 case EQ: return CC_DEQmode;
9802 case LE: return CC_DLEmode;
9803 case LEU: return CC_DLEUmode;
9804 case GE: return CC_DGEmode;
9805 case GEU: return CC_DGEUmode;
9806 default: gcc_unreachable ();
9809 case LT:
9810 if (cond_or == DOM_CC_X_AND_Y)
9811 return CC_DLTmode;
9813 switch (cond2)
9815 case LT:
9816 return CC_DLTmode;
9817 case LE:
9818 return CC_DLEmode;
9819 case NE:
9820 return CC_DNEmode;
9821 default:
9822 gcc_unreachable ();
9825 case GT:
9826 if (cond_or == DOM_CC_X_AND_Y)
9827 return CC_DGTmode;
9829 switch (cond2)
9831 case GT:
9832 return CC_DGTmode;
9833 case GE:
9834 return CC_DGEmode;
9835 case NE:
9836 return CC_DNEmode;
9837 default:
9838 gcc_unreachable ();
9841 case LTU:
9842 if (cond_or == DOM_CC_X_AND_Y)
9843 return CC_DLTUmode;
9845 switch (cond2)
9847 case LTU:
9848 return CC_DLTUmode;
9849 case LEU:
9850 return CC_DLEUmode;
9851 case NE:
9852 return CC_DNEmode;
9853 default:
9854 gcc_unreachable ();
9857 case GTU:
9858 if (cond_or == DOM_CC_X_AND_Y)
9859 return CC_DGTUmode;
9861 switch (cond2)
9863 case GTU:
9864 return CC_DGTUmode;
9865 case GEU:
9866 return CC_DGEUmode;
9867 case NE:
9868 return CC_DNEmode;
9869 default:
9870 gcc_unreachable ();
9873 /* The remaining cases only occur when both comparisons are the
9874 same. */
9875 case NE:
9876 gcc_assert (cond1 == cond2);
9877 return CC_DNEmode;
9879 case LE:
9880 gcc_assert (cond1 == cond2);
9881 return CC_DLEmode;
9883 case GE:
9884 gcc_assert (cond1 == cond2);
9885 return CC_DGEmode;
9887 case LEU:
9888 gcc_assert (cond1 == cond2);
9889 return CC_DLEUmode;
9891 case GEU:
9892 gcc_assert (cond1 == cond2);
9893 return CC_DGEUmode;
9895 default:
9896 gcc_unreachable ();
9900 enum machine_mode
9901 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9903 /* All floating point compares return CCFP if it is an equality
9904 comparison, and CCFPE otherwise. */
9905 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
9907 switch (op)
9909 case EQ:
9910 case NE:
9911 case UNORDERED:
9912 case ORDERED:
9913 case UNLT:
9914 case UNLE:
9915 case UNGT:
9916 case UNGE:
9917 case UNEQ:
9918 case LTGT:
9919 return CCFPmode;
9921 case LT:
9922 case LE:
9923 case GT:
9924 case GE:
9925 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
9926 return CCFPmode;
9927 return CCFPEmode;
9929 default:
9930 gcc_unreachable ();
9934 /* A compare with a shifted operand. Because of canonicalization, the
9935 comparison will have to be swapped when we emit the assembler. */
9936 if (GET_MODE (y) == SImode
9937 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9938 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9939 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
9940 || GET_CODE (x) == ROTATERT))
9941 return CC_SWPmode;
9943 /* This operation is performed swapped, but since we only rely on the Z
9944 flag we don't need an additional mode. */
9945 if (GET_MODE (y) == SImode
9946 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9947 && GET_CODE (x) == NEG
9948 && (op == EQ || op == NE))
9949 return CC_Zmode;
9951 /* This is a special case that is used by combine to allow a
9952 comparison of a shifted byte load to be split into a zero-extend
9953 followed by a comparison of the shifted integer (only valid for
9954 equalities and unsigned inequalities). */
9955 if (GET_MODE (x) == SImode
9956 && GET_CODE (x) == ASHIFT
9957 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
9958 && GET_CODE (XEXP (x, 0)) == SUBREG
9959 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
9960 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
9961 && (op == EQ || op == NE
9962 || op == GEU || op == GTU || op == LTU || op == LEU)
9963 && GET_CODE (y) == CONST_INT)
9964 return CC_Zmode;
9966 /* A construct for a conditional compare, if the false arm contains
9967 0, then both conditions must be true, otherwise either condition
9968 must be true. Not all conditions are possible, so CCmode is
9969 returned if it can't be done. */
9970 if (GET_CODE (x) == IF_THEN_ELSE
9971 && (XEXP (x, 2) == const0_rtx
9972 || XEXP (x, 2) == const1_rtx)
9973 && COMPARISON_P (XEXP (x, 0))
9974 && COMPARISON_P (XEXP (x, 1)))
9975 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9976 INTVAL (XEXP (x, 2)));
9978 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
9979 if (GET_CODE (x) == AND
9980 && COMPARISON_P (XEXP (x, 0))
9981 && COMPARISON_P (XEXP (x, 1)))
9982 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9983 DOM_CC_X_AND_Y);
9985 if (GET_CODE (x) == IOR
9986 && COMPARISON_P (XEXP (x, 0))
9987 && COMPARISON_P (XEXP (x, 1)))
9988 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9989 DOM_CC_X_OR_Y);
9991 /* An operation (on Thumb) where we want to test for a single bit.
9992 This is done by shifting that bit up into the top bit of a
9993 scratch register; we can then branch on the sign bit. */
9994 if (TARGET_THUMB1
9995 && GET_MODE (x) == SImode
9996 && (op == EQ || op == NE)
9997 && GET_CODE (x) == ZERO_EXTRACT
9998 && XEXP (x, 1) == const1_rtx)
9999 return CC_Nmode;
10001 /* For an operation that sets the condition codes as a side-effect, the
10002 V flag is not set correctly, so we can only use comparisons where
10003 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10004 instead.) */
10005 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10006 if (GET_MODE (x) == SImode
10007 && y == const0_rtx
10008 && (op == EQ || op == NE || op == LT || op == GE)
10009 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10010 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10011 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10012 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10013 || GET_CODE (x) == LSHIFTRT
10014 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10015 || GET_CODE (x) == ROTATERT
10016 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10017 return CC_NOOVmode;
10019 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10020 return CC_Zmode;
10022 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10023 && GET_CODE (x) == PLUS
10024 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10025 return CC_Cmode;
10027 return CCmode;
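/* A few illustrative cases of the selection above (sketches only, not
   part of the original routine): comparing (plus (reg:SI r0) (reg:SI r1))
   against const0_rtx with EQ picks CC_NOOVmode, since only the Z flag of
   the flag-setting ADD is trusted; a QImode register compared with NE
   picks CC_Zmode; and LTU of (plus r0 r1) against r1 picks CC_Cmode,
   testing the carry out of the addition.  */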
10030 /* X and Y are two things to compare using CODE. Emit the compare insn and
10031 return the rtx for register 0 in the proper mode. FP means this is a
10032 floating point compare: I don't think that it is needed on the arm. */
10034 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10036 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
10037 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10039 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10041 return cc_reg;
10044 /* Generate a sequence of insns that will generate the correct return
10045 address mask depending on the physical architecture that the program
10046 is running on. */
10048 arm_gen_return_addr_mask (void)
10050 rtx reg = gen_reg_rtx (Pmode);
10052 emit_insn (gen_return_addr_mask (reg));
10053 return reg;
10056 void
10057 arm_reload_in_hi (rtx *operands)
10059 rtx ref = operands[1];
10060 rtx base, scratch;
10061 HOST_WIDE_INT offset = 0;
10063 if (GET_CODE (ref) == SUBREG)
10065 offset = SUBREG_BYTE (ref);
10066 ref = SUBREG_REG (ref);
10069 if (GET_CODE (ref) == REG)
10071 /* We have a pseudo which has been spilt onto the stack; there
10072 are two cases here: the first where there is a simple
10073 stack-slot replacement and a second where the stack-slot is
10074 out of range, or is used as a subreg. */
10075 if (reg_equiv_mem[REGNO (ref)])
10077 ref = reg_equiv_mem[REGNO (ref)];
10078 base = find_replacement (&XEXP (ref, 0));
10080 else
10081 /* The slot is out of range, or was dressed up in a SUBREG. */
10082 base = reg_equiv_address[REGNO (ref)];
10084 else
10085 base = find_replacement (&XEXP (ref, 0));
10087 /* Handle the case where the address is too complex to be offset by 1. */
10088 if (GET_CODE (base) == MINUS
10089 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10091 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10093 emit_set_insn (base_plus, base);
10094 base = base_plus;
10096 else if (GET_CODE (base) == PLUS)
10098 /* The addend must be CONST_INT, or we would have dealt with it above. */
10099 HOST_WIDE_INT hi, lo;
10101 offset += INTVAL (XEXP (base, 1));
10102 base = XEXP (base, 0);
10104 /* Rework the address into a legal sequence of insns. */
10105 /* Valid range for lo is -4095 -> 4095 */
10106 lo = (offset >= 0
10107 ? (offset & 0xfff)
10108 : -((-offset) & 0xfff));
10110 /* Corner case, if lo is the max offset then we would be out of range
10111 once we have added the additional 1 below, so bump the msb into the
10112 pre-loading insn(s). */
10113 if (lo == 4095)
10114 lo &= 0x7ff;
10116 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10117 ^ (HOST_WIDE_INT) 0x80000000)
10118 - (HOST_WIDE_INT) 0x80000000);
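/* A worked example of the split (illustrative): for offset == 5000,
   lo == 5000 & 0xfff == 904 and hi == 4096, so the base register is
   advanced by 4096 via addsi3 and the residual 904 stays within the
   +/-4095 load offset range; the assertion below checks hi + lo == offset. */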
10120 gcc_assert (hi + lo == offset);
10122 if (hi != 0)
10124 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10126 /* Get the base address; addsi3 knows how to handle constants
10127 that require more than one insn. */
10128 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10129 base = base_plus;
10130 offset = lo;
10134 /* Operands[2] may overlap operands[0] (though it won't overlap
10135 operands[1]), that's why we asked for a DImode reg -- so we can
10136 use the bit that does not overlap. */
10137 if (REGNO (operands[2]) == REGNO (operands[0]))
10138 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10139 else
10140 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10142 emit_insn (gen_zero_extendqisi2 (scratch,
10143 gen_rtx_MEM (QImode,
10144 plus_constant (base,
10145 offset))));
10146 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10147 gen_rtx_MEM (QImode,
10148 plus_constant (base,
10149 offset + 1))));
10150 if (!BYTES_BIG_ENDIAN)
10151 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10152 gen_rtx_IOR (SImode,
10153 gen_rtx_ASHIFT
10154 (SImode,
10155 gen_rtx_SUBREG (SImode, operands[0], 0),
10156 GEN_INT (8)),
10157 scratch));
10158 else
10159 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10160 gen_rtx_IOR (SImode,
10161 gen_rtx_ASHIFT (SImode, scratch,
10162 GEN_INT (8)),
10163 gen_rtx_SUBREG (SImode, operands[0], 0)));
10166 /* Handle storing a half-word to memory during reload by synthesizing as two
10167 byte stores. Take care not to clobber the input values until after we
10168 have moved them somewhere safe. This code assumes that if the DImode
10169 scratch in operands[2] overlaps either the input value or output address
10170 in some way, then that value must die in this insn (we absolutely need
10171 two scratch registers for some corner cases). */
10172 void
10173 arm_reload_out_hi (rtx *operands)
10175 rtx ref = operands[0];
10176 rtx outval = operands[1];
10177 rtx base, scratch;
10178 HOST_WIDE_INT offset = 0;
10180 if (GET_CODE (ref) == SUBREG)
10182 offset = SUBREG_BYTE (ref);
10183 ref = SUBREG_REG (ref);
10186 if (GET_CODE (ref) == REG)
10188 /* We have a pseudo which has been spilt onto the stack; there
10189 are two cases here: the first where there is a simple
10190 stack-slot replacement and a second where the stack-slot is
10191 out of range, or is used as a subreg. */
10192 if (reg_equiv_mem[REGNO (ref)])
10194 ref = reg_equiv_mem[REGNO (ref)];
10195 base = find_replacement (&XEXP (ref, 0));
10197 else
10198 /* The slot is out of range, or was dressed up in a SUBREG. */
10199 base = reg_equiv_address[REGNO (ref)];
10201 else
10202 base = find_replacement (&XEXP (ref, 0));
10204 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10206 /* Handle the case where the address is too complex to be offset by 1. */
10207 if (GET_CODE (base) == MINUS
10208 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10210 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10212 /* Be careful not to destroy OUTVAL. */
10213 if (reg_overlap_mentioned_p (base_plus, outval))
10215 /* Updating base_plus might destroy outval, see if we can
10216 swap the scratch and base_plus. */
10217 if (!reg_overlap_mentioned_p (scratch, outval))
10219 rtx tmp = scratch;
10220 scratch = base_plus;
10221 base_plus = tmp;
10223 else
10225 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10227 /* Be conservative and copy OUTVAL into the scratch now,
10228 this should only be necessary if outval is a subreg
10229 of something larger than a word. */
10230 /* XXX Might this clobber base? I can't see how it can,
10231 since scratch is known to overlap with OUTVAL, and
10232 must be wider than a word. */
10233 emit_insn (gen_movhi (scratch_hi, outval));
10234 outval = scratch_hi;
10238 emit_set_insn (base_plus, base);
10239 base = base_plus;
10241 else if (GET_CODE (base) == PLUS)
10243 /* The addend must be CONST_INT, or we would have dealt with it above. */
10244 HOST_WIDE_INT hi, lo;
10246 offset += INTVAL (XEXP (base, 1));
10247 base = XEXP (base, 0);
10249 /* Rework the address into a legal sequence of insns. */
10250 /* Valid range for lo is -4095 -> 4095 */
10251 lo = (offset >= 0
10252 ? (offset & 0xfff)
10253 : -((-offset) & 0xfff));
10255 /* Corner case, if lo is the max offset then we would be out of range
10256 once we have added the additional 1 below, so bump the msb into the
10257 pre-loading insn(s). */
10258 if (lo == 4095)
10259 lo &= 0x7ff;
10261 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10262 ^ (HOST_WIDE_INT) 0x80000000)
10263 - (HOST_WIDE_INT) 0x80000000);
10265 gcc_assert (hi + lo == offset);
10267 if (hi != 0)
10269 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10271 /* Be careful not to destroy OUTVAL. */
10272 if (reg_overlap_mentioned_p (base_plus, outval))
10274 /* Updating base_plus might destroy outval, see if we
10275 can swap the scratch and base_plus. */
10276 if (!reg_overlap_mentioned_p (scratch, outval))
10278 rtx tmp = scratch;
10279 scratch = base_plus;
10280 base_plus = tmp;
10282 else
10284 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10286 /* Be conservative and copy outval into scratch now,
10287 this should only be necessary if outval is a
10288 subreg of something larger than a word. */
10289 /* XXX Might this clobber base? I can't see how it
10290 can, since scratch is known to overlap with
10291 outval. */
10292 emit_insn (gen_movhi (scratch_hi, outval));
10293 outval = scratch_hi;
10297 /* Get the base address; addsi3 knows how to handle constants
10298 that require more than one insn. */
10299 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10300 base = base_plus;
10301 offset = lo;
10305 if (BYTES_BIG_ENDIAN)
10307 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10308 plus_constant (base, offset + 1)),
10309 gen_lowpart (QImode, outval)));
10310 emit_insn (gen_lshrsi3 (scratch,
10311 gen_rtx_SUBREG (SImode, outval, 0),
10312 GEN_INT (8)));
10313 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10314 gen_lowpart (QImode, scratch)));
10316 else
10318 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10319 gen_lowpart (QImode, outval)));
10320 emit_insn (gen_lshrsi3 (scratch,
10321 gen_rtx_SUBREG (SImode, outval, 0),
10322 GEN_INT (8)));
10323 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10324 plus_constant (base, offset + 1)),
10325 gen_lowpart (QImode, scratch)));
10329 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10330 (padded to the size of a word) should be passed in a register. */
10332 static bool
10333 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10335 if (TARGET_AAPCS_BASED)
10336 return must_pass_in_stack_var_size (mode, type);
10337 else
10338 return must_pass_in_stack_var_size_or_pad (mode, type);
10342 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10343 Return true if an argument passed on the stack should be padded upwards,
10344 i.e. if the least-significant byte has useful data.
10345 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10346 aggregate types are placed in the lowest memory address. */
10348 bool
10349 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10351 if (!TARGET_AAPCS_BASED)
10352 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10354 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10355 return false;
10357 return true;
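/* Illustrative cases (assuming a big-endian AAPCS target): a 'short'
   argument is INTEGRAL_TYPE_P, so it is padded downwards (false above),
   while a 3-byte struct falls through and is padded upwards (true).  */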
10361 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10362 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10363 byte of the register has useful data, and return the opposite if the
10364 most significant byte does.
10365 For AAPCS, small aggregates and small complex types are always padded
10366 upwards. */
10368 bool
10369 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10370 tree type, int first ATTRIBUTE_UNUSED)
10372 if (TARGET_AAPCS_BASED
10373 && BYTES_BIG_ENDIAN
10374 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10375 && int_size_in_bytes (type) <= 4)
10376 return true;
10378 /* Otherwise, use default padding. */
10379 return !BYTES_BIG_ENDIAN;
10383 /* Print a symbolic form of X to the debug file, F. */
10384 static void
10385 arm_print_value (FILE *f, rtx x)
10387 switch (GET_CODE (x))
10389 case CONST_INT:
10390 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10391 return;
10393 case CONST_DOUBLE:
10394 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10395 return;
10397 case CONST_VECTOR:
10399 int i;
10401 fprintf (f, "<");
10402 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10404 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10405 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10406 fputc (',', f);
10408 fprintf (f, ">");
10410 return;
10412 case CONST_STRING:
10413 fprintf (f, "\"%s\"", XSTR (x, 0));
10414 return;
10416 case SYMBOL_REF:
10417 fprintf (f, "`%s'", XSTR (x, 0));
10418 return;
10420 case LABEL_REF:
10421 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10422 return;
10424 case CONST:
10425 arm_print_value (f, XEXP (x, 0));
10426 return;
10428 case PLUS:
10429 arm_print_value (f, XEXP (x, 0));
10430 fprintf (f, "+");
10431 arm_print_value (f, XEXP (x, 1));
10432 return;
10434 case PC:
10435 fprintf (f, "pc");
10436 return;
10438 default:
10439 fprintf (f, "????");
10440 return;
10444 /* Routines for manipulation of the constant pool. */
10446 /* Arm instructions cannot load a large constant directly into a
10447 register; they have to come from a pc relative load. The constant
10448 must therefore be placed in the addressable range of the pc
10449 relative load. Depending on the precise pc relative load
10450 instruction the range is somewhere between 256 bytes and 4k. This
10451 means that we often have to dump a constant inside a function, and
10452 generate code to branch around it.
10454 It is important to minimize this, since the branches will slow
10455 things down and make the code larger.
10457 Normally we can hide the table after an existing unconditional
10458 branch so that there is no interruption of the flow, but in the
10459 worst case the code looks like this:
10461 ldr rn, L1
10462 ...
10463 b L2
10464 align
10465 L1: .long value
10466 L2:
10467 ...
10469 ldr rn, L3
10470 ...
10471 b L4
10472 align
10473 L3: .long value
10474 L4:
10475 ...
10477 We fix this by performing a scan after scheduling, which notices
10478 which instructions need to have their operands fetched from the
10479 constant table and builds the table.
10481 The algorithm starts by building a table of all the constants that
10482 need fixing up and all the natural barriers in the function (places
10483 where a constant table can be dropped without breaking the flow).
10484 For each fixup we note how far the pc-relative replacement will be
10485 able to reach and the offset of the instruction into the function.
10487 Having built the table we then group the fixes together to form
10488 tables that are as large as possible (subject to addressing
10489 constraints) and emit each table of constants after the last
10490 barrier that is within range of all the instructions in the group.
10491 If a group does not contain a barrier, then we forcibly create one
10492 by inserting a jump instruction into the flow. Once the table has
10493 been inserted, the insns are then modified to reference the
10494 relevant entry in the pool.
10496 Possible enhancements to the algorithm (not implemented) are:
10498 1) For some processors and object formats, there may be benefit in
10499 aligning the pools to the start of cache lines; this alignment
10500 would need to be taken into account when calculating addressability
10501 of a pool. */
10503 /* These typedefs are located at the start of this file, so that
10504 they can be used in the prototypes there. This comment is to
10505 remind readers of that fact so that the following structures
10506 can be understood more easily.
10508 typedef struct minipool_node Mnode;
10509 typedef struct minipool_fixup Mfix; */
10511 struct minipool_node
10513 /* Doubly linked chain of entries. */
10514 Mnode * next;
10515 Mnode * prev;
10516 /* The maximum offset into the code that this entry can be placed. While
10517 pushing fixes for forward references, all entries are sorted in order
10518 of increasing max_address. */
10519 HOST_WIDE_INT max_address;
10520 /* Similarly for an entry inserted for a backwards ref. */
10521 HOST_WIDE_INT min_address;
10522 /* The number of fixes referencing this entry. This can become zero
10523 if we "unpush" an entry. In this case we ignore the entry when we
10524 come to emit the code. */
10525 int refcount;
10526 /* The offset from the start of the minipool. */
10527 HOST_WIDE_INT offset;
10528 /* The value in the table. */
10529 rtx value;
10530 /* The mode of value. */
10531 enum machine_mode mode;
10532 /* The size of the value. With iWMMXt enabled
10533 sizes > 4 also imply an alignment of 8 bytes. */
10534 int fix_size;
10537 struct minipool_fixup
10539 Mfix * next;
10540 rtx insn;
10541 HOST_WIDE_INT address;
10542 rtx * loc;
10543 enum machine_mode mode;
10544 int fix_size;
10545 rtx value;
10546 Mnode * minipool;
10547 HOST_WIDE_INT forwards;
10548 HOST_WIDE_INT backwards;
10551 /* Fixes less than a word need padding out to a word boundary. */
10552 #define MINIPOOL_FIX_SIZE(mode) \
10553 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
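/* For example (illustrative): MINIPOOL_FIX_SIZE (QImode) and
   MINIPOOL_FIX_SIZE (HImode) both evaluate to 4, while DImode gives 8
   and a 128-bit vector mode gives 16, matching the consttable_*
   patterns emitted by dump_minipool below.  */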
10555 static Mnode * minipool_vector_head;
10556 static Mnode * minipool_vector_tail;
10557 static rtx minipool_vector_label;
10558 static int minipool_pad;
10560 /* The linked list of all minipool fixes required for this function. */
10561 Mfix * minipool_fix_head;
10562 Mfix * minipool_fix_tail;
10563 /* The fix entry for the current minipool, once it has been placed. */
10564 Mfix * minipool_barrier;
10566 /* Determines if INSN is the start of a jump table. Returns the end
10567 of the TABLE or NULL_RTX. */
10568 static rtx
10569 is_jump_table (rtx insn)
10571 rtx table;
10573 if (GET_CODE (insn) == JUMP_INSN
10574 && JUMP_LABEL (insn) != NULL
10575 && ((table = next_real_insn (JUMP_LABEL (insn)))
10576 == next_real_insn (insn))
10577 && table != NULL
10578 && GET_CODE (table) == JUMP_INSN
10579 && (GET_CODE (PATTERN (table)) == ADDR_VEC
10580 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
10581 return table;
10583 return NULL_RTX;
10586 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10587 #define JUMP_TABLES_IN_TEXT_SECTION 0
10588 #endif
10590 static HOST_WIDE_INT
10591 get_jump_table_size (rtx insn)
10593 /* ADDR_VECs only take room if read-only data goes into the text
10594 section. */
10595 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10597 rtx body = PATTERN (insn);
10598 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10599 HOST_WIDE_INT size;
10600 HOST_WIDE_INT modesize;
10602 modesize = GET_MODE_SIZE (GET_MODE (body));
10603 size = modesize * XVECLEN (body, elt);
10604 switch (modesize)
10606 case 1:
10607 /* Round up size of TBB table to a halfword boundary. */
10608 size = (size + 1) & ~(HOST_WIDE_INT)1;
10609 break;
10610 case 2:
10611 /* No padding necessary for TBH. */
10612 break;
10613 case 4:
10614 /* Add two bytes for alignment on Thumb. */
10615 if (TARGET_THUMB)
10616 size += 2;
10617 break;
10618 default:
10619 gcc_unreachable ();
10621 return size;
10624 return 0;
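/* A worked example (illustrative): a QImode ADDR_DIFF_VEC (a Thumb-2 TBB
   table) with five entries has modesize 1 and raw size 5, which the
   case above rounds up to 6 bytes so the following code stays halfword
   aligned.  */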
10627 /* Move a minipool fix MP from its current location to before MAX_MP.
10628 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10629 constraints may need updating. */
10630 static Mnode *
10631 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10632 HOST_WIDE_INT max_address)
10634 /* The code below assumes these are different. */
10635 gcc_assert (mp != max_mp);
10637 if (max_mp == NULL)
10639 if (max_address < mp->max_address)
10640 mp->max_address = max_address;
10642 else
10644 if (max_address > max_mp->max_address - mp->fix_size)
10645 mp->max_address = max_mp->max_address - mp->fix_size;
10646 else
10647 mp->max_address = max_address;
10649 /* Unlink MP from its current position. Since max_mp is non-null,
10650 mp->prev must be non-null. */
10651 mp->prev->next = mp->next;
10652 if (mp->next != NULL)
10653 mp->next->prev = mp->prev;
10654 else
10655 minipool_vector_tail = mp->prev;
10657 /* Re-insert it before MAX_MP. */
10658 mp->next = max_mp;
10659 mp->prev = max_mp->prev;
10660 max_mp->prev = mp;
10662 if (mp->prev != NULL)
10663 mp->prev->next = mp;
10664 else
10665 minipool_vector_head = mp;
10668 /* Save the new entry. */
10669 max_mp = mp;
10671 /* Scan over the preceding entries and adjust their addresses as
10672 required. */
10673 while (mp->prev != NULL
10674 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10676 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10677 mp = mp->prev;
10680 return max_mp;
10683 /* Add a constant to the minipool for a forward reference. Returns the
10684 node added or NULL if the constant will not fit in this pool. */
10685 static Mnode *
10686 add_minipool_forward_ref (Mfix *fix)
10688 /* If set, max_mp is the first pool_entry that has a lower
10689 constraint than the one we are trying to add. */
10690 Mnode * max_mp = NULL;
10691 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10692 Mnode * mp;
10694 /* If the minipool starts before the end of FIX->INSN then this FIX
10695 cannot be placed into the current pool. Furthermore, adding the
10696 new constant pool entry may cause the pool to start FIX_SIZE bytes
10697 earlier. */
10698 if (minipool_vector_head &&
10699 (fix->address + get_attr_length (fix->insn)
10700 >= minipool_vector_head->max_address - fix->fix_size))
10701 return NULL;
10703 /* Scan the pool to see if a constant with the same value has
10704 already been added. While we are doing this, also note the
10705 location where we must insert the constant if it doesn't already
10706 exist. */
10707 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10709 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10710 && fix->mode == mp->mode
10711 && (GET_CODE (fix->value) != CODE_LABEL
10712 || (CODE_LABEL_NUMBER (fix->value)
10713 == CODE_LABEL_NUMBER (mp->value)))
10714 && rtx_equal_p (fix->value, mp->value))
10716 /* More than one fix references this entry. */
10717 mp->refcount++;
10718 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10721 /* Note the insertion point if necessary. */
10722 if (max_mp == NULL
10723 && mp->max_address > max_address)
10724 max_mp = mp;
10726 /* If we are inserting an 8-byte aligned quantity and
10727 we have not already found an insertion point, then
10728 make sure that all such 8-byte aligned quantities are
10729 placed at the start of the pool. */
10730 if (ARM_DOUBLEWORD_ALIGN
10731 && max_mp == NULL
10732 && fix->fix_size >= 8
10733 && mp->fix_size < 8)
10735 max_mp = mp;
10736 max_address = mp->max_address;
10740 /* The value is not currently in the minipool, so we need to create
10741 a new entry for it. If MAX_MP is NULL, the entry will be put on
10742 the end of the list since the placement is less constrained than
10743 any existing entry. Otherwise, we insert the new fix before
10744 MAX_MP and, if necessary, adjust the constraints on the other
10745 entries. */
10746 mp = XNEW (Mnode);
10747 mp->fix_size = fix->fix_size;
10748 mp->mode = fix->mode;
10749 mp->value = fix->value;
10750 mp->refcount = 1;
10751 /* Not yet required for a backwards ref. */
10752 mp->min_address = -65536;
10754 if (max_mp == NULL)
10756 mp->max_address = max_address;
10757 mp->next = NULL;
10758 mp->prev = minipool_vector_tail;
10760 if (mp->prev == NULL)
10762 minipool_vector_head = mp;
10763 minipool_vector_label = gen_label_rtx ();
10765 else
10766 mp->prev->next = mp;
10768 minipool_vector_tail = mp;
10770 else
10772 if (max_address > max_mp->max_address - mp->fix_size)
10773 mp->max_address = max_mp->max_address - mp->fix_size;
10774 else
10775 mp->max_address = max_address;
10777 mp->next = max_mp;
10778 mp->prev = max_mp->prev;
10779 max_mp->prev = mp;
10780 if (mp->prev != NULL)
10781 mp->prev->next = mp;
10782 else
10783 minipool_vector_head = mp;
10786 /* Save the new entry. */
10787 max_mp = mp;
10789 /* Scan over the preceding entries and adjust their addresses as
10790 required. */
10791 while (mp->prev != NULL
10792 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10794 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10795 mp = mp->prev;
10798 return max_mp;
10801 static Mnode *
10802 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10803 HOST_WIDE_INT min_address)
10805 HOST_WIDE_INT offset;
10807 /* The code below assumes these are different. */
10808 gcc_assert (mp != min_mp);
10810 if (min_mp == NULL)
10812 if (min_address > mp->min_address)
10813 mp->min_address = min_address;
10815 else
10817 /* We will adjust this below if it is too loose. */
10818 mp->min_address = min_address;
10820 /* Unlink MP from its current position. Since min_mp is non-null,
10821 mp->next must be non-null. */
10822 mp->next->prev = mp->prev;
10823 if (mp->prev != NULL)
10824 mp->prev->next = mp->next;
10825 else
10826 minipool_vector_head = mp->next;
10828 /* Reinsert it after MIN_MP. */
10829 mp->prev = min_mp;
10830 mp->next = min_mp->next;
10831 min_mp->next = mp;
10832 if (mp->next != NULL)
10833 mp->next->prev = mp;
10834 else
10835 minipool_vector_tail = mp;
10838 min_mp = mp;
10840 offset = 0;
10841 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10843 mp->offset = offset;
10844 if (mp->refcount > 0)
10845 offset += mp->fix_size;
10847 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
10848 mp->next->min_address = mp->min_address + mp->fix_size;
10851 return min_mp;
10854 /* Add a constant to the minipool for a backward reference. Returns the
10855 node added or NULL if the constant will not fit in this pool.
10857 Note that the code for insertion for a backwards reference can be
10858 somewhat confusing because the calculated offsets for each fix do
10859 not take into account the size of the pool (which is still under
10860 construction). */
10861 static Mnode *
10862 add_minipool_backward_ref (Mfix *fix)
10864 /* If set, min_mp is the last pool_entry that has a lower constraint
10865 than the one we are trying to add. */
10866 Mnode *min_mp = NULL;
10867 /* This can be negative, since it is only a constraint. */
10868 HOST_WIDE_INT min_address = fix->address - fix->backwards;
10869 Mnode *mp;
10871 /* If we can't reach the current pool from this insn, or if we can't
10872 insert this entry at the end of the pool without pushing other
10873 fixes out of range, then we don't try. This ensures that we
10874 can't fail later on. */
10875 if (min_address >= minipool_barrier->address
10876 || (minipool_vector_tail->min_address + fix->fix_size
10877 >= minipool_barrier->address))
10878 return NULL;
10880 /* Scan the pool to see if a constant with the same value has
10881 already been added. While we are doing this, also note the
10882 location where we must insert the constant if it doesn't already
10883 exist. */
10884 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
10886 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10887 && fix->mode == mp->mode
10888 && (GET_CODE (fix->value) != CODE_LABEL
10889 || (CODE_LABEL_NUMBER (fix->value)
10890 == CODE_LABEL_NUMBER (mp->value)))
10891 && rtx_equal_p (fix->value, mp->value)
10892 /* Check that there is enough slack to move this entry to the
10893 end of the table (this is conservative). */
10894 && (mp->max_address
10895 > (minipool_barrier->address
10896 + minipool_vector_tail->offset
10897 + minipool_vector_tail->fix_size)))
10899 mp->refcount++;
10900 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
10903 if (min_mp != NULL)
10904 mp->min_address += fix->fix_size;
10905 else
10907 /* Note the insertion point if necessary. */
10908 if (mp->min_address < min_address)
10910 /* For now, we do not allow the insertion of 8-byte alignment
10911 requiring nodes anywhere but at the start of the pool. */
10912 if (ARM_DOUBLEWORD_ALIGN
10913 && fix->fix_size >= 8 && mp->fix_size < 8)
10914 return NULL;
10915 else
10916 min_mp = mp;
10918 else if (mp->max_address
10919 < minipool_barrier->address + mp->offset + fix->fix_size)
10921 /* Inserting before this entry would push the fix beyond
10922 its maximum address (which can happen if we have
10923 re-located a forwards fix); force the new fix to come
10924 after it. */
10925 if (ARM_DOUBLEWORD_ALIGN
10926 && fix->fix_size >= 8 && mp->fix_size < 8)
10927 return NULL;
10928 else
10930 min_mp = mp;
10931 min_address = mp->min_address + fix->fix_size;
10934 /* Do not insert a non-8-byte aligned quantity before 8-byte
10935 aligned quantities. */
10936 else if (ARM_DOUBLEWORD_ALIGN
10937 && fix->fix_size < 8
10938 && mp->fix_size >= 8)
10940 min_mp = mp;
10941 min_address = mp->min_address + fix->fix_size;
10946 /* We need to create a new entry. */
10947 mp = XNEW (Mnode);
10948 mp->fix_size = fix->fix_size;
10949 mp->mode = fix->mode;
10950 mp->value = fix->value;
10951 mp->refcount = 1;
10952 mp->max_address = minipool_barrier->address + 65536;
10954 mp->min_address = min_address;
10956 if (min_mp == NULL)
10958 mp->prev = NULL;
10959 mp->next = minipool_vector_head;
10961 if (mp->next == NULL)
10963 minipool_vector_tail = mp;
10964 minipool_vector_label = gen_label_rtx ();
10966 else
10967 mp->next->prev = mp;
10969 minipool_vector_head = mp;
10971 else
10973 mp->next = min_mp->next;
10974 mp->prev = min_mp;
10975 min_mp->next = mp;
10977 if (mp->next != NULL)
10978 mp->next->prev = mp;
10979 else
10980 minipool_vector_tail = mp;
10983 /* Save the new entry. */
10984 min_mp = mp;
10986 if (mp->prev)
10987 mp = mp->prev;
10988 else
10989 mp->offset = 0;
10991 /* Scan over the following entries and adjust their offsets. */
10992 while (mp->next != NULL)
10994 if (mp->next->min_address < mp->min_address + mp->fix_size)
10995 mp->next->min_address = mp->min_address + mp->fix_size;
10997 if (mp->refcount)
10998 mp->next->offset = mp->offset + mp->fix_size;
10999 else
11000 mp->next->offset = mp->offset;
11002 mp = mp->next;
11005 return min_mp;
11008 static void
11009 assign_minipool_offsets (Mfix *barrier)
11011 HOST_WIDE_INT offset = 0;
11012 Mnode *mp;
11014 minipool_barrier = barrier;
11016 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11018 mp->offset = offset;
11020 if (mp->refcount > 0)
11021 offset += mp->fix_size;
11025 /* Output the literal table */
11026 static void
11027 dump_minipool (rtx scan)
11029 Mnode * mp;
11030 Mnode * nmp;
11031 int align64 = 0;
11033 if (ARM_DOUBLEWORD_ALIGN)
11034 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11035 if (mp->refcount > 0 && mp->fix_size >= 8)
11037 align64 = 1;
11038 break;
11041 if (dump_file)
11042 fprintf (dump_file,
11043 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11044 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11046 scan = emit_label_after (gen_label_rtx (), scan);
11047 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11048 scan = emit_label_after (minipool_vector_label, scan);
11050 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11052 if (mp->refcount > 0)
11054 if (dump_file)
11056 fprintf (dump_file,
11057 ";; Offset %u, min %ld, max %ld ",
11058 (unsigned) mp->offset, (unsigned long) mp->min_address,
11059 (unsigned long) mp->max_address);
11060 arm_print_value (dump_file, mp->value);
11061 fputc ('\n', dump_file);
11064 switch (mp->fix_size)
11066 #ifdef HAVE_consttable_1
11067 case 1:
11068 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11069 break;
11071 #endif
11072 #ifdef HAVE_consttable_2
11073 case 2:
11074 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11075 break;
11077 #endif
11078 #ifdef HAVE_consttable_4
11079 case 4:
11080 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11081 break;
11083 #endif
11084 #ifdef HAVE_consttable_8
11085 case 8:
11086 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11087 break;
11089 #endif
11090 #ifdef HAVE_consttable_16
11091 case 16:
11092 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11093 break;
11095 #endif
11096 default:
11097 gcc_unreachable ();
11101 nmp = mp->next;
11102 free (mp);
11105 minipool_vector_head = minipool_vector_tail = NULL;
11106 scan = emit_insn_after (gen_consttable_end (), scan);
11107 scan = emit_barrier_after (scan);
11110 /* Return the cost of forcibly inserting a barrier after INSN. */
11111 static int
11112 arm_barrier_cost (rtx insn)
11114 /* Basing the location of the pool on the loop depth is preferable,
11115 but at the moment, the basic block information seems to be
11116 corrupt by this stage of the compilation. */
11117 int base_cost = 50;
11118 rtx next = next_nonnote_insn (insn);
11120 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11121 base_cost -= 20;
11123 switch (GET_CODE (insn))
11125 case CODE_LABEL:
11126 /* It will always be better to place the table before the label, rather
11127 than after it. */
11128 return 50;
11130 case INSN:
11131 case CALL_INSN:
11132 return base_cost;
11134 case JUMP_INSN:
11135 return base_cost - 10;
11137 default:
11138 return base_cost + 10;
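/* Example costs (illustrative): an ordinary INSN whose next non-note
   insn is a CODE_LABEL costs 50 - 20 = 30, a JUMP_INSN in the same
   position costs 20, and placing the pool directly on a CODE_LABEL
   always costs 50, so forced barriers gravitate to spots just before
   labels and after jumps.  */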
11142 /* Find the best place in the insn stream in the range
11143 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11144 Create the barrier by inserting a jump and add a new fix entry for
11145 it. */
11146 static Mfix *
11147 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11149 HOST_WIDE_INT count = 0;
11150 rtx barrier;
11151 rtx from = fix->insn;
11152 /* The instruction after which we will insert the jump. */
11153 rtx selected = NULL;
11154 int selected_cost;
11155 /* The address at which the jump instruction will be placed. */
11156 HOST_WIDE_INT selected_address;
11157 Mfix * new_fix;
11158 HOST_WIDE_INT max_count = max_address - fix->address;
11159 rtx label = gen_label_rtx ();
11161 selected_cost = arm_barrier_cost (from);
11162 selected_address = fix->address;
11164 while (from && count < max_count)
11166 rtx tmp;
11167 int new_cost;
11169 /* This code shouldn't have been called if there was a natural barrier
11170 within range. */
11171 gcc_assert (GET_CODE (from) != BARRIER);
11173 /* Count the length of this insn. */
11174 count += get_attr_length (from);
11176 /* If there is a jump table, add its length. */
11177 tmp = is_jump_table (from);
11178 if (tmp != NULL)
11180 count += get_jump_table_size (tmp);
11182 /* Jump tables aren't in a basic block, so base the cost on
11183 the dispatch insn. If we select this location, we will
11184 still put the pool after the table. */
11185 new_cost = arm_barrier_cost (from);
11187 if (count < max_count
11188 && (!selected || new_cost <= selected_cost))
11190 selected = tmp;
11191 selected_cost = new_cost;
11192 selected_address = fix->address + count;
11195 /* Continue after the dispatch table. */
11196 from = NEXT_INSN (tmp);
11197 continue;
11200 new_cost = arm_barrier_cost (from);
11202 if (count < max_count
11203 && (!selected || new_cost <= selected_cost))
11205 selected = from;
11206 selected_cost = new_cost;
11207 selected_address = fix->address + count;
11210 from = NEXT_INSN (from);
11213 /* Make sure that we found a place to insert the jump. */
11214 gcc_assert (selected);
11216 /* Create a new JUMP_INSN that branches around a barrier. */
11217 from = emit_jump_insn_after (gen_jump (label), selected);
11218 JUMP_LABEL (from) = label;
11219 barrier = emit_barrier_after (from);
11220 emit_label_after (label, barrier);
11222 /* Create a minipool barrier entry for the new barrier. */
11223 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11224 new_fix->insn = barrier;
11225 new_fix->address = selected_address;
11226 new_fix->next = fix->next;
11227 fix->next = new_fix;
11229 return new_fix;
11232 /* Record that there is a natural barrier in the insn stream at
11233 ADDRESS. */
11234 static void
11235 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11237 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11239 fix->insn = insn;
11240 fix->address = address;
11242 fix->next = NULL;
11243 if (minipool_fix_head != NULL)
11244 minipool_fix_tail->next = fix;
11245 else
11246 minipool_fix_head = fix;
11248 minipool_fix_tail = fix;
11251 /* Record INSN, which will need fixing up to load a value from the
11252 minipool. ADDRESS is the offset of the insn since the start of the
11253 function; LOC is a pointer to the part of the insn which requires
11254 fixing; VALUE is the constant that must be loaded, which is of type
11255 MODE. */
11256 static void
11257 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11258 enum machine_mode mode, rtx value)
11260 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11262 fix->insn = insn;
11263 fix->address = address;
11264 fix->loc = loc;
11265 fix->mode = mode;
11266 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11267 fix->value = value;
11268 fix->forwards = get_attr_pool_range (insn);
11269 fix->backwards = get_attr_neg_pool_range (insn);
11270 fix->minipool = NULL;
11272 /* If an insn doesn't have a range defined for it, then it isn't
11273 expecting to be reworked by this code. Better to stop now than
11274 to generate duff assembly code. */
11275 gcc_assert (fix->forwards || fix->backwards);
11277 /* If an entry requires 8-byte alignment then assume all constant pools
11278 require 4 bytes of padding. Trying to do this later on a per-pool
11279 basis is awkward because existing pool entries have to be modified. */
11280 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11281 minipool_pad = 4;
11283 if (dump_file)
11285 fprintf (dump_file,
11286 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11287 GET_MODE_NAME (mode),
11288 INSN_UID (insn), (unsigned long) address,
11289 -1 * (long)fix->backwards, (long)fix->forwards);
11290 arm_print_value (dump_file, fix->value);
11291 fprintf (dump_file, "\n");
11294 /* Add it to the chain of fixes. */
11295 fix->next = NULL;
11297 if (minipool_fix_head != NULL)
11298 minipool_fix_tail->next = fix;
11299 else
11300 minipool_fix_head = fix;
11302 minipool_fix_tail = fix;
11305 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11306 Returns the number of insns needed, or 99 if we don't know how to
11307 do it. */
11309 arm_const_double_inline_cost (rtx val)
11311 rtx lowpart, highpart;
11312 enum machine_mode mode;
11314 mode = GET_MODE (val);
11316 if (mode == VOIDmode)
11317 mode = DImode;
11319 gcc_assert (GET_MODE_SIZE (mode) == 8);
11321 lowpart = gen_lowpart (SImode, val);
11322 highpart = gen_highpart_mode (SImode, mode, val);
11324 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11325 gcc_assert (GET_CODE (highpart) == CONST_INT);
11327 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11328 NULL_RTX, NULL_RTX, 0, 0)
11329 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11330 NULL_RTX, NULL_RTX, 0, 0));
11333 /* Return true if it is worthwhile to split a 64-bit constant into two
11334 32-bit operations. This is the case if optimizing for size, or
11335 if we have load delay slots, or if one 32-bit part can be done with
11336 a single data operation. */
11337 bool
11338 arm_const_double_by_parts (rtx val)
11340 enum machine_mode mode = GET_MODE (val);
11341 rtx part;
11343 if (optimize_size || arm_ld_sched)
11344 return true;
11346 if (mode == VOIDmode)
11347 mode = DImode;
11349 part = gen_highpart_mode (SImode, mode, val);
11351 gcc_assert (GET_CODE (part) == CONST_INT);
11353 if (const_ok_for_arm (INTVAL (part))
11354 || const_ok_for_arm (~INTVAL (part)))
11355 return true;
11357 part = gen_lowpart (SImode, val);
11359 gcc_assert (GET_CODE (part) == CONST_INT);
11361 if (const_ok_for_arm (INTVAL (part))
11362 || const_ok_for_arm (~INTVAL (part)))
11363 return true;
11365 return false;
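/* Examples (illustrative): a DImode value such as 0x000000ff00000001
   splits, because its high word 0x000000ff is a valid ARM immediate;
   a value like 0x123456789abcdef0 has neither word (nor either word's
   complement) representable, so - when not optimizing for size and
   without load delay slots - this returns false and the constant is
   better left to the literal pool.  */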
11368 /* Scan INSN and note any of its operands that need fixing.
11369 If DO_PUSHES is false we do not actually push any of the fixups
11370 needed. The function returns TRUE if any fixups were needed/pushed.
11371 This is used by arm_memory_load_p() which needs to know about loads
11372 of constants that will be converted into minipool loads. */
11373 static bool
11374 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11376 bool result = false;
11377 int opno;
11379 extract_insn (insn);
11381 if (!constrain_operands (1))
11382 fatal_insn_not_found (insn);
11384 if (recog_data.n_alternatives == 0)
11385 return false;
11387 /* Fill in recog_op_alt with information about the constraints of
11388 this insn. */
11389 preprocess_constraints ();
11391 for (opno = 0; opno < recog_data.n_operands; opno++)
11393 /* Things we need to fix can only occur in inputs. */
11394 if (recog_data.operand_type[opno] != OP_IN)
11395 continue;
11397 /* If this alternative is a memory reference, then any mention
11398 of constants in this alternative is really to fool reload
11399 into allowing us to accept one there. We need to fix them up
11400 now so that we output the right code. */
11401 if (recog_op_alt[opno][which_alternative].memory_ok)
11403 rtx op = recog_data.operand[opno];
11405 if (CONSTANT_P (op))
11407 if (do_pushes)
11408 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11409 recog_data.operand_mode[opno], op);
11410 result = true;
11412 else if (GET_CODE (op) == MEM
11413 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11414 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11416 if (do_pushes)
11418 rtx cop = avoid_constant_pool_reference (op);
11420 /* Casting the address of something to a mode narrower
11421 than a word can cause avoid_constant_pool_reference()
11422 to return the pool reference itself. That's no good to
11423 us here. Let's just hope that we can use the
11424 constant pool value directly. */
11425 if (op == cop)
11426 cop = get_pool_constant (XEXP (op, 0));
11428 push_minipool_fix (insn, address,
11429 recog_data.operand_loc[opno],
11430 recog_data.operand_mode[opno], cop);
11433 result = true;
11438 return result;
11441 /* Gcc puts the pool in the wrong place for ARM, since we can only
11442 load addresses a limited distance around the pc. We do some
11443 special munging to move the constant pool values to the correct
11444 point in the code. */
11445 static void
11446 arm_reorg (void)
11448 rtx insn;
11449 HOST_WIDE_INT address = 0;
11450 Mfix * fix;
11452 minipool_fix_head = minipool_fix_tail = NULL;
11454 /* The first insn must always be a note, or the code below won't
11455 scan it properly. */
11456 insn = get_insns ();
11457 gcc_assert (GET_CODE (insn) == NOTE);
11458 minipool_pad = 0;
11460 /* Scan all the insns and record the operands that will need fixing. */
11461 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11463 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11464 && (arm_cirrus_insn_p (insn)
11465 || GET_CODE (insn) == JUMP_INSN
11466 || arm_memory_load_p (insn)))
11467 cirrus_reorg (insn);
11469 if (GET_CODE (insn) == BARRIER)
11470 push_minipool_barrier (insn, address);
11471 else if (INSN_P (insn))
11473 rtx table;
11475 note_invalid_constants (insn, address, true);
11476 address += get_attr_length (insn);
11478 /* If the insn is a vector jump, add the size of the table
11479 and skip the table. */
11480 if ((table = is_jump_table (insn)) != NULL)
11482 address += get_jump_table_size (table);
11483 insn = table;
11488 fix = minipool_fix_head;
11490 /* Now scan the fixups and perform the required changes. */
11491 while (fix)
11493 Mfix * ftmp;
11494 Mfix * fdel;
11495 Mfix * last_added_fix;
11496 Mfix * last_barrier = NULL;
11497 Mfix * this_fix;
11499 /* Skip any further barriers before the next fix. */
11500 while (fix && GET_CODE (fix->insn) == BARRIER)
11501 fix = fix->next;
11503 /* No more fixes. */
11504 if (fix == NULL)
11505 break;
11507 last_added_fix = NULL;
11509 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11511 if (GET_CODE (ftmp->insn) == BARRIER)
11513 if (ftmp->address >= minipool_vector_head->max_address)
11514 break;
11516 last_barrier = ftmp;
11518 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11519 break;
11521 last_added_fix = ftmp; /* Keep track of the last fix added. */
11524 /* If we found a barrier, drop back to that; any fixes that we
11525 could have reached but come after the barrier will now go in
11526 the next mini-pool. */
11527 if (last_barrier != NULL)
11529 /* Reduce the refcount for those fixes that won't go into this
11530 pool after all. */
11531 for (fdel = last_barrier->next;
11532 fdel && fdel != ftmp;
11533 fdel = fdel->next)
11535 fdel->minipool->refcount--;
11536 fdel->minipool = NULL;
11539 ftmp = last_barrier;
11541 else
11543 /* ftmp is the first fix that we can't fit into this pool and
11544 there are no natural barriers that we could use. Insert a
11545 new barrier in the code somewhere between the previous
11546 fix and this one, and arrange to jump around it. */
11547 HOST_WIDE_INT max_address;
11549 /* The last item on the list of fixes must be a barrier, so
11550 we can never run off the end of the list of fixes without
11551 last_barrier being set. */
11552 gcc_assert (ftmp);
11554 max_address = minipool_vector_head->max_address;
11555 /* Check that there isn't another fix that is in range that
11556 we couldn't fit into this pool because the pool was
11557 already too large: we need to put the pool before such an
11558 instruction. The pool itself may come just after the
11559 fix because create_fix_barrier also allows space for a
11560 jump instruction. */
11561 if (ftmp->address < max_address)
11562 max_address = ftmp->address + 1;
11564 last_barrier = create_fix_barrier (last_added_fix, max_address);
11567 assign_minipool_offsets (last_barrier);
11569 while (ftmp)
11571 if (GET_CODE (ftmp->insn) != BARRIER
11572 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11573 == NULL))
11574 break;
11576 ftmp = ftmp->next;
11579 /* Scan over the fixes we have identified for this pool, fixing them
11580 up and adding the constants to the pool itself. */
11581 for (this_fix = fix; this_fix && ftmp != this_fix;
11582 this_fix = this_fix->next)
11583 if (GET_CODE (this_fix->insn) != BARRIER)
11585 rtx addr
11586 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11587 minipool_vector_label),
11588 this_fix->minipool->offset);
11589 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11592 dump_minipool (last_barrier->insn);
11593 fix = ftmp;
11596 /* From now on we must synthesize any constants that we can't handle
11597 directly. This can happen if the RTL gets split during final
11598 instruction generation. */
11599 after_arm_reorg = 1;
11601 /* Free the minipool memory. */
11602 obstack_free (&minipool_obstack, minipool_startobj);
11605 /* Routines to output assembly language. */
11607 /* If the rtx is the correct value then return the string of the number.
11608 In this way we can ensure that valid double constants are generated even
11609 when cross compiling. */
11610 const char *
11611 fp_immediate_constant (rtx x)
11613 REAL_VALUE_TYPE r;
11614 int i;
11616 if (!fp_consts_inited)
11617 init_fp_table ();
11619 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11620 for (i = 0; i < 8; i++)
11621 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11622 return strings_fp[i];
11624 gcc_unreachable ();
11627 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11628 static const char *
11629 fp_const_from_val (REAL_VALUE_TYPE *r)
11631 int i;
11633 if (!fp_consts_inited)
11634 init_fp_table ();
11636 for (i = 0; i < 8; i++)
11637 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11638 return strings_fp[i];
11640 gcc_unreachable ();
11643 /* Output the operands of a LDM/STM instruction to STREAM.
11644 MASK is the ARM register set mask of which only bits 0-15 are important.
11645 REG is the base register, either the frame pointer or the stack pointer,
11646 INSTR is the possibly suffixed load or store instruction.
11647 RFE is nonzero if the instruction should also copy spsr to cpsr. */
11649 static void
11650 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11651 unsigned long mask, int rfe)
11653 unsigned i;
11654 bool not_first = FALSE;
11656 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11657 fputc ('\t', stream);
11658 asm_fprintf (stream, instr, reg);
11659 fputc ('{', stream);
11661 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11662 if (mask & (1 << i))
11664 if (not_first)
11665 fprintf (stream, ", ");
11667 asm_fprintf (stream, "%r", i);
11668 not_first = TRUE;
11671 if (rfe)
11672 fprintf (stream, "}^\n");
11673 else
11674 fprintf (stream, "}\n");
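/* Example (illustrative): with a MASK containing r4, r5 and pc and RFE
   nonzero, the register list printed after INSTR is "{r4, r5, pc}^",
   the trailing '^' requesting the SPSR-to-CPSR copy on return.  */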
11678 /* Output a FLDMD instruction to STREAM.
11679 BASE is the register containing the address.
11680 REG and COUNT specify the register range.
11681 Extra registers may be added to avoid hardware bugs.
11683 We output FLDMD even for ARMv5 VFP implementations. Although
11684 FLDMD is technically not supported until ARMv6, it is believed
11685 that all VFP implementations support its use in this context. */
11687 static void
11688 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11690 int i;
11692 /* Workaround ARM10 VFPr1 bug. */
11693 if (count == 2 && !arm_arch6)
11695 if (reg == 15)
11696 reg--;
11697 count++;
11700 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11701 load into multiple parts if we have to handle more than 16 registers. */
11702 if (count > 16)
11704 vfp_output_fldmd (stream, base, reg, 16);
11705 vfp_output_fldmd (stream, base, reg + 16, count - 16);
11706 return;
11709 fputc ('\t', stream);
11710 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11712 for (i = reg; i < reg + count; i++)
11714 if (i > reg)
11715 fputs (", ", stream);
11716 asm_fprintf (stream, "d%d", i);
11718 fputs ("}\n", stream);
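/* Example (illustrative): vfp_output_fldmd (stream, SP_REGNUM, 8, 3)
   would emit "fldmfdd sp!, {d8, d9, d10}", assuming SP_REGNUM is
   printed as "sp" by the %r directive.  */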
11723 /* Output the assembly for a store multiple. */
11725 const char *
11726 vfp_output_fstmd (rtx * operands)
11728 char pattern[100];
11729 int p;
11730 int base;
11731 int i;
11733 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11734 p = strlen (pattern);
11736 gcc_assert (GET_CODE (operands[1]) == REG);
11738 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11739 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11741 p += sprintf (&pattern[p], ", d%d", base + i);
11743 strcpy (&pattern[p], "}");
11745 output_asm_insn (pattern, operands);
11746 return "";
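/* Example (illustrative): for a two-register store starting at d8 with
   a stack-pointer based address, the pattern built above expands to
   something like "fstmfdd sp!, {d8, d9}".  */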
11750 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
11751 number of bytes pushed. */
11753 static int
11754 vfp_emit_fstmd (int base_reg, int count)
11756 rtx par;
11757 rtx dwarf;
11758 rtx tmp, reg;
11759 int i;
11761 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
11762 register pairs are stored by a store multiple insn. We avoid this
11763 by pushing an extra pair. */
11764 if (count == 2 && !arm_arch6)
11766 if (base_reg == LAST_VFP_REGNUM - 3)
11767 base_reg -= 2;
11768 count++;
11771 /* FSTMD may not store more than 16 doubleword registers at once. Split
11772 larger stores into multiple parts (up to a maximum of two, in
11773 practice). */
11774 if (count > 16)
11776 int saved;
11777 /* NOTE: base_reg is an internal register number, so each D register
11778 counts as 2. */
11779 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
11780 saved += vfp_emit_fstmd (base_reg, 16);
11781 return saved;
11784 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11785 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11787 reg = gen_rtx_REG (DFmode, base_reg);
11788 base_reg += 2;
11790 XVECEXP (par, 0, 0)
11791 = gen_rtx_SET (VOIDmode,
11792 gen_frame_mem
11793 (BLKmode,
11794 gen_rtx_PRE_MODIFY (Pmode,
11795 stack_pointer_rtx,
11796 plus_constant
11797 (stack_pointer_rtx,
11798 - (count * 8)))
11800 gen_rtx_UNSPEC (BLKmode,
11801 gen_rtvec (1, reg),
11802 UNSPEC_PUSH_MULT));
11804 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11805 plus_constant (stack_pointer_rtx, -(count * 8)));
11806 RTX_FRAME_RELATED_P (tmp) = 1;
11807 XVECEXP (dwarf, 0, 0) = tmp;
11809 tmp = gen_rtx_SET (VOIDmode,
11810 gen_frame_mem (DFmode, stack_pointer_rtx),
11811 reg);
11812 RTX_FRAME_RELATED_P (tmp) = 1;
11813 XVECEXP (dwarf, 0, 1) = tmp;
11815 for (i = 1; i < count; i++)
11817 reg = gen_rtx_REG (DFmode, base_reg);
11818 base_reg += 2;
11819 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11821 tmp = gen_rtx_SET (VOIDmode,
11822 gen_frame_mem (DFmode,
11823 plus_constant (stack_pointer_rtx,
11824 i * 8)),
11825 reg);
11826 RTX_FRAME_RELATED_P (tmp) = 1;
11827 XVECEXP (dwarf, 0, i + 1) = tmp;
11830 par = emit_insn (par);
11831 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
11832 RTX_FRAME_RELATED_P (par) = 1;
11834 return count * 8;
11837 /* Emit a call instruction with pattern PAT. ADDR is the address of
11838 the call target. */
11840 void
11841 arm_emit_call_insn (rtx pat, rtx addr)
11843 rtx insn;
11845 insn = emit_call_insn (pat);
11847 /* The PIC register is live on entry to VxWorks PIC PLT entries.
11848 If the call might use such an entry, add a use of the PIC register
11849 to the instruction's CALL_INSN_FUNCTION_USAGE. */
11850 if (TARGET_VXWORKS_RTP
11851 && flag_pic
11852 && GET_CODE (addr) == SYMBOL_REF
11853 && (SYMBOL_REF_DECL (addr)
11854 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
11855 : !SYMBOL_REF_LOCAL_P (addr)))
11857 require_pic_register ();
11858 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
11862 /* Output a 'call' insn. */
11863 const char *
11864 output_call (rtx *operands)
11866 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
11868 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
11869 if (REGNO (operands[0]) == LR_REGNUM)
11871 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
11872 output_asm_insn ("mov%?\t%0, %|lr", operands);
11875 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11877 if (TARGET_INTERWORK || arm_arch4t)
11878 output_asm_insn ("bx%?\t%0", operands);
11879 else
11880 output_asm_insn ("mov%?\t%|pc, %0", operands);
11882 return "";
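/* Example sequences (illustrative), for a call through r2 on a pre-v5
   core:
       mov     lr, pc
       mov     pc, r2          @ plain ARMv4
   or, with interworking / ARMv4T:
       mov     lr, pc
       bx      r2  */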
11885 /* Output a 'call' insn that is a reference in memory. This is
11886 disabled for ARMv5 and we prefer a blx instead because otherwise
11887 there's a significant performance overhead. */
11888 const char *
11889 output_call_mem (rtx *operands)
11891 gcc_assert (!arm_arch5);
11892 if (TARGET_INTERWORK)
11894 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11895 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11896 output_asm_insn ("bx%?\t%|ip", operands);
11898 else if (regno_use_in (LR_REGNUM, operands[0]))
11900 /* LR is used in the memory address. We load the address in the
11901 first instruction. It's safe to use IP as the target of the
11902 load since the call will kill it anyway. */
11903 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11904 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11905 if (arm_arch4t)
11906 output_asm_insn ("bx%?\t%|ip", operands);
11907 else
11908 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
11910 else
11912 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11913 output_asm_insn ("ldr%?\t%|pc, %0", operands);
11916 return "";
11920 /* Output a move from ARM registers to an FPA register.
11921 OPERANDS[0] is an FPA register.
11922 OPERANDS[1] is the first of the three ARM registers holding the value. */
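/* A sketch of the emitted sequence (mnemonics vary with the selected
   assembler syntax):
       stmfd   sp!, {rN, rN+1, rN+2}
       ldfe    fM, [sp], #12
   i.e. the value is bounced through the stack.  */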
11923 const char *
11924 output_mov_long_double_fpa_from_arm (rtx *operands)
11926 int arm_reg0 = REGNO (operands[1]);
11927 rtx ops[3];
11929 gcc_assert (arm_reg0 != IP_REGNUM);
11931 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11932 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11933 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11935 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11936 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
11938 return "";
11941 /* Output a move from an FPA register to ARM registers.
11942 OPERANDS[0] is the first of the three ARM registers receiving the value.
11943 OPERANDS[1] is an FPA register. */
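/* A sketch of the emitted sequence:
       stfe    fM, [sp, #-12]!
       ldmfd   sp!, {rN, rN+1, rN+2}
   i.e. the reverse of output_mov_long_double_fpa_from_arm above.  */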
11944 const char *
11945 output_mov_long_double_arm_from_fpa (rtx *operands)
11947 int arm_reg0 = REGNO (operands[0]);
11948 rtx ops[3];
11950 gcc_assert (arm_reg0 != IP_REGNUM);
11952 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11953 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11954 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11956 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
11957 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11958 return "";
11961 /* Output a move of a long double from ARM registers to ARM registers.
11962 OPERANDS[0] is the destination.
11963 OPERANDS[1] is the source. */
11964 const char *
11965 output_mov_long_double_arm_from_arm (rtx *operands)
11967 /* We have to be careful here because the two might overlap. */
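/* For example, copying {r1,r2,r3} into {r2,r3,r4} must move the
   highest word first, while copying {r2,r3,r4} into {r1,r2,r3} must
   move the lowest word first; the two loops below handle each case.  */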
11968 int dest_start = REGNO (operands[0]);
11969 int src_start = REGNO (operands[1]);
11970 rtx ops[2];
11971 int i;
11973 if (dest_start < src_start)
11975 for (i = 0; i < 3; i++)
11977 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11978 ops[1] = gen_rtx_REG (SImode, src_start + i);
11979 output_asm_insn ("mov%?\t%0, %1", ops);
11982 else
11984 for (i = 2; i >= 0; i--)
11986 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11987 ops[1] = gen_rtx_REG (SImode, src_start + i);
11988 output_asm_insn ("mov%?\t%0, %1", ops);
11992 return "";
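/* Emit a pair of SETs to load SRC into DEST.  For a constant this
   corresponds to a movw/movt style sequence, e.g. for 0x12345678
   roughly:
       movw    rD, #0x5678
       movt    rD, #0x1234
   (the second set is omitted when the high half is zero); for symbolic
   operands a HIGH/LO_SUM pair is emitted instead.  */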
11995 void
11996 arm_emit_movpair (rtx dest, rtx src)
11998 /* If the src is an immediate, simplify it. */
11999 if (CONST_INT_P (src))
12001 HOST_WIDE_INT val = INTVAL (src);
12002 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12003 if ((val >> 16) & 0x0000ffff)
12004 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12005 GEN_INT (16)),
12006 GEN_INT ((val >> 16) & 0x0000ffff));
12007 return;
12009 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12010 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
12013 /* Output a move from ARM registers to an FPA register.
12014 OPERANDS[0] is an FPA register.
12015 OPERANDS[1] is the first register of an ARM register pair. */
12016 const char *
12017 output_mov_double_fpa_from_arm (rtx *operands)
12019 int arm_reg0 = REGNO (operands[1]);
12020 rtx ops[2];
12022 gcc_assert (arm_reg0 != IP_REGNUM);
12024 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12025 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12026 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12027 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12028 return "";
12031 /* Output a move from an FPA register to ARM registers.
12032 OPERANDS[0] is the first register of an ARM register pair.
12033 OPERANDS[1] is an FPA register. */
12034 const char *
12035 output_mov_double_arm_from_fpa (rtx *operands)
12037 int arm_reg0 = REGNO (operands[0]);
12038 rtx ops[2];
12040 gcc_assert (arm_reg0 != IP_REGNUM);
12042 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12043 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12044 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12045 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12046 return "";
12049 /* Output a move between double words.
12050 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12051 or MEM<-REG and all MEMs must be offsettable addresses. */
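/* For instance, a DImode load from a simple register address becomes
       ldrd    rD, [rN]            @ when TARGET_LDRD is available
   or
       ldmia   rN, {rD, rD+1}
   and the cases below also cover the pre/post increment, decrement and
   modify addressing forms, splitting into two plain ldr/str
   instructions when the offset range or register overlap requires it.  */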
12052 const char *
12053 output_move_double (rtx *operands)
12055 enum rtx_code code0 = GET_CODE (operands[0]);
12056 enum rtx_code code1 = GET_CODE (operands[1]);
12057 rtx otherops[3];
12059 if (code0 == REG)
12061 unsigned int reg0 = REGNO (operands[0]);
12063 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12065 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12067 switch (GET_CODE (XEXP (operands[1], 0)))
12069 case REG:
12070 if (TARGET_LDRD
12071 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12072 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12073 else
12074 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12075 break;
12077 case PRE_INC:
12078 gcc_assert (TARGET_LDRD);
12079 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12080 break;
12082 case PRE_DEC:
12083 if (TARGET_LDRD)
12084 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12085 else
12086 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12087 break;
12089 case POST_INC:
12090 if (TARGET_LDRD)
12091 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12092 else
12093 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12094 break;
12096 case POST_DEC:
12097 gcc_assert (TARGET_LDRD);
12098 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12099 break;
12101 case PRE_MODIFY:
12102 case POST_MODIFY:
12103 /* Autoincrement addressing modes should never have overlapping
12104 base and destination registers, and overlapping index registers
12105 are already prohibited, so this doesn't need to worry about
12106 fix_cm3_ldrd. */
12107 otherops[0] = operands[0];
12108 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12109 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12111 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12113 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12115 /* Registers overlap so split out the increment. */
12116 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12117 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12119 else
12121 /* Use a single insn if we can.
12122 FIXME: IWMMXT allows offsets larger than ldrd can
12123 handle, fix these up with a pair of ldr. */
12124 if (TARGET_THUMB2
12125 || GET_CODE (otherops[2]) != CONST_INT
12126 || (INTVAL (otherops[2]) > -256
12127 && INTVAL (otherops[2]) < 256))
12128 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12129 else
12131 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12132 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12136 else
12138 /* Use a single insn if we can.
12139 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12140 fix these up with a pair of ldr. */
12141 if (TARGET_THUMB2
12142 || GET_CODE (otherops[2]) != CONST_INT
12143 || (INTVAL (otherops[2]) > -256
12144 && INTVAL (otherops[2]) < 256))
12145 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12146 else
12148 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12149 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12152 break;
12154 case LABEL_REF:
12155 case CONST:
12156 /* We might be able to use ldrd %0, %1 here. However the range is
12157 different to ldr/adr, and it is broken on some ARMv7-M
12158 implementations. */
12159 /* Use the second register of the pair to avoid problematic
12160 overlap. */
12161 otherops[1] = operands[1];
12162 output_asm_insn ("adr%?\t%0, %1", otherops);
12163 operands[1] = otherops[0];
12164 if (TARGET_LDRD)
12165 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12166 else
12167 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12168 break;
12170 /* ??? This needs checking for thumb2. */
12171 default:
12172 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12173 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12175 otherops[0] = operands[0];
12176 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12177 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12179 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12181 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12183 switch ((int) INTVAL (otherops[2]))
12185 case -8:
12186 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12187 return "";
12188 case -4:
12189 if (TARGET_THUMB2)
12190 break;
12191 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12192 return "";
12193 case 4:
12194 if (TARGET_THUMB2)
12195 break;
12196 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12197 return "";
12200 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12201 operands[1] = otherops[0];
12202 if (TARGET_LDRD
12203 && (GET_CODE (otherops[2]) == REG
12204 || TARGET_THUMB2
12205 || (GET_CODE (otherops[2]) == CONST_INT
12206 && INTVAL (otherops[2]) > -256
12207 && INTVAL (otherops[2]) < 256)))
12209 if (reg_overlap_mentioned_p (operands[0],
12210 otherops[2]))
12212 rtx tmp;
12213 /* Swap base and index registers over to
12214 avoid a conflict. */
12215 tmp = otherops[1];
12216 otherops[1] = otherops[2];
12217 otherops[2] = tmp;
12219 /* If both registers conflict, it will usually
12220 have been fixed by a splitter. */
12221 if (reg_overlap_mentioned_p (operands[0], otherops[2])
12222 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
12224 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12225 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12227 else
12229 otherops[0] = operands[0];
12230 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
12232 return "";
12235 if (GET_CODE (otherops[2]) == CONST_INT)
12237 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12238 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12239 else
12240 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12242 else
12243 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12245 else
12246 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12248 if (TARGET_LDRD)
12249 return "ldr%(d%)\t%0, [%1]";
12251 return "ldm%(ia%)\t%1, %M0";
12253 else
12255 otherops[1] = adjust_address (operands[1], SImode, 4);
12256 /* Take care of overlapping base/data reg. */
12257 if (reg_mentioned_p (operands[0], operands[1]))
12259 output_asm_insn ("ldr%?\t%0, %1", otherops);
12260 output_asm_insn ("ldr%?\t%0, %1", operands);
12262 else
12264 output_asm_insn ("ldr%?\t%0, %1", operands);
12265 output_asm_insn ("ldr%?\t%0, %1", otherops);
12270 else
12272 /* Constraints should ensure this. */
12273 gcc_assert (code0 == MEM && code1 == REG);
12274 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12276 switch (GET_CODE (XEXP (operands[0], 0)))
12278 case REG:
12279 if (TARGET_LDRD)
12280 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12281 else
12282 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12283 break;
12285 case PRE_INC:
12286 gcc_assert (TARGET_LDRD);
12287 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12288 break;
12290 case PRE_DEC:
12291 if (TARGET_LDRD)
12292 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12293 else
12294 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12295 break;
12297 case POST_INC:
12298 if (TARGET_LDRD)
12299 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12300 else
12301 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12302 break;
12304 case POST_DEC:
12305 gcc_assert (TARGET_LDRD);
12306 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12307 break;
12309 case PRE_MODIFY:
12310 case POST_MODIFY:
12311 otherops[0] = operands[1];
12312 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12313 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
12315 /* IWMMXT allows offsets larger than ldrd can handle,
12316 fix these up with a pair of ldr. */
12317 if (!TARGET_THUMB2
12318 && GET_CODE (otherops[2]) == CONST_INT
12319 && (INTVAL(otherops[2]) <= -256
12320 || INTVAL(otherops[2]) >= 256))
12322 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12324 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12325 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12327 else
12329 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12330 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12333 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12334 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12335 else
12336 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12337 break;
12339 case PLUS:
12340 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12341 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12343 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12345 case -8:
12346 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12347 return "";
12349 case -4:
12350 if (TARGET_THUMB2)
12351 break;
12352 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12353 return "";
12355 case 4:
12356 if (TARGET_THUMB2)
12357 break;
12358 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12359 return "";
12362 if (TARGET_LDRD
12363 && (GET_CODE (otherops[2]) == REG
12364 || TARGET_THUMB2
12365 || (GET_CODE (otherops[2]) == CONST_INT
12366 && INTVAL (otherops[2]) > -256
12367 && INTVAL (otherops[2]) < 256)))
12369 otherops[0] = operands[1];
12370 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12371 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
12372 return "";
12374 /* Fall through */
12376 default:
12377 otherops[0] = adjust_address (operands[0], SImode, 4);
12378 otherops[1] = operands[1];
12379 output_asm_insn ("str%?\t%1, %0", operands);
12380 output_asm_insn ("str%?\t%H1, %0", otherops);
12384 return "";
12387 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12388 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
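/* A quad-word value held in four core registers is moved with a single
   ldmia/stmia, or with four register-to-register moves ordered to cope
   with overlap, in the same spirit as output_move_double above.  */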
12390 const char *
12391 output_move_quad (rtx *operands)
12393 if (REG_P (operands[0]))
12395 /* Load, or reg->reg move. */
12397 if (MEM_P (operands[1]))
12399 switch (GET_CODE (XEXP (operands[1], 0)))
12401 case REG:
12402 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12403 break;
12405 case LABEL_REF:
12406 case CONST:
12407 output_asm_insn ("adr%?\t%0, %1", operands);
12408 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12409 break;
12411 default:
12412 gcc_unreachable ();
12415 else
12417 rtx ops[2];
12418 int dest, src, i;
12420 gcc_assert (REG_P (operands[1]));
12422 dest = REGNO (operands[0]);
12423 src = REGNO (operands[1]);
12425 /* This seems pretty dumb, but hopefully GCC won't try to do it
12426 very often. */
12427 if (dest < src)
12428 for (i = 0; i < 4; i++)
12430 ops[0] = gen_rtx_REG (SImode, dest + i);
12431 ops[1] = gen_rtx_REG (SImode, src + i);
12432 output_asm_insn ("mov%?\t%0, %1", ops);
12434 else
12435 for (i = 3; i >= 0; i--)
12437 ops[0] = gen_rtx_REG (SImode, dest + i);
12438 ops[1] = gen_rtx_REG (SImode, src + i);
12439 output_asm_insn ("mov%?\t%0, %1", ops);
12443 else
12445 gcc_assert (MEM_P (operands[0]));
12446 gcc_assert (REG_P (operands[1]));
12447 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12449 switch (GET_CODE (XEXP (operands[0], 0)))
12451 case REG:
12452 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12453 break;
12455 default:
12456 gcc_unreachable ();
12460 return "";
12463 /* Output a VFP load or store instruction. */
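/* The template assembled below produces, for example,
       flds    s0, [r0]            @ SFmode load
       fstmdbd sp!, {d7}           @ DFmode store with PRE_DEC
   and appends an "@ int" comment when an integer mode is being moved
   through a VFP register.  */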
12465 const char *
12466 output_move_vfp (rtx *operands)
12468 rtx reg, mem, addr, ops[2];
12469 int load = REG_P (operands[0]);
12470 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12471 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12472 const char *templ;
12473 char buff[50];
12474 enum machine_mode mode;
12476 reg = operands[!load];
12477 mem = operands[load];
12479 mode = GET_MODE (reg);
12481 gcc_assert (REG_P (reg));
12482 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12483 gcc_assert (mode == SFmode
12484 || mode == DFmode
12485 || mode == SImode
12486 || mode == DImode
12487 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12488 gcc_assert (MEM_P (mem));
12490 addr = XEXP (mem, 0);
12492 switch (GET_CODE (addr))
12494 case PRE_DEC:
12495 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12496 ops[0] = XEXP (addr, 0);
12497 ops[1] = reg;
12498 break;
12500 case POST_INC:
12501 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12502 ops[0] = XEXP (addr, 0);
12503 ops[1] = reg;
12504 break;
12506 default:
12507 templ = "f%s%c%%?\t%%%s0, %%1%s";
12508 ops[0] = reg;
12509 ops[1] = mem;
12510 break;
12513 sprintf (buff, templ,
12514 load ? "ld" : "st",
12515 dp ? 'd' : 's',
12516 dp ? "P" : "",
12517 integer_p ? "\t%@ int" : "");
12518 output_asm_insn (buff, ops);
12520 return "";
12523 /* Output a Neon quad-word load or store, or a load or store for
12524 larger structure modes.
12526 WARNING: The ordering of elements is weird in big-endian mode,
12527 because we use VSTM, as required by the EABI. GCC RTL defines
12528 element ordering based on in-memory order. This can differ
12529 from the architectural ordering of elements within a NEON register.
12530 The intrinsics defined in arm_neon.h use the NEON register element
12531 ordering, not the GCC RTL element ordering.
12533 For example, the in-memory ordering of a big-endian quadword
12534 vector with 16-bit elements when stored from register pair {d0,d1}
12535 will be (lowest address first, d0[N] is NEON register element N):
12537 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12539 When necessary, quadword registers (dN, dN+1) are moved to ARM
12540 registers from rN in the order:
12542 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12544 So that STM/LDM can be used on vectors in ARM registers, and the
12545 same memory layout will result as if VSTM/VLDM were used. */
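/* In the common cases this emits a single vldmia/vstmia (register base
   or POST_INC) or vldmdb/vstmdb (PRE_DEC); reg+offset and label
   addresses are instead split into one vldr/vstr per D register, with
   any load that overlaps the base register deferred until last.  */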
12547 const char *
12548 output_move_neon (rtx *operands)
12550 rtx reg, mem, addr, ops[2];
12551 int regno, load = REG_P (operands[0]);
12552 const char *templ;
12553 char buff[50];
12554 enum machine_mode mode;
12556 reg = operands[!load];
12557 mem = operands[load];
12559 mode = GET_MODE (reg);
12561 gcc_assert (REG_P (reg));
12562 regno = REGNO (reg);
12563 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12564 || NEON_REGNO_OK_FOR_QUAD (regno));
12565 gcc_assert (VALID_NEON_DREG_MODE (mode)
12566 || VALID_NEON_QREG_MODE (mode)
12567 || VALID_NEON_STRUCT_MODE (mode));
12568 gcc_assert (MEM_P (mem));
12570 addr = XEXP (mem, 0);
12572 /* Strip off const from addresses like (const (plus (...))). */
12573 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12574 addr = XEXP (addr, 0);
12576 switch (GET_CODE (addr))
12578 case POST_INC:
12579 templ = "v%smia%%?\t%%0!, %%h1";
12580 ops[0] = XEXP (addr, 0);
12581 ops[1] = reg;
12582 break;
12584 case PRE_DEC:
12585 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12586 templ = "v%smdb%%?\t%%0!, %%h1";
12587 ops[0] = XEXP (addr, 0);
12588 ops[1] = reg;
12589 break;
12591 case POST_MODIFY:
12592 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12593 gcc_unreachable ();
12595 case LABEL_REF:
12596 case PLUS:
12598 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12599 int i;
12600 int overlap = -1;
12601 for (i = 0; i < nregs; i++)
12603 /* We're only using DImode here because it's a convenient size. */
12604 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12605 ops[1] = adjust_address (mem, DImode, 8 * i);
12606 if (reg_overlap_mentioned_p (ops[0], mem))
12608 gcc_assert (overlap == -1);
12609 overlap = i;
12611 else
12613 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12614 output_asm_insn (buff, ops);
12617 if (overlap != -1)
12619 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12620 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12621 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12622 output_asm_insn (buff, ops);
12625 return "";
12628 default:
12629 templ = "v%smia%%?\t%%m0, %%h1";
12630 ops[0] = mem;
12631 ops[1] = reg;
12634 sprintf (buff, templ, load ? "ld" : "st");
12635 output_asm_insn (buff, ops);
12637 return "";
12640 /* Output an ADD r, s, #n where n may be too big for one instruction.
12641 If we are adding zero and the source and destination registers are the same, output nothing. */
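/* For example, #0x1001 is not a valid ARM immediate, so the result is
   split into two instructions, roughly:
       add     rD, rS, #1
       add     rD, rD, #4096  */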
12642 const char *
12643 output_add_immediate (rtx *operands)
12645 HOST_WIDE_INT n = INTVAL (operands[2]);
12647 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12649 if (n < 0)
12650 output_multi_immediate (operands,
12651 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12652 -n);
12653 else
12654 output_multi_immediate (operands,
12655 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12659 return "";
12662 /* Output a multiple immediate operation.
12663 OPERANDS is the vector of operands referred to in the output patterns.
12664 INSTR1 is the output pattern to use for the first constant.
12665 INSTR2 is the output pattern to use for subsequent constants.
12666 IMMED_OP is the index of the constant slot in OPERANDS.
12667 N is the constant value. */
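/* Each chunk emitted below is an 8-bit value at an even bit position,
   which matches what an ARM data-processing immediate can encode
   (an 8-bit constant rotated right by an even amount).  */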
12668 static const char *
12669 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12670 int immed_op, HOST_WIDE_INT n)
12672 #if HOST_BITS_PER_WIDE_INT > 32
12673 n &= 0xffffffff;
12674 #endif
12676 if (n == 0)
12678 /* Quick and easy output. */
12679 operands[immed_op] = const0_rtx;
12680 output_asm_insn (instr1, operands);
12682 else
12684 int i;
12685 const char * instr = instr1;
12687 /* Note that n is never zero here (which would give no output). */
12688 for (i = 0; i < 32; i += 2)
12690 if (n & (3 << i))
12692 operands[immed_op] = GEN_INT (n & (255 << i));
12693 output_asm_insn (instr, operands);
12694 instr = instr2;
12695 i += 6;
12700 return "";
12703 /* Return the name of a shifter operation. */
12704 static const char *
12705 arm_shift_nmem(enum rtx_code code)
12707 switch (code)
12709 case ASHIFT:
12710 return ARM_LSL_NAME;
12712 case ASHIFTRT:
12713 return "asr";
12715 case LSHIFTRT:
12716 return "lsr";
12718 case ROTATERT:
12719 return "ror";
12721 default:
12722 abort();
12726 /* Return the appropriate ARM instruction for the operation code.
12727 The returned result should not be overwritten. OP is the rtx of the
12728 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
12729 was shifted. */
12730 const char *
12731 arithmetic_instr (rtx op, int shift_first_arg)
12733 switch (GET_CODE (op))
12735 case PLUS:
12736 return "add";
12738 case MINUS:
12739 return shift_first_arg ? "rsb" : "sub";
12741 case IOR:
12742 return "orr";
12744 case XOR:
12745 return "eor";
12747 case AND:
12748 return "and";
12750 case ASHIFT:
12751 case ASHIFTRT:
12752 case LSHIFTRT:
12753 case ROTATERT:
12754 return arm_shift_nmem(GET_CODE(op));
12756 default:
12757 gcc_unreachable ();
12761 /* Ensure valid constant shifts and return the appropriate shift mnemonic
12762 for the operation code. The returned result should not be overwritten.
12763 OP is the rtx of the shift.
12764 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
12765 will be the constant shift amount. */
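/* For example, (ashiftrt x (const_int 3)) yields "asr" with *AMOUNTP
   set to 3, while (mult x (const_int 8)) is handled as lsl with
   *AMOUNTP set to 3 via int_log2.  */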
12766 static const char *
12767 shift_op (rtx op, HOST_WIDE_INT *amountp)
12769 const char * mnem;
12770 enum rtx_code code = GET_CODE (op);
12772 switch (GET_CODE (XEXP (op, 1)))
12774 case REG:
12775 case SUBREG:
12776 *amountp = -1;
12777 break;
12779 case CONST_INT:
12780 *amountp = INTVAL (XEXP (op, 1));
12781 break;
12783 default:
12784 gcc_unreachable ();
12787 switch (code)
12789 case ROTATE:
12790 gcc_assert (*amountp != -1);
12791 *amountp = 32 - *amountp;
12792 code = ROTATERT;
12794 /* Fall through. */
12796 case ASHIFT:
12797 case ASHIFTRT:
12798 case LSHIFTRT:
12799 case ROTATERT:
12800 mnem = arm_shift_nmem(code);
12801 break;
12803 case MULT:
12804 /* We never have to worry about the amount being other than a
12805 power of 2, since this case can never be reloaded from a reg. */
12806 gcc_assert (*amountp != -1);
12807 *amountp = int_log2 (*amountp);
12808 return ARM_LSL_NAME;
12810 default:
12811 gcc_unreachable ();
12814 if (*amountp != -1)
12816 /* This is not 100% correct, but follows from the desire to merge
12817 multiplication by a power of 2 with the recognizer for a
12818 shift. >=32 is not a valid shift for "lsl", so we must try and
12819 output a shift that produces the correct arithmetical result.
12820 Using lsr #32 is identical except for the fact that the carry bit
12821 is not set correctly if we set the flags; but we never use the
12822 carry bit from such an operation, so we can ignore that. */
12823 if (code == ROTATERT)
12824 /* Rotate is just modulo 32. */
12825 *amountp &= 31;
12826 else if (*amountp != (*amountp & 31))
12828 if (code == ASHIFT)
12829 mnem = "lsr";
12830 *amountp = 32;
12833 /* Shifts of 0 are no-ops. */
12834 if (*amountp == 0)
12835 return NULL;
12838 return mnem;
12841 /* Obtain the shift count corresponding to the power of two POWER. */
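/* e.g. int_log2 (8) == 3.  The result is the bit position of the lowest
   set bit, which for a power of two is its base-2 logarithm.  */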
12843 static HOST_WIDE_INT
12844 int_log2 (HOST_WIDE_INT power)
12846 HOST_WIDE_INT shift = 0;
12848 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
12850 gcc_assert (shift <= 31);
12851 shift++;
12854 return shift;
12857 /* Output a .ascii pseudo-op, keeping track of lengths; this is needed
12858 because /bin/as is horribly restrictive. The judgement about
12859 whether or not each character is 'printable' (and can be output as
12860 is) or not (and must be printed with an octal escape) must be made
12861 with reference to the *host* character set -- the situation is
12862 similar to that discussed in the comments above pp_c_char in
12863 c-pretty-print.c. */
12865 #define MAX_ASCII_LEN 51
12867 void
12868 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
12870 int i;
12871 int len_so_far = 0;
12873 fputs ("\t.ascii\t\"", stream);
12875 for (i = 0; i < len; i++)
12877 int c = p[i];
12879 if (len_so_far >= MAX_ASCII_LEN)
12881 fputs ("\"\n\t.ascii\t\"", stream);
12882 len_so_far = 0;
12885 if (ISPRINT (c))
12887 if (c == '\\' || c == '\"')
12889 putc ('\\', stream);
12890 len_so_far++;
12892 putc (c, stream);
12893 len_so_far++;
12895 else
12897 fprintf (stream, "\\%03o", c);
12898 len_so_far += 4;
12902 fputs ("\"\n", stream);
12905 /* Compute the register save mask for registers 0 through 12
12906 inclusive. This code is used by arm_compute_save_reg_mask. */
12908 static unsigned long
12909 arm_compute_save_reg0_reg12_mask (void)
12911 unsigned long func_type = arm_current_func_type ();
12912 unsigned long save_reg_mask = 0;
12913 unsigned int reg;
12915 if (IS_INTERRUPT (func_type))
12917 unsigned int max_reg;
12918 /* Interrupt functions must not corrupt any registers,
12919 even call clobbered ones. If this is a leaf function
12920 we can just examine the registers used by the RTL, but
12921 otherwise we have to assume that whatever function is
12922 called might clobber anything, and so we have to save
12923 all the call-clobbered registers as well. */
12924 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
12925 /* FIQ handlers have registers r8 - r12 banked, so
12926 we only need to check r0 - r7. Normal ISRs only
12927 bank r14 and r15, so we must check up to r12.
12928 r13 is the stack pointer which is always preserved,
12929 so we do not need to consider it here. */
12930 max_reg = 7;
12931 else
12932 max_reg = 12;
12934 for (reg = 0; reg <= max_reg; reg++)
12935 if (df_regs_ever_live_p (reg)
12936 || (! current_function_is_leaf && call_used_regs[reg]))
12937 save_reg_mask |= (1 << reg);
12939 /* Also save the pic base register if necessary. */
12940 if (flag_pic
12941 && !TARGET_SINGLE_PIC_BASE
12942 && arm_pic_register != INVALID_REGNUM
12943 && crtl->uses_pic_offset_table)
12944 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12946 else if (IS_VOLATILE(func_type))
12948 /* For noreturn functions we historically omitted register saves
12949 altogether. However this really messes up debugging. As a
12950 compromise save just the frame pointers. Combined with the link
12951 register saved elsewhere this should be sufficient to get
12952 a backtrace. */
12953 if (frame_pointer_needed)
12954 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12955 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
12956 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
12957 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
12958 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
12960 else
12962 /* In the normal case we only need to save those registers
12963 which are call saved and which are used by this function. */
12964 for (reg = 0; reg <= 11; reg++)
12965 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12966 save_reg_mask |= (1 << reg);
12968 /* Handle the frame pointer as a special case. */
12969 if (frame_pointer_needed)
12970 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12972 /* If we aren't loading the PIC register,
12973 don't stack it even though it may be live. */
12974 if (flag_pic
12975 && !TARGET_SINGLE_PIC_BASE
12976 && arm_pic_register != INVALID_REGNUM
12977 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
12978 || crtl->uses_pic_offset_table))
12979 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12981 /* The prologue will copy SP into R0, so save it. */
12982 if (IS_STACKALIGN (func_type))
12983 save_reg_mask |= 1;
12986 /* Save registers so the exception handler can modify them. */
12987 if (crtl->calls_eh_return)
12989 unsigned int i;
12991 for (i = 0; ; i++)
12993 reg = EH_RETURN_DATA_REGNO (i);
12994 if (reg == INVALID_REGNUM)
12995 break;
12996 save_reg_mask |= 1 << reg;
13000 return save_reg_mask;
13004 /* Compute the number of bytes used to store the static chain register on the
13005 stack, above the stack frame. We need to know this accurately to get the
13006 alignment of the rest of the stack frame correct. */
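/* The result is either 0 or 4: only an APCS-frame, ARM-mode, nested
   function that needs a frame pointer, has r3 live and has no pretend
   args gets the extra word.  */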
13008 static int arm_compute_static_chain_stack_bytes (void)
13010 unsigned long func_type = arm_current_func_type ();
13011 int static_chain_stack_bytes = 0;
13013 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
13014 IS_NESTED (func_type) &&
13015 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13016 static_chain_stack_bytes = 4;
13018 return static_chain_stack_bytes;
13022 /* Compute a bit mask of which registers need to be
13023 saved on the stack for the current function.
13024 This is used by arm_get_frame_offsets, which may add extra registers. */
13026 static unsigned long
13027 arm_compute_save_reg_mask (void)
13029 unsigned int save_reg_mask = 0;
13030 unsigned long func_type = arm_current_func_type ();
13031 unsigned int reg;
13033 if (IS_NAKED (func_type))
13034 /* This should never really happen. */
13035 return 0;
13037 /* If we are creating a stack frame, then we must save the frame pointer,
13038 IP (which will hold the old stack pointer), LR and the PC. */
13039 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13040 save_reg_mask |=
13041 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13042 | (1 << IP_REGNUM)
13043 | (1 << LR_REGNUM)
13044 | (1 << PC_REGNUM);
13046 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13048 /* Decide if we need to save the link register.
13049 Interrupt routines have their own banked link register,
13050 so they never need to save it.
13051 Otherwise if we do not use the link register we do not need to save
13052 it. If we are pushing other registers onto the stack however, we
13053 can save an instruction in the epilogue by pushing the link register
13054 now and then popping it back into the PC. This incurs extra memory
13055 accesses though, so we only do it when optimizing for size, and only
13056 if we know that we will not need a fancy return sequence. */
13057 if (df_regs_ever_live_p (LR_REGNUM)
13058 || (save_reg_mask
13059 && optimize_size
13060 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13061 && !crtl->calls_eh_return))
13062 save_reg_mask |= 1 << LR_REGNUM;
13064 if (cfun->machine->lr_save_eliminated)
13065 save_reg_mask &= ~ (1 << LR_REGNUM);
13067 if (TARGET_REALLY_IWMMXT
13068 && ((bit_count (save_reg_mask)
13069 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13070 arm_compute_static_chain_stack_bytes())
13071 ) % 2) != 0)
13073 /* The total number of registers that are going to be pushed
13074 onto the stack is odd. We need to ensure that the stack
13075 is 64-bit aligned before we start to save iWMMXt registers,
13076 and also before we start to create locals. (A local variable
13077 might be a double or long long which we will load/store using
13078 an iWMMXt instruction). Therefore we need to push another
13079 ARM register, so that the stack will be 64-bit aligned. We
13080 try to avoid using the arg registers (r0 - r3) as they might be
13081 used to pass values in a tail call. */
13082 for (reg = 4; reg <= 12; reg++)
13083 if ((save_reg_mask & (1 << reg)) == 0)
13084 break;
13086 if (reg <= 12)
13087 save_reg_mask |= (1 << reg);
13088 else
13090 cfun->machine->sibcall_blocked = 1;
13091 save_reg_mask |= (1 << 3);
13095 /* We may need to push an additional register for use initializing the
13096 PIC base register. */
13097 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13098 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13100 reg = thumb_find_work_register (1 << 4);
13101 if (!call_used_regs[reg])
13102 save_reg_mask |= (1 << reg);
13105 return save_reg_mask;
13109 /* Compute a bit mask of which registers need to be
13110 saved on the stack for the current function. */
13111 static unsigned long
13112 thumb1_compute_save_reg_mask (void)
13114 unsigned long mask;
13115 unsigned reg;
13117 mask = 0;
13118 for (reg = 0; reg < 12; reg ++)
13119 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13120 mask |= 1 << reg;
13122 if (flag_pic
13123 && !TARGET_SINGLE_PIC_BASE
13124 && arm_pic_register != INVALID_REGNUM
13125 && crtl->uses_pic_offset_table)
13126 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13128 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13129 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13130 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13132 /* LR will also be pushed if any lo regs are pushed. */
13133 if (mask & 0xff || thumb_force_lr_save ())
13134 mask |= (1 << LR_REGNUM);
13136 /* Make sure we have a low work register if we need one.
13137 We will need one if we are going to push a high register,
13138 but we are not currently intending to push a low register. */
13139 if ((mask & 0xff) == 0
13140 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13142 /* Use thumb_find_work_register to choose which register
13143 we will use. If the register is live then we will
13144 have to push it. Use LAST_LO_REGNUM as our fallback
13145 choice for the register to select. */
13146 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13147 /* Make sure the register returned by thumb_find_work_register is
13148 not part of the return value. */
13149 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13150 reg = LAST_LO_REGNUM;
13152 if (! call_used_regs[reg])
13153 mask |= 1 << reg;
13156 /* The 504 below is 8 bytes less than 512 because there are two possible
13157 alignment words. We can't tell here if they will be present or not so we
13158 have to play it safe and assume that they are. */
13159 if ((CALLER_INTERWORKING_SLOT_SIZE +
13160 ROUND_UP_WORD (get_frame_size ()) +
13161 crtl->outgoing_args_size) >= 504)
13163 /* This is the same as the code in thumb1_expand_prologue() which
13164 determines which register to use for stack decrement. */
13165 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13166 if (mask & (1 << reg))
13167 break;
13169 if (reg > LAST_LO_REGNUM)
13171 /* Make sure we have a register available for stack decrement. */
13172 mask |= 1 << LAST_LO_REGNUM;
13176 return mask;
13180 /* Return the number of bytes required to save VFP registers. */
13181 static int
13182 arm_get_vfp_saved_size (void)
13184 unsigned int regno;
13185 int count;
13186 int saved;
13188 saved = 0;
13189 /* Space for saved VFP registers. */
13190 if (TARGET_HARD_FLOAT && TARGET_VFP)
13192 count = 0;
13193 for (regno = FIRST_VFP_REGNUM;
13194 regno < LAST_VFP_REGNUM;
13195 regno += 2)
13197 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13198 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13200 if (count > 0)
13202 /* Workaround ARM10 VFPr1 bug. */
13203 if (count == 2 && !arm_arch6)
13204 count++;
13205 saved += count * 8;
13207 count = 0;
13209 else
13210 count++;
13212 if (count > 0)
13214 if (count == 2 && !arm_arch6)
13215 count++;
13216 saved += count * 8;
13219 return saved;
13223 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13224 everything bar the final return instruction. */
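/* Typical output for a simple leaf function is just "bx lr" (or
   "mov pc, lr" before ARMv4T); when registers were saved, the return
   normally folds into the final "ldmfd sp!, {..., pc}" or "pop".  */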
13225 const char *
13226 output_return_instruction (rtx operand, int really_return, int reverse)
13228 char conditional[10];
13229 char instr[100];
13230 unsigned reg;
13231 unsigned long live_regs_mask;
13232 unsigned long func_type;
13233 arm_stack_offsets *offsets;
13235 func_type = arm_current_func_type ();
13237 if (IS_NAKED (func_type))
13238 return "";
13240 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13242 /* If this function was declared non-returning, and we have
13243 found a tail call, then we have to trust that the called
13244 function won't return. */
13245 if (really_return)
13247 rtx ops[2];
13249 /* Otherwise, trap an attempted return by aborting. */
13250 ops[0] = operand;
13251 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
13252 : "abort");
13253 assemble_external_libcall (ops[1]);
13254 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
13257 return "";
13260 gcc_assert (!cfun->calls_alloca || really_return);
13262 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13264 cfun->machine->return_used_this_function = 1;
13266 offsets = arm_get_frame_offsets ();
13267 live_regs_mask = offsets->saved_regs_mask;
13269 if (live_regs_mask)
13271 const char * return_reg;
13273 /* If we do not have any special requirements for function exit
13274 (e.g. interworking) then we can load the return address
13275 directly into the PC. Otherwise we must load it into LR. */
13276 if (really_return
13277 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
13278 return_reg = reg_names[PC_REGNUM];
13279 else
13280 return_reg = reg_names[LR_REGNUM];
13282 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
13284 /* There are three possible reasons for the IP register
13285 being saved. 1) a stack frame was created, in which case
13286 IP contains the old stack pointer, or 2) an ISR routine
13287 corrupted it, or 3) it was saved to align the stack on
13288 iWMMXt. In case 1, restore IP into SP, otherwise just
13289 restore IP. */
13290 if (frame_pointer_needed)
13292 live_regs_mask &= ~ (1 << IP_REGNUM);
13293 live_regs_mask |= (1 << SP_REGNUM);
13295 else
13296 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
13299 /* On some ARM architectures it is faster to use LDR rather than
13300 LDM to load a single register. On other architectures, the
13301 cost is the same. In 26 bit mode, or for exception handlers,
13302 we have to use LDM to load the PC so that the CPSR is also
13303 restored. */
13304 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13305 if (live_regs_mask == (1U << reg))
13306 break;
13308 if (reg <= LAST_ARM_REGNUM
13309 && (reg != LR_REGNUM
13310 || ! really_return
13311 || ! IS_INTERRUPT (func_type)))
13313 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
13314 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
13316 else
13318 char *p;
13319 int first = 1;
13321 /* Generate the load multiple instruction to restore the
13322 registers. Note we can get here, even if
13323 frame_pointer_needed is true, but only if sp already
13324 points to the base of the saved core registers. */
13325 if (live_regs_mask & (1 << SP_REGNUM))
13327 unsigned HOST_WIDE_INT stack_adjust;
13329 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
13330 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
13332 if (stack_adjust && arm_arch5 && TARGET_ARM)
13333 if (TARGET_UNIFIED_ASM)
13334 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
13335 else
13336 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
13337 else
13339 /* If we can't use ldmib (SA110 bug),
13340 then try to pop r3 instead. */
13341 if (stack_adjust)
13342 live_regs_mask |= 1 << 3;
13344 if (TARGET_UNIFIED_ASM)
13345 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
13346 else
13347 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13350 else
13351 if (TARGET_UNIFIED_ASM)
13352 sprintf (instr, "pop%s\t{", conditional);
13353 else
13354 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
13356 p = instr + strlen (instr);
13358 for (reg = 0; reg <= SP_REGNUM; reg++)
13359 if (live_regs_mask & (1 << reg))
13361 int l = strlen (reg_names[reg]);
13363 if (first)
13364 first = 0;
13365 else
13367 memcpy (p, ", ", 2);
13368 p += 2;
13371 memcpy (p, "%|", 2);
13372 memcpy (p + 2, reg_names[reg], l);
13373 p += l + 2;
13376 if (live_regs_mask & (1 << LR_REGNUM))
13378 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13379 /* If returning from an interrupt, restore the CPSR. */
13380 if (IS_INTERRUPT (func_type))
13381 strcat (p, "^");
13383 else
13384 strcpy (p, "}");
13387 output_asm_insn (instr, & operand);
13389 /* See if we need to generate an extra instruction to
13390 perform the actual function return. */
13391 if (really_return
13392 && func_type != ARM_FT_INTERWORKED
13393 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13395 /* The return has already been handled
13396 by loading the LR into the PC. */
13397 really_return = 0;
13401 if (really_return)
13403 switch ((int) ARM_FUNC_TYPE (func_type))
13405 case ARM_FT_ISR:
13406 case ARM_FT_FIQ:
13407 /* ??? This is wrong for unified assembly syntax. */
13408 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13409 break;
13411 case ARM_FT_INTERWORKED:
13412 sprintf (instr, "bx%s\t%%|lr", conditional);
13413 break;
13415 case ARM_FT_EXCEPTION:
13416 /* ??? This is wrong for unified assembly syntax. */
13417 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13418 break;
13420 default:
13421 /* Use bx if it's available. */
13422 if (arm_arch5 || arm_arch4t)
13423 sprintf (instr, "bx%s\t%%|lr", conditional);
13424 else
13425 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13426 break;
13429 output_asm_insn (instr, & operand);
13432 return "";
13435 /* Write the function name into the code section, directly preceding
13436 the function prologue.
13438 Code will be output similar to this:
13440 .ascii "arm_poke_function_name", 0
13441 .align
13443 .word 0xff000000 + (t1 - t0)
13444 arm_poke_function_name
13445 mov ip, sp
13446 stmfd sp!, {fp, ip, lr, pc}
13447 sub fp, ip, #4
13449 When performing a stack backtrace, code can inspect the value
13450 of 'pc' stored at 'fp' + 0. If the trace function then looks
13451 at location pc - 12 and the top 8 bits are set, then we know
13452 that there is a function name embedded immediately preceding this
13453 location, whose length is ((pc[-3]) & ~0xff000000).
13455 We assume that pc is declared as a pointer to an unsigned long.
13457 It is of no benefit to output the function name if we are assembling
13458 a leaf function. These function types will not contain a stack
13459 backtrace structure, therefore it is not possible to determine the
13460 function name. */
13461 void
13462 arm_poke_function_name (FILE *stream, const char *name)
13464 unsigned long alignlength;
13465 unsigned long length;
13466 rtx x;
13468 length = strlen (name) + 1;
13469 alignlength = ROUND_UP_WORD (length);
13471 ASM_OUTPUT_ASCII (stream, name, length);
13472 ASM_OUTPUT_ALIGN (stream, 2);
13473 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13474 assemble_aligned_integer (UNITS_PER_WORD, x);
13477 /* Place some comments into the assembler stream
13478 describing the current function. */
13479 static void
13480 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13482 unsigned long func_type;
13484 if (TARGET_THUMB1)
13486 thumb1_output_function_prologue (f, frame_size);
13487 return;
13490 /* Sanity check. */
13491 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13493 func_type = arm_current_func_type ();
13495 switch ((int) ARM_FUNC_TYPE (func_type))
13497 default:
13498 case ARM_FT_NORMAL:
13499 break;
13500 case ARM_FT_INTERWORKED:
13501 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13502 break;
13503 case ARM_FT_ISR:
13504 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13505 break;
13506 case ARM_FT_FIQ:
13507 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13508 break;
13509 case ARM_FT_EXCEPTION:
13510 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13511 break;
13514 if (IS_NAKED (func_type))
13515 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13517 if (IS_VOLATILE (func_type))
13518 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13520 if (IS_NESTED (func_type))
13521 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13522 if (IS_STACKALIGN (func_type))
13523 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13525 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13526 crtl->args.size,
13527 crtl->args.pretend_args_size, frame_size);
13529 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13530 frame_pointer_needed,
13531 cfun->machine->uses_anonymous_args);
13533 if (cfun->machine->lr_save_eliminated)
13534 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13536 if (crtl->calls_eh_return)
13537 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
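/* Generate the function epilogue.  SIBLING is non-null for a
   sibling-call epilogue (it is the call insn), or NULL for a normal
   return, in which case the final return instruction is also emitted.  */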
13541 const char *
13542 arm_output_epilogue (rtx sibling)
13544 int reg;
13545 unsigned long saved_regs_mask;
13546 unsigned long func_type;
13547 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13548 frame that is $fp + 4 for a non-variadic function. */
13549 int floats_offset = 0;
13550 rtx operands[3];
13551 FILE * f = asm_out_file;
13552 unsigned int lrm_count = 0;
13553 int really_return = (sibling == NULL);
13554 int start_reg;
13555 arm_stack_offsets *offsets;
13557 /* If we have already generated the return instruction
13558 then it is futile to generate anything else. */
13559 if (use_return_insn (FALSE, sibling) &&
13560 (cfun->machine->return_used_this_function != 0))
13561 return "";
13563 func_type = arm_current_func_type ();
13565 if (IS_NAKED (func_type))
13566 /* Naked functions don't have epilogues. */
13567 return "";
13569 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13571 rtx op;
13573 /* A volatile function should never return. Call abort. */
13574 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13575 assemble_external_libcall (op);
13576 output_asm_insn ("bl\t%a0", &op);
13578 return "";
13581 /* If we are throwing an exception, then we really must be doing a
13582 return, so we can't tail-call. */
13583 gcc_assert (!crtl->calls_eh_return || really_return);
13585 offsets = arm_get_frame_offsets ();
13586 saved_regs_mask = offsets->saved_regs_mask;
13588 if (TARGET_IWMMXT)
13589 lrm_count = bit_count (saved_regs_mask);
13591 floats_offset = offsets->saved_args;
13592 /* Compute how far away the floats will be. */
13593 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13594 if (saved_regs_mask & (1 << reg))
13595 floats_offset += 4;
13597 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13599 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13600 int vfp_offset = offsets->frame;
13602 if (TARGET_FPA_EMU2)
13604 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13605 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13607 floats_offset += 12;
13608 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13609 reg, FP_REGNUM, floats_offset - vfp_offset);
13612 else
13614 start_reg = LAST_FPA_REGNUM;
13616 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13618 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13620 floats_offset += 12;
13622 /* We can't unstack more than four registers at once. */
13623 if (start_reg - reg == 3)
13625 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13626 reg, FP_REGNUM, floats_offset - vfp_offset);
13627 start_reg = reg - 1;
13630 else
13632 if (reg != start_reg)
13633 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13634 reg + 1, start_reg - reg,
13635 FP_REGNUM, floats_offset - vfp_offset);
13636 start_reg = reg - 1;
13640 /* Just in case the last register checked also needs unstacking. */
13641 if (reg != start_reg)
13642 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13643 reg + 1, start_reg - reg,
13644 FP_REGNUM, floats_offset - vfp_offset);
13647 if (TARGET_HARD_FLOAT && TARGET_VFP)
13649 int saved_size;
13651 /* The fldmd insns do not have base+offset addressing
13652 modes, so we use IP to hold the address. */
13653 saved_size = arm_get_vfp_saved_size ();
13655 if (saved_size > 0)
13657 floats_offset += saved_size;
13658 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13659 FP_REGNUM, floats_offset - vfp_offset);
13661 start_reg = FIRST_VFP_REGNUM;
13662 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13664 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13665 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13667 if (start_reg != reg)
13668 vfp_output_fldmd (f, IP_REGNUM,
13669 (start_reg - FIRST_VFP_REGNUM) / 2,
13670 (reg - start_reg) / 2);
13671 start_reg = reg + 2;
13674 if (start_reg != reg)
13675 vfp_output_fldmd (f, IP_REGNUM,
13676 (start_reg - FIRST_VFP_REGNUM) / 2,
13677 (reg - start_reg) / 2);
13680 if (TARGET_IWMMXT)
13682 /* The frame pointer is guaranteed to be non-double-word aligned.
13683 This is because it is set to (old_stack_pointer - 4) and the
13684 old_stack_pointer was double word aligned. Thus the offset to
13685 the iWMMXt registers to be loaded must also be non-double-word
13686 sized, so that the resultant address *is* double-word aligned.
13687 We can ignore floats_offset since that was already included in
13688 the live_regs_mask. */
13689 lrm_count += (lrm_count % 2 ? 2 : 1);
13691 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13692 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13694 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13695 reg, FP_REGNUM, lrm_count * 4);
13696 lrm_count += 2;
13700 /* saved_regs_mask should contain the IP, which at the time of stack
13701 frame generation actually contains the old stack pointer. So a
13702 quick way to unwind the stack is just pop the IP register directly
13703 into the stack pointer. */
13704 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13705 saved_regs_mask &= ~ (1 << IP_REGNUM);
13706 saved_regs_mask |= (1 << SP_REGNUM);
13708 /* There are two registers left in saved_regs_mask - LR and PC. We
13709 only need to restore the LR register (the return address), but to
13710 save time we can load it directly into the PC, unless we need a
13711 special function exit sequence, or we are not really returning. */
13712 if (really_return
13713 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13714 && !crtl->calls_eh_return)
13715 /* Delete the LR from the register mask, so that the LR on
13716 the stack is loaded into the PC in the register mask. */
13717 saved_regs_mask &= ~ (1 << LR_REGNUM);
13718 else
13719 saved_regs_mask &= ~ (1 << PC_REGNUM);
13721 /* We must use SP as the base register, because SP is one of the
13722 registers being restored. If an interrupt or page fault
13723 happens in the ldm instruction, the SP might or might not
13724 have been restored. That would be bad, as then SP will no
13725 longer indicate the safe area of stack, and we can get stack
13726 corruption. Using SP as the base register means that it will
13727 be reset correctly to the original value, should an interrupt
13728 occur. If the stack pointer already points at the right
13729 place, then omit the subtraction. */
13730 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
13731 || cfun->calls_alloca)
13732 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
13733 4 * bit_count (saved_regs_mask));
13734 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
13736 if (IS_INTERRUPT (func_type))
13737 /* Interrupt handlers will have pushed the
13738 IP onto the stack, so restore it now. */
13739 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
13741 else
13743 /* This branch is executed for ARM mode (non-apcs frames) and
13744 Thumb-2 mode. Frame layout is essentially the same for those
13745 cases, except that in ARM mode frame pointer points to the
13746 first saved register, while in Thumb-2 mode the frame pointer points
13747 to the last saved register.
13749 It is possible to make frame pointer point to last saved
13750 register in both cases, and remove some conditionals below.
13751 That means that fp setup in prologue would be just "mov fp, sp"
13752 and sp restore in epilogue would be just "mov sp, fp", whereas
13753 now we have to use add/sub in those cases. However, the value
13754 of that would be marginal, as both mov and add/sub are 32-bit
13755 in ARM mode, and it would require extra conditionals
13756 in arm_expand_prologue to distinguish the ARM-apcs-frame case
13757 (where frame pointer is required to point at first register)
13758 and ARM-non-apcs-frame. Therefore, such change is postponed
13759 until real need arise. */
13760 unsigned HOST_WIDE_INT amount;
13761 int rfe;
13762 /* Restore stack pointer if necessary. */
13763 if (TARGET_ARM && frame_pointer_needed)
13765 operands[0] = stack_pointer_rtx;
13766 operands[1] = hard_frame_pointer_rtx;
13768 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
13769 output_add_immediate (operands);
13771 else
13773 if (frame_pointer_needed)
13775 /* For Thumb-2 restore sp from the frame pointer.
13776 Operand restrictions mean we have to increment FP, then copy
13777 to SP. */
13778 amount = offsets->locals_base - offsets->saved_regs;
13779 operands[0] = hard_frame_pointer_rtx;
13781 else
13783 unsigned long count;
13784 operands[0] = stack_pointer_rtx;
13785 amount = offsets->outgoing_args - offsets->saved_regs;
13786 /* pop call clobbered registers if it avoids a
13787 separate stack adjustment. */
13788 count = offsets->saved_regs - offsets->saved_args;
13789 if (optimize_size
13790 && count != 0
13791 && !crtl->calls_eh_return
13792 && bit_count(saved_regs_mask) * 4 == count
13793 && !IS_INTERRUPT (func_type)
13794 && !crtl->tail_call_emit)
13796 unsigned long mask;
13797 mask = (1 << (arm_size_return_regs() / 4)) - 1;
13798 mask ^= 0xf;
13799 mask &= ~saved_regs_mask;
13800 reg = 0;
13801 while (bit_count (mask) * 4 > amount)
13803 while ((mask & (1 << reg)) == 0)
13804 reg++;
13805 mask &= ~(1 << reg);
13807 if (bit_count (mask) * 4 == amount) {
13808 amount = 0;
13809 saved_regs_mask |= mask;
13814 if (amount)
13816 operands[1] = operands[0];
13817 operands[2] = GEN_INT (amount);
13818 output_add_immediate (operands);
13820 if (frame_pointer_needed)
13821 asm_fprintf (f, "\tmov\t%r, %r\n",
13822 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
13825 if (TARGET_FPA_EMU2)
13827 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13828 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13829 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
13830 reg, SP_REGNUM);
13832 else
13834 start_reg = FIRST_FPA_REGNUM;
13836 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13838 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13840 if (reg - start_reg == 3)
13842 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
13843 start_reg, SP_REGNUM);
13844 start_reg = reg + 1;
13847 else
13849 if (reg != start_reg)
13850 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13851 start_reg, reg - start_reg,
13852 SP_REGNUM);
13854 start_reg = reg + 1;
13858 /* Just in case the last register checked also needs unstacking. */
13859 if (reg != start_reg)
13860 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13861 start_reg, reg - start_reg, SP_REGNUM);
13864 if (TARGET_HARD_FLOAT && TARGET_VFP)
13866 int end_reg = LAST_VFP_REGNUM + 1;
13868 /* Scan the registers in reverse order. We need to match
13869 any groupings made in the prologue and generate matching
13870 pop operations. */
13871 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
13873 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13874 && (!df_regs_ever_live_p (reg + 1)
13875 || call_used_regs[reg + 1]))
13877 if (end_reg > reg + 2)
13878 vfp_output_fldmd (f, SP_REGNUM,
13879 (reg + 2 - FIRST_VFP_REGNUM) / 2,
13880 (end_reg - (reg + 2)) / 2);
13881 end_reg = reg;
13884 if (end_reg > reg + 2)
13885 vfp_output_fldmd (f, SP_REGNUM, 0,
13886 (end_reg - (reg + 2)) / 2);
13889 if (TARGET_IWMMXT)
13890 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
13891 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13892 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
13894 /* If we can, restore the LR into the PC. */
13895 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
13896 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
13897 && !IS_STACKALIGN (func_type)
13898 && really_return
13899 && crtl->args.pretend_args_size == 0
13900 && saved_regs_mask & (1 << LR_REGNUM)
13901 && !crtl->calls_eh_return)
13903 saved_regs_mask &= ~ (1 << LR_REGNUM);
13904 saved_regs_mask |= (1 << PC_REGNUM);
13905 rfe = IS_INTERRUPT (func_type);
13907 else
13908 rfe = 0;
13910 /* Load the registers off the stack. If we only have one register
13911 to load, use the LDR instruction - it is faster. For Thumb-2
13912 always use pop and the assembler will pick the best instruction. */
13913 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
13914 && !IS_INTERRUPT(func_type))
13916 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
13918 else if (saved_regs_mask)
13920 if (saved_regs_mask & (1 << SP_REGNUM))
13921 /* Note - write back to the stack register is not enabled
13922 (i.e. "ldmfd sp!..."). We know that the stack pointer is
13923 in the list of registers and if we add writeback the
13924 instruction becomes UNPREDICTABLE. */
13925 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
13926 rfe);
13927 else if (TARGET_ARM)
13928 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
13929 rfe);
13930 else
13931 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
13934 if (crtl->args.pretend_args_size)
13936 /* Unwind the pre-pushed regs. */
13937 operands[0] = operands[1] = stack_pointer_rtx;
13938 operands[2] = GEN_INT (crtl->args.pretend_args_size);
13939 output_add_immediate (operands);
13943 /* We may have already restored PC directly from the stack. */
13944 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
13945 return "";
13947 /* Stack adjustment for exception handler. */
13948 if (crtl->calls_eh_return)
13949 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
13950 ARM_EH_STACKADJ_REGNUM);
13952 /* Generate the return instruction. */
13953 switch ((int) ARM_FUNC_TYPE (func_type))
13955 case ARM_FT_ISR:
13956 case ARM_FT_FIQ:
13957 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
13958 break;
13960 case ARM_FT_EXCEPTION:
13961 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13962 break;
13964 case ARM_FT_INTERWORKED:
13965 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13966 break;
13968 default:
13969 if (IS_STACKALIGN (func_type))
13971 /* See comment in arm_expand_prologue. */
13972 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
13974 if (arm_arch5 || arm_arch4t)
13975 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13976 else
13977 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13978 break;
13981 return "";
13984 static void
13985 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
13986 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
13988 arm_stack_offsets *offsets;
13990 if (TARGET_THUMB1)
13992 int regno;
13994 /* Emit any call-via-reg trampolines that are needed for v4t support
13995 of call_reg and call_value_reg type insns. */
13996 for (regno = 0; regno < LR_REGNUM; regno++)
13998 rtx label = cfun->machine->call_via[regno];
14000 if (label != NULL)
14002 switch_to_section (function_section (current_function_decl));
14003 targetm.asm_out.internal_label (asm_out_file, "L",
14004 CODE_LABEL_NUMBER (label));
14005 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14009 /* ??? Probably not safe to set this here, since it assumes that a
14010 function will be emitted as assembly immediately after we generate
14011 RTL for it. This does not happen for inline functions. */
14012 cfun->machine->return_used_this_function = 0;
14014 else /* TARGET_32BIT */
14016 /* We need to take into account any stack-frame rounding. */
14017 offsets = arm_get_frame_offsets ();
14019 gcc_assert (!use_return_insn (FALSE, NULL)
14020 || (cfun->machine->return_used_this_function != 0)
14021 || offsets->saved_regs == offsets->outgoing_args
14022 || frame_pointer_needed);
14024 /* Reset the ARM-specific per-function variables. */
14025 after_arm_reorg = 0;
14029 /* Generate and emit an insn that we will recognize as a push_multi.
14030 Unfortunately, since this insn does not reflect very well the actual
14031 semantics of the operation, we need to annotate the insn for the benefit
14032 of DWARF2 frame unwind information. */
14033 static rtx
14034 emit_multi_reg_push (unsigned long mask)
14036 int num_regs = 0;
14037 int num_dwarf_regs;
14038 int i, j;
14039 rtx par;
14040 rtx dwarf;
14041 int dwarf_par_index;
14042 rtx tmp, reg;
14044 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14045 if (mask & (1 << i))
14046 num_regs++;
14048 gcc_assert (num_regs && num_regs <= 16);
14050 /* We don't record the PC in the dwarf frame information. */
14051 num_dwarf_regs = num_regs;
14052 if (mask & (1 << PC_REGNUM))
14053 num_dwarf_regs--;
14055 /* For the body of the insn we are going to generate an UNSPEC in
14056 parallel with several USEs. This allows the insn to be recognized
14057 by the push_multi pattern in the arm.md file.
14059 The body of the insn looks something like this:
14061 (parallel [
14062 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14063 (const_int:SI <num>)))
14064 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14065 (use (reg:SI XX))
14066 (use (reg:SI YY))
14070 For the frame note however, we try to be more explicit and actually
14071 show each register being stored into the stack frame, plus a (single)
14072 decrement of the stack pointer. We do it this way in order to be
14073 friendly to the stack unwinding code, which only wants to see a single
14074 stack decrement per instruction. The RTL we generate for the note looks
14075 something like this:
14077 (sequence [
14078 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14079 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14080 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14081 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14085 FIXME: In an ideal world the PRE_MODIFY would not exist and
14086 instead we'd have a parallel expression detailing all
14087 the stores to the various memory addresses so that debug
14088 information is more up-to-date. Remember however while writing
14089 this to take care of the constraints with the push instruction.
14091 Note also that this has to be taken care of for the VFP registers.
14093 For more see PR43399. */
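   /* As a concrete illustration (register choice is arbitrary), pushing
      {r4, r5, lr} gives num_regs == 3, a PRE_MODIFY of the stack pointer
      by -12, and a frame note of roughly:

      (sequence [
	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -12)))
	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI r5))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI lr))
	 ])  */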
14095 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14096 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14097 dwarf_par_index = 1;
14099 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14101 if (mask & (1 << i))
14103 reg = gen_rtx_REG (SImode, i);
14105 XVECEXP (par, 0, 0)
14106 = gen_rtx_SET (VOIDmode,
14107 gen_frame_mem
14108 (BLKmode,
14109 gen_rtx_PRE_MODIFY (Pmode,
14110 stack_pointer_rtx,
14111 plus_constant
14112 (stack_pointer_rtx,
14113 -4 * num_regs))
14115 gen_rtx_UNSPEC (BLKmode,
14116 gen_rtvec (1, reg),
14117 UNSPEC_PUSH_MULT));
14119 if (i != PC_REGNUM)
14121 tmp = gen_rtx_SET (VOIDmode,
14122 gen_frame_mem (SImode, stack_pointer_rtx),
14123 reg);
14124 RTX_FRAME_RELATED_P (tmp) = 1;
14125 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14126 dwarf_par_index++;
14129 break;
14133 for (j = 1, i++; j < num_regs; i++)
14135 if (mask & (1 << i))
14137 reg = gen_rtx_REG (SImode, i);
14139 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14141 if (i != PC_REGNUM)
14143 tmp
14144 = gen_rtx_SET (VOIDmode,
14145 gen_frame_mem
14146 (SImode,
14147 plus_constant (stack_pointer_rtx,
14148 4 * j)),
14149 reg);
14150 RTX_FRAME_RELATED_P (tmp) = 1;
14151 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14154 j++;
14158 par = emit_insn (par);
14160 tmp = gen_rtx_SET (VOIDmode,
14161 stack_pointer_rtx,
14162 plus_constant (stack_pointer_rtx, -4 * num_regs));
14163 RTX_FRAME_RELATED_P (tmp) = 1;
14164 XVECEXP (dwarf, 0, 0) = tmp;
14166 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14168 return par;
14171 /* Calculate the size of the return value that is passed in registers. */
14172 static unsigned
14173 arm_size_return_regs (void)
14175 enum machine_mode mode;
14177 if (crtl->return_rtx != 0)
14178 mode = GET_MODE (crtl->return_rtx);
14179 else
14180 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14182 return GET_MODE_SIZE (mode);
14185 static rtx
14186 emit_sfm (int base_reg, int count)
14188 rtx par;
14189 rtx dwarf;
14190 rtx tmp, reg;
14191 int i;
14193 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14194 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14196 reg = gen_rtx_REG (XFmode, base_reg++);
14198 XVECEXP (par, 0, 0)
14199 = gen_rtx_SET (VOIDmode,
14200 gen_frame_mem
14201 (BLKmode,
14202 gen_rtx_PRE_MODIFY (Pmode,
14203 stack_pointer_rtx,
14204 plus_constant
14205 (stack_pointer_rtx,
14206 -12 * count))
14208 gen_rtx_UNSPEC (BLKmode,
14209 gen_rtvec (1, reg),
14210 UNSPEC_PUSH_MULT));
14211 tmp = gen_rtx_SET (VOIDmode,
14212 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14213 RTX_FRAME_RELATED_P (tmp) = 1;
14214 XVECEXP (dwarf, 0, 1) = tmp;
14216 for (i = 1; i < count; i++)
14218 reg = gen_rtx_REG (XFmode, base_reg++);
14219 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14221 tmp = gen_rtx_SET (VOIDmode,
14222 gen_frame_mem (XFmode,
14223 plus_constant (stack_pointer_rtx,
14224 i * 12)),
14225 reg);
14226 RTX_FRAME_RELATED_P (tmp) = 1;
14227 XVECEXP (dwarf, 0, i + 1) = tmp;
14230 tmp = gen_rtx_SET (VOIDmode,
14231 stack_pointer_rtx,
14232 plus_constant (stack_pointer_rtx, -12 * count));
14234 RTX_FRAME_RELATED_P (tmp) = 1;
14235 XVECEXP (dwarf, 0, 0) = tmp;
14237 par = emit_insn (par);
14238 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14240 return par;
14244 /* Return true if the current function needs to save/restore LR. */
14246 static bool
14247 thumb_force_lr_save (void)
14249 return !cfun->machine->lr_save_eliminated
14250 && (!leaf_function_p ()
14251 || thumb_far_jump_used_p ()
14252 || df_regs_ever_live_p (LR_REGNUM));
14256 /* Compute the distance from register FROM to register TO.
14257 These can be the arg pointer (26), the soft frame pointer (25),
14258 the stack pointer (13) or the hard frame pointer (11).
14259 In Thumb mode r7 is used as the soft frame pointer, if needed.
14260 Typical stack layout looks like this:
14262 old stack pointer -> | |
14263 ----
14264 | | \
14265 | | saved arguments for
14266 | | vararg functions
14267 | | /
14269 hard FP & arg pointer -> | | \
14270 | | stack
14271 | | frame
14272 | | /
14274 | | \
14275 | | call saved
14276 | | registers
14277 soft frame pointer -> | | /
14279 | | \
14280 | | local
14281 | | variables
14282 locals base pointer -> | | /
14284 | | \
14285 | | outgoing
14286 | | arguments
14287 current stack pointer -> | | /
14290 For a given function some or all of these stack components
14291 may not be needed, giving rise to the possibility of
14292 eliminating some of the registers.
14294 The values returned by this function must reflect the behavior
14295 of arm_expand_prologue() and arm_compute_save_reg_mask().
14297 The sign of the number returned reflects the direction of stack
14298 growth, so the values are positive for all eliminations except
14299 from the soft frame pointer to the hard frame pointer.
14301 SFP may point just inside the local variables block to ensure correct
14302 alignment. */
14305 /* Calculate stack offsets. These are used to calculate register elimination
14306 offsets and in prologue/epilogue code. Also calculates which registers
14307 should be saved. */
14309 static arm_stack_offsets *
14310 arm_get_frame_offsets (void)
14312 struct arm_stack_offsets *offsets;
14313 unsigned long func_type;
14314 int leaf;
14315 int saved;
14316 int core_saved;
14317 HOST_WIDE_INT frame_size;
14318 int i;
14320 offsets = &cfun->machine->stack_offsets;
14322 /* We need to know if we are a leaf function. Unfortunately, it
14323 is possible to be called after start_sequence has been called,
14324 which causes get_insns to return the insns for the sequence,
14325 not the function, which will cause leaf_function_p to return
14326 the incorrect result.
14328 We only need to know about leaf functions once reload has completed, and the
14329 frame size cannot be changed after that time, so we can safely
14330 use the cached value. */
14332 if (reload_completed)
14333 return offsets;
14335 /* Initially this is the size of the local variables. It will be translated
14336 into an offset once we have determined the size of preceding data. */
14337 frame_size = ROUND_UP_WORD (get_frame_size ());
14339 leaf = leaf_function_p ();
14341 /* Space for variadic functions. */
14342 offsets->saved_args = crtl->args.pretend_args_size;
14344 /* In Thumb mode this is incorrect, but never used. */
14345 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
14346 arm_compute_static_chain_stack_bytes();
14348 if (TARGET_32BIT)
14350 unsigned int regno;
14352 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
14353 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14354 saved = core_saved;
14356 /* We know that SP will be doubleword aligned on entry, and we must
14357 preserve that condition at any subroutine call. We also require the
14358 soft frame pointer to be doubleword aligned. */
14360 if (TARGET_REALLY_IWMMXT)
14362 /* Check for the call-saved iWMMXt registers. */
14363 for (regno = FIRST_IWMMXT_REGNUM;
14364 regno <= LAST_IWMMXT_REGNUM;
14365 regno++)
14366 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14367 saved += 8;
14370 func_type = arm_current_func_type ();
14371 if (! IS_VOLATILE (func_type))
14373 /* Space for saved FPA registers. */
14374 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
14375 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14376 saved += 12;
14378 /* Space for saved VFP registers. */
14379 if (TARGET_HARD_FLOAT && TARGET_VFP)
14380 saved += arm_get_vfp_saved_size ();
14383 else /* TARGET_THUMB1 */
14385 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14386 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14387 saved = core_saved;
14388 if (TARGET_BACKTRACE)
14389 saved += 16;
14392 /* Saved registers include the stack frame. */
14393 offsets->saved_regs = offsets->saved_args + saved +
14394 arm_compute_static_chain_stack_bytes();
14395 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
14396 /* A leaf function does not need any stack alignment if it has nothing
14397 on the stack. */
14398 if (leaf && frame_size == 0)
14400 offsets->outgoing_args = offsets->soft_frame;
14401 offsets->locals_base = offsets->soft_frame;
14402 return offsets;
14405 /* Ensure SFP has the correct alignment. */
14406 if (ARM_DOUBLEWORD_ALIGN
14407 && (offsets->soft_frame & 7))
14409 offsets->soft_frame += 4;
14410 /* Try to align stack by pushing an extra reg. Don't bother doing this
14411 when there is a stack frame as the alignment will be rolled into
14412 the normal stack adjustment. */
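      /* For example (numbers are illustrative): a function that makes
	 calls but has no locals and no stacked outgoing arguments,
	 saving {r4, r5, r6, r7, lr} (20 bytes), leaves soft_frame on a
	 4 byte boundary; it is bumped to 24 and a spare register such as
	 r3 is added to the push/pop, keeping sp doubleword aligned with
	 no explicit sp adjustment.  */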
14413 if (frame_size + crtl->outgoing_args_size == 0)
14415 int reg = -1;
14417 /* If it is safe to use r3, then do so. This sometimes
14418 generates better code on Thumb-2 by avoiding the need to
14419 use 32-bit push/pop instructions. */
14420 if (!crtl->tail_call_emit
14421 && arm_size_return_regs () <= 12)
14423 reg = 3;
14425 else
14426 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14428 if ((offsets->saved_regs_mask & (1 << i)) == 0)
14430 reg = i;
14431 break;
14435 if (reg != -1)
14437 offsets->saved_regs += 4;
14438 offsets->saved_regs_mask |= (1 << reg);
14443 offsets->locals_base = offsets->soft_frame + frame_size;
14444 offsets->outgoing_args = (offsets->locals_base
14445 + crtl->outgoing_args_size);
14447 if (ARM_DOUBLEWORD_ALIGN)
14449 /* Ensure SP remains doubleword aligned. */
14450 if (offsets->outgoing_args & 7)
14451 offsets->outgoing_args += 4;
14452 gcc_assert (!(offsets->outgoing_args & 7));
14455 return offsets;
14459 /* Calculate the relative offsets for the different stack pointers. Positive
14460 offsets are in the direction of stack growth. */
14462 HOST_WIDE_INT
14463 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
14465 arm_stack_offsets *offsets;
14467 offsets = arm_get_frame_offsets ();
14469 /* OK, now we have enough information to compute the distances.
14470 There must be an entry in these switch tables for each pair
14471 of registers in ELIMINABLE_REGS, even if some of the entries
14472 seem to be redundant or useless. */
14473 switch (from)
14475 case ARG_POINTER_REGNUM:
14476 switch (to)
14478 case THUMB_HARD_FRAME_POINTER_REGNUM:
14479 return 0;
14481 case FRAME_POINTER_REGNUM:
14482 /* This is the reverse of the soft frame pointer
14483 to hard frame pointer elimination below. */
14484 return offsets->soft_frame - offsets->saved_args;
14486 case ARM_HARD_FRAME_POINTER_REGNUM:
14487 /* This is only non-zero in the case where the static chain register
14488 is stored above the frame. */
14489 return offsets->frame - offsets->saved_args - 4;
14491 case STACK_POINTER_REGNUM:
14492 /* If nothing has been pushed on the stack at all
14493 then this will return -4. This *is* correct! */
14494 return offsets->outgoing_args - (offsets->saved_args + 4);
14496 default:
14497 gcc_unreachable ();
14499 gcc_unreachable ();
14501 case FRAME_POINTER_REGNUM:
14502 switch (to)
14504 case THUMB_HARD_FRAME_POINTER_REGNUM:
14505 return 0;
14507 case ARM_HARD_FRAME_POINTER_REGNUM:
14508 /* The hard frame pointer points to the top entry in the
14509 stack frame. The soft frame pointer to the bottom entry
14510 in the stack frame. If there is no stack frame at all,
14511 then they are identical. */
14513 return offsets->frame - offsets->soft_frame;
14515 case STACK_POINTER_REGNUM:
14516 return offsets->outgoing_args - offsets->soft_frame;
14518 default:
14519 gcc_unreachable ();
14521 gcc_unreachable ();
14523 default:
14524 /* You cannot eliminate from the stack pointer.
14525 In theory you could eliminate from the hard frame
14526 pointer to the stack pointer, but this will never
14527 happen, since if a stack frame is not needed the
14528 hard frame pointer will never be used. */
14529 gcc_unreachable ();
14533 /* Given FROM and TO register numbers, say whether this elimination is
14534 allowed. Frame pointer elimination is automatically handled.
14536 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14537 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14538 pointer, we must eliminate FRAME_POINTER_REGNUM into
14539 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14540 ARG_POINTER_REGNUM. */
14542 bool
14543 arm_can_eliminate (const int from, const int to)
14545 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14546 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14547 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
14548 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
14549 true);
14552 /* Emit RTL to save coprocessor registers on function entry. Returns the
14553 number of bytes pushed. */
14555 static int
14556 arm_save_coproc_regs(void)
14558 int saved_size = 0;
14559 unsigned reg;
14560 unsigned start_reg;
14561 rtx insn;
14563 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14564 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14566 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14567 insn = gen_rtx_MEM (V2SImode, insn);
14568 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14569 RTX_FRAME_RELATED_P (insn) = 1;
14570 saved_size += 8;
14573 /* Save any floating point call-saved registers used by this
14574 function. */
14575 if (TARGET_FPA_EMU2)
14577 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14578 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14580 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14581 insn = gen_rtx_MEM (XFmode, insn);
14582 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14583 RTX_FRAME_RELATED_P (insn) = 1;
14584 saved_size += 12;
14587 else
14589 start_reg = LAST_FPA_REGNUM;
14591 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14593 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14595 if (start_reg - reg == 3)
14597 insn = emit_sfm (reg, 4);
14598 RTX_FRAME_RELATED_P (insn) = 1;
14599 saved_size += 48;
14600 start_reg = reg - 1;
14603 else
14605 if (start_reg != reg)
14607 insn = emit_sfm (reg + 1, start_reg - reg);
14608 RTX_FRAME_RELATED_P (insn) = 1;
14609 saved_size += (start_reg - reg) * 12;
14611 start_reg = reg - 1;
14615 if (start_reg != reg)
14617 insn = emit_sfm (reg + 1, start_reg - reg);
14618 saved_size += (start_reg - reg) * 12;
14619 RTX_FRAME_RELATED_P (insn) = 1;
14622 if (TARGET_HARD_FLOAT && TARGET_VFP)
14624 start_reg = FIRST_VFP_REGNUM;
14626 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14628 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14629 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14631 if (start_reg != reg)
14632 saved_size += vfp_emit_fstmd (start_reg,
14633 (reg - start_reg) / 2);
14634 start_reg = reg + 2;
14637 if (start_reg != reg)
14638 saved_size += vfp_emit_fstmd (start_reg,
14639 (reg - start_reg) / 2);
14641 return saved_size;
14645 /* Set the Thumb frame pointer from the stack pointer. */
14647 static void
14648 thumb_set_frame_pointer (arm_stack_offsets *offsets)
14650 HOST_WIDE_INT amount;
14651 rtx insn, dwarf;
14653 amount = offsets->outgoing_args - offsets->locals_base;
14654 if (amount < 1024)
14655 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14656 stack_pointer_rtx, GEN_INT (amount)));
14657 else
14659 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
14660 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
14661 expects the first two operands to be the same. */
14662 if (TARGET_THUMB2)
14664 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14665 stack_pointer_rtx,
14666 hard_frame_pointer_rtx));
14668 else
14670 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14671 hard_frame_pointer_rtx,
14672 stack_pointer_rtx));
14674 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14675 plus_constant (stack_pointer_rtx, amount));
14676 RTX_FRAME_RELATED_P (dwarf) = 1;
14677 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14680 RTX_FRAME_RELATED_P (insn) = 1;
14683 /* Generate the prologue instructions for entry into an ARM or Thumb-2
14684 function. */
14685 void
14686 arm_expand_prologue (void)
14688 rtx amount;
14689 rtx insn;
14690 rtx ip_rtx;
14691 unsigned long live_regs_mask;
14692 unsigned long func_type;
14693 int fp_offset = 0;
14694 int saved_pretend_args = 0;
14695 int saved_regs = 0;
14696 unsigned HOST_WIDE_INT args_to_push;
14697 arm_stack_offsets *offsets;
14699 func_type = arm_current_func_type ();
14701 /* Naked functions don't have prologues. */
14702 if (IS_NAKED (func_type))
14703 return;
14705 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
14706 args_to_push = crtl->args.pretend_args_size;
14708 /* Compute which register we will have to save onto the stack. */
14709 offsets = arm_get_frame_offsets ();
14710 live_regs_mask = offsets->saved_regs_mask;
14712 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
14714 if (IS_STACKALIGN (func_type))
14716 rtx dwarf;
14717 rtx r0;
14718 rtx r1;
14719 /* Handle a word-aligned stack pointer. We generate the following:
14721 mov r0, sp
14722 bic r1, r0, #7
14723 mov sp, r1
14724 <save and restore r0 in normal prologue/epilogue>
14725 mov sp, r0
14726 bx lr
14728 The unwinder doesn't need to know about the stack realignment.
14729 Just tell it we saved SP in r0. */
14730 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
14732 r0 = gen_rtx_REG (SImode, 0);
14733 r1 = gen_rtx_REG (SImode, 1);
14734 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
14735 compiler won't choke. */
14736 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
14737 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
14738 insn = gen_movsi (r0, stack_pointer_rtx);
14739 RTX_FRAME_RELATED_P (insn) = 1;
14740 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14741 emit_insn (insn);
14742 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
14743 emit_insn (gen_movsi (stack_pointer_rtx, r1));
14746 /* For APCS frames, if IP register is clobbered
14747 when creating frame, save that register in a special
14748 way. */
14749 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14751 if (IS_INTERRUPT (func_type))
14753 /* Interrupt functions must not corrupt any registers.
14754 Creating a frame pointer however, corrupts the IP
14755 register, so we must push it first. */
14756 insn = emit_multi_reg_push (1 << IP_REGNUM);
14758 /* Do not set RTX_FRAME_RELATED_P on this insn.
14759 The dwarf stack unwinding code only wants to see one
14760 stack decrement per function, and this is not it. If
14761 this instruction is labeled as being part of the frame
14762 creation sequence then dwarf2out_frame_debug_expr will
14763 die when it encounters the assignment of IP to FP
14764 later on, since the use of SP here establishes SP as
14765 the CFA register and not IP.
14767 Anyway this instruction is not really part of the stack
14768 frame creation although it is part of the prologue. */
14770 else if (IS_NESTED (func_type))
14772 /* The static chain register is the same as the IP register
14773 used as a scratch register during stack frame creation.
14774 To get around this we need to find somewhere to store IP
14775 whilst the frame is being created. We try the following
14776 places in order:
14778 1. The last argument register.
14779 2. A slot on the stack above the frame. (This only
14780 works if the function is not a varargs function).
14781 3. Register r3, after pushing the argument registers
14782 onto the stack.
14784 Note - we only need to tell the dwarf2 backend about the SP
14785 adjustment in the second variant; the static chain register
14786 doesn't need to be unwound, as it doesn't contain a value
14787 inherited from the caller. */
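	  /* In instruction terms the three options come out roughly as:
	     (1) mov r3, ip;  (2) str ip, [sp, #-4]! plus a note telling
	     the unwinder that sp moved down by 4;  (3) push the argument
	     registers (or drop sp), then mov r3, ip.  */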
14789 if (df_regs_ever_live_p (3) == false)
14790 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14791 else if (args_to_push == 0)
14793 rtx dwarf;
14795 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
14796 saved_regs += 4;
14798 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
14799 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
14800 fp_offset = 4;
14802 /* Just tell the dwarf backend that we adjusted SP. */
14803 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14804 plus_constant (stack_pointer_rtx,
14805 -fp_offset));
14806 RTX_FRAME_RELATED_P (insn) = 1;
14807 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14809 else
14811 /* Store the args on the stack. */
14812 if (cfun->machine->uses_anonymous_args)
14813 insn = emit_multi_reg_push
14814 ((0xf0 >> (args_to_push / 4)) & 0xf);
14815 else
14816 insn = emit_insn
14817 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14818 GEN_INT (- args_to_push)));
14820 RTX_FRAME_RELATED_P (insn) = 1;
14822 saved_pretend_args = 1;
14823 fp_offset = args_to_push;
14824 args_to_push = 0;
14826 /* Now reuse r3 to preserve IP. */
14827 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14831 insn = emit_set_insn (ip_rtx,
14832 plus_constant (stack_pointer_rtx, fp_offset));
14833 RTX_FRAME_RELATED_P (insn) = 1;
14836 if (args_to_push)
14838 /* Push the argument registers, or reserve space for them. */
14839 if (cfun->machine->uses_anonymous_args)
14840 insn = emit_multi_reg_push
14841 ((0xf0 >> (args_to_push / 4)) & 0xf);
14842 else
14843 insn = emit_insn
14844 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14845 GEN_INT (- args_to_push)));
14846 RTX_FRAME_RELATED_P (insn) = 1;
14849 /* If this is an interrupt service routine, and the link register
14850 is going to be pushed, and we're not generating the extra
14851 push of IP (needed when a frame is needed and the frame layout is APCS),
14852 then subtracting four from LR now will mean that the function return
14853 can be done with a single instruction. */
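  /* E.g. an IRQ handler that pushes {r0-r3, ip, lr} can then return with
     a single "ldmfd sp!, {r0-r3, ip, pc}^" instead of popping lr and
     issuing a separate "subs pc, lr, #4" (the register set is only an
     illustration).  */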
14854 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
14855 && (live_regs_mask & (1 << LR_REGNUM)) != 0
14856 && !(frame_pointer_needed && TARGET_APCS_FRAME)
14857 && TARGET_ARM)
14859 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
14861 emit_set_insn (lr, plus_constant (lr, -4));
14864 if (live_regs_mask)
14866 saved_regs += bit_count (live_regs_mask) * 4;
14867 if (optimize_size && !frame_pointer_needed
14868 && saved_regs == offsets->saved_regs - offsets->saved_args)
14870 /* If no coprocessor registers are being pushed and we don't have
14871 to worry about a frame pointer then push extra registers to
14872 create the stack frame. This is done in a way that does not
14873 alter the frame layout, so is independent of the epilogue. */
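	  /* For example (illustrative only): if the live mask is
	     {r4-r7, lr} and 8 bytes of locals are needed, the dead
	     argument registers r0 and r1 are added to the push, so the
	     store-multiple itself creates the 8 byte slot and the usual
	     "sub sp, sp, #8" is not emitted.  */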
14874 int n;
14875 int frame;
14876 n = 0;
14877 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
14878 n++;
14879 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
14880 if (frame && n * 4 >= frame)
14882 n = frame / 4;
14883 live_regs_mask |= (1 << n) - 1;
14884 saved_regs += frame;
14887 insn = emit_multi_reg_push (live_regs_mask);
14888 RTX_FRAME_RELATED_P (insn) = 1;
14891 if (! IS_VOLATILE (func_type))
14892 saved_regs += arm_save_coproc_regs ();
14894 if (frame_pointer_needed && TARGET_ARM)
14896 /* Create the new frame pointer. */
14897 if (TARGET_APCS_FRAME)
14899 insn = GEN_INT (-(4 + args_to_push + fp_offset));
14900 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
14901 RTX_FRAME_RELATED_P (insn) = 1;
14903 if (IS_NESTED (func_type))
14905 /* Recover the static chain register. */
14906 if (!df_regs_ever_live_p (3)
14907 || saved_pretend_args)
14908 insn = gen_rtx_REG (SImode, 3);
14909 else /* if (crtl->args.pretend_args_size == 0) */
14911 insn = plus_constant (hard_frame_pointer_rtx, 4);
14912 insn = gen_frame_mem (SImode, insn);
14914 emit_set_insn (ip_rtx, insn);
14915 /* Add a USE to stop propagate_one_insn() from barfing. */
14916 emit_insn (gen_prologue_use (ip_rtx));
14919 else
14921 insn = GEN_INT (saved_regs - 4);
14922 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14923 stack_pointer_rtx, insn));
14924 RTX_FRAME_RELATED_P (insn) = 1;
14928 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
14930 /* This add can produce multiple insns for a large constant, so we
14931 need to get tricky. */
14932 rtx last = get_last_insn ();
14934 amount = GEN_INT (offsets->saved_args + saved_regs
14935 - offsets->outgoing_args);
14937 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14938 amount));
14939 do
14941 last = last ? NEXT_INSN (last) : get_insns ();
14942 RTX_FRAME_RELATED_P (last) = 1;
14944 while (last != insn);
14946 /* If the frame pointer is needed, emit a special barrier that
14947 will prevent the scheduler from moving stores to the frame
14948 before the stack adjustment. */
14949 if (frame_pointer_needed)
14950 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
14951 hard_frame_pointer_rtx));
14955 if (frame_pointer_needed && TARGET_THUMB2)
14956 thumb_set_frame_pointer (offsets);
14958 if (flag_pic && arm_pic_register != INVALID_REGNUM)
14960 unsigned long mask;
14962 mask = live_regs_mask;
14963 mask &= THUMB2_WORK_REGS;
14964 if (!IS_NESTED (func_type))
14965 mask |= (1 << IP_REGNUM);
14966 arm_load_pic_register (mask);
14969 /* If we are profiling, make sure no instructions are scheduled before
14970 the call to mcount. Similarly if the user has requested no
14971 scheduling in the prolog. Similarly if we want non-call exceptions
14972 using the EABI unwinder, to prevent faulting instructions from being
14973 swapped with a stack adjustment. */
14974 if (crtl->profile || !TARGET_SCHED_PROLOG
14975 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
14976 emit_insn (gen_blockage ());
14978 /* If the link register is being kept alive, with the return address in it,
14979 then make sure that it does not get reused by the ce2 pass. */
14980 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
14981 cfun->machine->lr_save_eliminated = 1;
14984 /* Print condition code to STREAM. Helper function for arm_print_operand. */
14985 static void
14986 arm_print_condition (FILE *stream)
14988 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
14990 /* Branch conversion is not implemented for Thumb-2. */
14991 if (TARGET_THUMB)
14993 output_operand_lossage ("predicated Thumb instruction");
14994 return;
14996 if (current_insn_predicate != NULL)
14998 output_operand_lossage
14999 ("predicated instruction in conditional sequence");
15000 return;
15003 fputs (arm_condition_codes[arm_current_cc], stream);
15005 else if (current_insn_predicate)
15007 enum arm_cond_code code;
15009 if (TARGET_THUMB1)
15011 output_operand_lossage ("predicated Thumb instruction");
15012 return;
15015 code = get_arm_condition_code (current_insn_predicate);
15016 fputs (arm_condition_codes[code], stream);
15021 /* If CODE is 'd', then X is a condition operand and the instruction
15022 should only be executed if the condition is true.
15023 If CODE is 'D', then X is a condition operand and the instruction
15024 should only be executed if the condition is false: however, if the mode
15025 of the comparison is CCFPEmode, then always execute the instruction -- we
15026 do this because in these circumstances !GE does not necessarily imply LT;
15027 in these cases the instruction pattern will take care to make sure that
15028 an instruction containing %d will follow, thereby undoing the effects of
15029 doing this instruction unconditionally.
15030 If CODE is 'N' then X is a floating point operand that must be negated
15031 before output.
15032 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15033 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
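/* For instance, %B applied to (const_int 5) prints -6 (the sign-extended
   bitwise NOT of 5), and %M applied to a DImode value held in r4 prints
   "{r4-r5}"; the operand choices are only illustrative.  */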
15034 void
15035 arm_print_operand (FILE *stream, rtx x, int code)
15037 switch (code)
15039 case '@':
15040 fputs (ASM_COMMENT_START, stream);
15041 return;
15043 case '_':
15044 fputs (user_label_prefix, stream);
15045 return;
15047 case '|':
15048 fputs (REGISTER_PREFIX, stream);
15049 return;
15051 case '?':
15052 arm_print_condition (stream);
15053 return;
15055 case '(':
15056 /* Nothing in unified syntax, otherwise the current condition code. */
15057 if (!TARGET_UNIFIED_ASM)
15058 arm_print_condition (stream);
15059 break;
15061 case ')':
15062 /* The current condition code in unified syntax, otherwise nothing. */
15063 if (TARGET_UNIFIED_ASM)
15064 arm_print_condition (stream);
15065 break;
15067 case '.':
15068 /* The current condition code for a condition code setting instruction.
15069 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15070 if (TARGET_UNIFIED_ASM)
15072 fputc('s', stream);
15073 arm_print_condition (stream);
15075 else
15077 arm_print_condition (stream);
15078 fputc('s', stream);
15080 return;
15082 case '!':
15083 /* If the instruction is conditionally executed then print
15084 the current condition code, otherwise print 's'. */
15085 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15086 if (current_insn_predicate)
15087 arm_print_condition (stream);
15088 else
15089 fputc('s', stream);
15090 break;
15092 /* %# is a "break" sequence. It doesn't output anything, but is used to
15093 separate e.g. operand numbers from following text, if that text consists
15094 of further digits which we don't want to be part of the operand
15095 number. */
15096 case '#':
15097 return;
15099 case 'N':
15101 REAL_VALUE_TYPE r;
15102 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15103 r = real_value_negate (&r);
15104 fprintf (stream, "%s", fp_const_from_val (&r));
15106 return;
15108 /* An integer or symbol address without a preceding # sign. */
15109 case 'c':
15110 switch (GET_CODE (x))
15112 case CONST_INT:
15113 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15114 break;
15116 case SYMBOL_REF:
15117 output_addr_const (stream, x);
15118 break;
15120 default:
15121 gcc_unreachable ();
15123 return;
15125 case 'B':
15126 if (GET_CODE (x) == CONST_INT)
15128 HOST_WIDE_INT val;
15129 val = ARM_SIGN_EXTEND (~INTVAL (x));
15130 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15132 else
15134 putc ('~', stream);
15135 output_addr_const (stream, x);
15137 return;
15139 case 'L':
15140 /* The low 16 bits of an immediate constant. */
15141 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
15142 return;
15144 case 'i':
15145 fprintf (stream, "%s", arithmetic_instr (x, 1));
15146 return;
15148 /* Truncate Cirrus shift counts. */
15149 case 's':
15150 if (GET_CODE (x) == CONST_INT)
15152 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15153 return;
15155 arm_print_operand (stream, x, 0);
15156 return;
15158 case 'I':
15159 fprintf (stream, "%s", arithmetic_instr (x, 0));
15160 return;
15162 case 'S':
15164 HOST_WIDE_INT val;
15165 const char *shift;
15167 if (!shift_operator (x, SImode))
15169 output_operand_lossage ("invalid shift operand");
15170 break;
15173 shift = shift_op (x, &val);
15175 if (shift)
15177 fprintf (stream, ", %s ", shift);
15178 if (val == -1)
15179 arm_print_operand (stream, XEXP (x, 1), 0);
15180 else
15181 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15184 return;
15186 /* An explanation of the 'Q', 'R' and 'H' register operands:
15188 In a pair of registers containing a DI or DF value the 'Q'
15189 operand returns the register number of the register containing
15190 the least significant part of the value. The 'R' operand returns
15191 the register number of the register containing the most
15192 significant part of the value.
15194 The 'H' operand returns the higher of the two register numbers.
15195 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15196 same as the 'Q' operand, since the most significant part of the
15197 value is held in the lower number register. The reverse is true
15198 on systems where WORDS_BIG_ENDIAN is false.
15200 The purpose of these operands is to distinguish between cases
15201 where the endian-ness of the values is important (for example
15202 when they are added together), and cases where the endian-ness
15203 is irrelevant, but the order of register operations is important.
15204 For example when loading a value from memory into a register
15205 pair, the endian-ness does not matter. Provided that the value
15206 from the lower memory address is put into the lower numbered
15207 register, and the value from the higher address is put into the
15208 higher numbered register, the load will work regardless of whether
15209 the value being loaded is big-wordian or little-wordian. The
15210 order of the two register loads can matter however, if the address
15211 of the memory location is actually held in one of the registers
15212 being overwritten by the load. */
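    /* As an illustration, for a DImode value held in r0/r1 on a
       little-endian target %Q prints r0, %R prints r1 and %H prints r1;
       with WORDS_BIG_ENDIAN the roles of %Q and %R swap while %H is
       still r1 (the register pair is only an example).  */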
15213 case 'Q':
15214 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15216 output_operand_lossage ("invalid operand for code '%c'", code);
15217 return;
15220 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15221 return;
15223 case 'R':
15224 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15226 output_operand_lossage ("invalid operand for code '%c'", code);
15227 return;
15230 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
15231 return;
15233 case 'H':
15234 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15236 output_operand_lossage ("invalid operand for code '%c'", code);
15237 return;
15240 asm_fprintf (stream, "%r", REGNO (x) + 1);
15241 return;
15243 case 'J':
15244 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15246 output_operand_lossage ("invalid operand for code '%c'", code);
15247 return;
15250 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15251 return;
15253 case 'K':
15254 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15256 output_operand_lossage ("invalid operand for code '%c'", code);
15257 return;
15260 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
15261 return;
15263 case 'm':
15264 asm_fprintf (stream, "%r",
15265 GET_CODE (XEXP (x, 0)) == REG
15266 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
15267 return;
15269 case 'M':
15270 asm_fprintf (stream, "{%r-%r}",
15271 REGNO (x),
15272 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
15273 return;
15275 /* Like 'M', but writing doubleword vector registers, for use by Neon
15276 insns. */
15277 case 'h':
15279 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
15280 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
15281 if (numregs == 1)
15282 asm_fprintf (stream, "{d%d}", regno);
15283 else
15284 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
15286 return;
15288 case 'd':
15289 /* CONST_TRUE_RTX means always -- that's the default. */
15290 if (x == const_true_rtx)
15291 return;
15293 if (!COMPARISON_P (x))
15295 output_operand_lossage ("invalid operand for code '%c'", code);
15296 return;
15299 fputs (arm_condition_codes[get_arm_condition_code (x)],
15300 stream);
15301 return;
15303 case 'D':
15304 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
15305 want to do that. */
15306 if (x == const_true_rtx)
15308 output_operand_lossage ("instruction never executed");
15309 return;
15311 if (!COMPARISON_P (x))
15313 output_operand_lossage ("invalid operand for code '%c'", code);
15314 return;
15317 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
15318 (get_arm_condition_code (x))],
15319 stream);
15320 return;
15322 /* Cirrus registers can be accessed in a variety of ways:
15323 single floating point (f)
15324 double floating point (d)
15325 32bit integer (fx)
15326 64bit integer (dx). */
15327 case 'W': /* Cirrus register in F mode. */
15328 case 'X': /* Cirrus register in D mode. */
15329 case 'Y': /* Cirrus register in FX mode. */
15330 case 'Z': /* Cirrus register in DX mode. */
15331 gcc_assert (GET_CODE (x) == REG
15332 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
15334 fprintf (stream, "mv%s%s",
15335 code == 'W' ? "f"
15336 : code == 'X' ? "d"
15337 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
15339 return;
15341 /* Print cirrus register in the mode specified by the register's mode. */
15342 case 'V':
15344 int mode = GET_MODE (x);
15346 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
15348 output_operand_lossage ("invalid operand for code '%c'", code);
15349 return;
15352 fprintf (stream, "mv%s%s",
15353 mode == DFmode ? "d"
15354 : mode == SImode ? "fx"
15355 : mode == DImode ? "dx"
15356 : "f", reg_names[REGNO (x)] + 2);
15358 return;
15361 case 'U':
15362 if (GET_CODE (x) != REG
15363 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
15364 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
15365 /* Bad value for wCG register number. */
15367 output_operand_lossage ("invalid operand for code '%c'", code);
15368 return;
15371 else
15372 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15373 return;
15375 /* Print an iWMMXt control register name. */
15376 case 'w':
15377 if (GET_CODE (x) != CONST_INT
15378 || INTVAL (x) < 0
15379 || INTVAL (x) >= 16)
15380 /* Bad value for wC register number. */
15382 output_operand_lossage ("invalid operand for code '%c'", code);
15383 return;
15386 else
15388 static const char * wc_reg_names [16] =
15390 "wCID", "wCon", "wCSSF", "wCASF",
15391 "wC4", "wC5", "wC6", "wC7",
15392 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15393 "wC12", "wC13", "wC14", "wC15"
15396 fprintf (stream, wc_reg_names [INTVAL (x)]);
15398 return;
15400 /* Print the high single-precision register of a VFP double-precision
15401 register. */
15402 case 'p':
15404 int mode = GET_MODE (x);
15405 int regno;
15407 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
15409 output_operand_lossage ("invalid operand for code '%c'", code);
15410 return;
15413 regno = REGNO (x);
15414 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
15416 output_operand_lossage ("invalid operand for code '%c'", code);
15417 return;
15420 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
15422 return;
15424 /* Print a VFP/Neon double precision or quad precision register name. */
15425 case 'P':
15426 case 'q':
15428 int mode = GET_MODE (x);
15429 int is_quad = (code == 'q');
15430 int regno;
15432 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15434 output_operand_lossage ("invalid operand for code '%c'", code);
15435 return;
15438 if (GET_CODE (x) != REG
15439 || !IS_VFP_REGNUM (REGNO (x)))
15441 output_operand_lossage ("invalid operand for code '%c'", code);
15442 return;
15445 regno = REGNO (x);
15446 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15447 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15449 output_operand_lossage ("invalid operand for code '%c'", code);
15450 return;
15453 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15454 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15456 return;
15458 /* These two codes print the low/high doubleword register of a Neon quad
15459 register, respectively. For pair-structure types, can also print
15460 low/high quadword registers. */
15461 case 'e':
15462 case 'f':
15464 int mode = GET_MODE (x);
15465 int regno;
15467 if ((GET_MODE_SIZE (mode) != 16
15468 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15470 output_operand_lossage ("invalid operand for code '%c'", code);
15471 return;
15474 regno = REGNO (x);
15475 if (!NEON_REGNO_OK_FOR_QUAD (regno))
15477 output_operand_lossage ("invalid operand for code '%c'", code);
15478 return;
15481 if (GET_MODE_SIZE (mode) == 16)
15482 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15483 + (code == 'f' ? 1 : 0));
15484 else
15485 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15486 + (code == 'f' ? 1 : 0));
15488 return;
15490 /* Print a VFPv3 floating-point constant, represented as an integer
15491 index. */
15492 case 'G':
15494 int index = vfp3_const_double_index (x);
15495 gcc_assert (index != -1);
15496 fprintf (stream, "%d", index);
15498 return;
15500 /* Print bits representing opcode features for Neon.
15502 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15503 and polynomials as unsigned.
15505 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15507 Bit 2 is 1 for rounding functions, 0 otherwise. */
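    /* For instance, a (purely illustrative) operand value of 5, binary 101,
       describes a signed integer operation with rounding: %T and %t print
       's', %F prints 'i' and %O prints 'r'.  */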
15509 /* Identify the type as 's', 'u', 'p' or 'f'. */
15510 case 'T':
15512 HOST_WIDE_INT bits = INTVAL (x);
15513 fputc ("uspf"[bits & 3], stream);
15515 return;
15517 /* Likewise, but signed and unsigned integers are both 'i'. */
15518 case 'F':
15520 HOST_WIDE_INT bits = INTVAL (x);
15521 fputc ("iipf"[bits & 3], stream);
15523 return;
15525 /* As for 'T', but emit 'u' instead of 'p'. */
15526 case 't':
15528 HOST_WIDE_INT bits = INTVAL (x);
15529 fputc ("usuf"[bits & 3], stream);
15531 return;
15533 /* Bit 2: rounding (vs none). */
15534 case 'O':
15536 HOST_WIDE_INT bits = INTVAL (x);
15537 fputs ((bits & 4) != 0 ? "r" : "", stream);
15539 return;
15541 /* Memory operand for vld1/vst1 instruction. */
15542 case 'A':
15544 rtx addr;
15545 bool postinc = FALSE;
15546 gcc_assert (GET_CODE (x) == MEM);
15547 addr = XEXP (x, 0);
15548 if (GET_CODE (addr) == POST_INC)
15550 postinc = 1;
15551 addr = XEXP (addr, 0);
15553 asm_fprintf (stream, "[%r]", REGNO (addr));
15554 if (postinc)
15555 fputs("!", stream);
15557 return;
15559 /* Translate an S register number into a D register number and element index. */
15560 case 'y':
15562 int mode = GET_MODE (x);
15563 int regno;
15565 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
15567 output_operand_lossage ("invalid operand for code '%c'", code);
15568 return;
15571 regno = REGNO (x);
15572 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15574 output_operand_lossage ("invalid operand for code '%c'", code);
15575 return;
15578 regno = regno - FIRST_VFP_REGNUM;
15579 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
15581 return;
15583 /* Register specifier for vld1.16/vst1.16. Translate the S register
15584 number into a D register number and element index. */
15585 case 'z':
15587 int mode = GET_MODE (x);
15588 int regno;
15590 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15592 output_operand_lossage ("invalid operand for code '%c'", code);
15593 return;
15596 regno = REGNO (x);
15597 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15599 output_operand_lossage ("invalid operand for code '%c'", code);
15600 return;
15603 regno = regno - FIRST_VFP_REGNUM;
15604 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
15606 return;
15608 default:
15609 if (x == 0)
15611 output_operand_lossage ("missing operand");
15612 return;
15615 switch (GET_CODE (x))
15617 case REG:
15618 asm_fprintf (stream, "%r", REGNO (x));
15619 break;
15621 case MEM:
15622 output_memory_reference_mode = GET_MODE (x);
15623 output_address (XEXP (x, 0));
15624 break;
15626 case CONST_DOUBLE:
15627 if (TARGET_NEON)
15629 char fpstr[20];
15630 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
15631 sizeof (fpstr), 0, 1);
15632 fprintf (stream, "#%s", fpstr);
15634 else
15635 fprintf (stream, "#%s", fp_immediate_constant (x));
15636 break;
15638 default:
15639 gcc_assert (GET_CODE (x) != NEG);
15640 fputc ('#', stream);
15641 if (GET_CODE (x) == HIGH)
15643 fputs (":lower16:", stream);
15644 x = XEXP (x, 0);
15647 output_addr_const (stream, x);
15648 break;
15653 /* Target hook for assembling integer objects. The ARM version needs to
15654 handle word-sized values specially. */
15655 static bool
15656 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
15658 enum machine_mode mode;
15660 if (size == UNITS_PER_WORD && aligned_p)
15662 fputs ("\t.word\t", asm_out_file);
15663 output_addr_const (asm_out_file, x);
15665 /* Mark symbols as position independent. We only do this in the
15666 .text segment, not in the .data segment. */
15667 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
15668 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
15670 /* See legitimize_pic_address for an explanation of the
15671 TARGET_VXWORKS_RTP check. */
15672 if (TARGET_VXWORKS_RTP
15673 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
15674 fputs ("(GOT)", asm_out_file);
15675 else
15676 fputs ("(GOTOFF)", asm_out_file);
15678 fputc ('\n', asm_out_file);
15679 return true;
15682 mode = GET_MODE (x);
15684 if (arm_vector_mode_supported_p (mode))
15686 int i, units;
15688 gcc_assert (GET_CODE (x) == CONST_VECTOR);
15690 units = CONST_VECTOR_NUNITS (x);
15691 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15693 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15694 for (i = 0; i < units; i++)
15696 rtx elt = CONST_VECTOR_ELT (x, i);
15697 assemble_integer
15698 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
15700 else
15701 for (i = 0; i < units; i++)
15703 rtx elt = CONST_VECTOR_ELT (x, i);
15704 REAL_VALUE_TYPE rval;
15706 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
15708 assemble_real
15709 (rval, GET_MODE_INNER (mode),
15710 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
15713 return true;
15716 return default_assemble_integer (x, size, aligned_p);
15719 static void
15720 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
15722 section *s;
15724 if (!TARGET_AAPCS_BASED)
15726 (is_ctor ?
15727 default_named_section_asm_out_constructor
15728 : default_named_section_asm_out_destructor) (symbol, priority);
15729 return;
15732 /* Put these in the .init_array section, using a special relocation. */
15733 if (priority != DEFAULT_INIT_PRIORITY)
15735 char buf[18];
15736 sprintf (buf, "%s.%.5u",
15737 is_ctor ? ".init_array" : ".fini_array",
15738 priority);
15739 s = get_section (buf, SECTION_WRITE, NULL_TREE);
15741 else if (is_ctor)
15742 s = ctors_section;
15743 else
15744 s = dtors_section;
15746 switch_to_section (s);
15747 assemble_align (POINTER_SIZE);
15748 fputs ("\t.word\t", asm_out_file);
15749 output_addr_const (asm_out_file, symbol);
15750 fputs ("(target1)\n", asm_out_file);
15753 /* Add a function to the list of static constructors. */
15755 static void
15756 arm_elf_asm_constructor (rtx symbol, int priority)
15758 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
15761 /* Add a function to the list of static destructors. */
15763 static void
15764 arm_elf_asm_destructor (rtx symbol, int priority)
15766 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
15769 /* A finite state machine takes care of noticing whether or not instructions
15770 can be conditionally executed, and thus decrease execution time and code
15771 size by deleting branch instructions. The fsm is controlled by
15772 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
15774 /* The states of the fsm controlling condition codes are:
15775 0: normal, do nothing special
15776 1: make ASM_OUTPUT_OPCODE not output this instruction
15777 2: make ASM_OUTPUT_OPCODE not output this instruction
15778 3: make instructions conditional
15779 4: make instructions conditional
15781 State transitions (state->state by whom under condition):
15782 0 -> 1 final_prescan_insn if the `target' is a label
15783 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
15784 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
15785 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
15786 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
15787 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
15788 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
15789 (the target insn is arm_target_insn).
15791 If the jump clobbers the conditions then we use states 2 and 4.
15793 A similar thing can be done with conditional return insns.
15795 XXX In case the `target' is an unconditional branch, this conditionalising
15796 of the instructions always reduces code size, but not always execution
15797 time. But then, I want to reduce the code size to somewhere near what
15798 /bin/cc produces. */
15800 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
15801 instructions. When a COND_EXEC instruction is seen the subsequent
15802 instructions are scanned so that multiple conditional instructions can be
15803 combined into a single IT block. arm_condexec_count and arm_condexec_mask
15804 specify the length and true/false mask for the IT block. These will be
15805 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
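/* For example, an EQ-predicated insn followed by a second EQ insn and then
   an NE insn is merged into one block, which arm_asm_output_opcode emits
   as "itte eq" ahead of the three instructions (the conditions are only an
   illustration).  */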
15807 /* Returns the index of the ARM condition code string in
15808 `arm_condition_codes'. COMPARISON should be an rtx like
15809 `(eq (...) (...))'. */
15810 static enum arm_cond_code
15811 get_arm_condition_code (rtx comparison)
15813 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
15814 enum arm_cond_code code;
15815 enum rtx_code comp_code = GET_CODE (comparison);
15817 if (GET_MODE_CLASS (mode) != MODE_CC)
15818 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
15819 XEXP (comparison, 1));
15821 switch (mode)
15823 case CC_DNEmode: code = ARM_NE; goto dominance;
15824 case CC_DEQmode: code = ARM_EQ; goto dominance;
15825 case CC_DGEmode: code = ARM_GE; goto dominance;
15826 case CC_DGTmode: code = ARM_GT; goto dominance;
15827 case CC_DLEmode: code = ARM_LE; goto dominance;
15828 case CC_DLTmode: code = ARM_LT; goto dominance;
15829 case CC_DGEUmode: code = ARM_CS; goto dominance;
15830 case CC_DGTUmode: code = ARM_HI; goto dominance;
15831 case CC_DLEUmode: code = ARM_LS; goto dominance;
15832 case CC_DLTUmode: code = ARM_CC;
15834 dominance:
15835 gcc_assert (comp_code == EQ || comp_code == NE);
15837 if (comp_code == EQ)
15838 return ARM_INVERSE_CONDITION_CODE (code);
15839 return code;
15841 case CC_NOOVmode:
15842 switch (comp_code)
15844 case NE: return ARM_NE;
15845 case EQ: return ARM_EQ;
15846 case GE: return ARM_PL;
15847 case LT: return ARM_MI;
15848 default: gcc_unreachable ();
15851 case CC_Zmode:
15852 switch (comp_code)
15854 case NE: return ARM_NE;
15855 case EQ: return ARM_EQ;
15856 default: gcc_unreachable ();
15859 case CC_Nmode:
15860 switch (comp_code)
15862 case NE: return ARM_MI;
15863 case EQ: return ARM_PL;
15864 default: gcc_unreachable ();
15867 case CCFPEmode:
15868 case CCFPmode:
15869 /* These encodings assume that AC=1 in the FPA system control
15870 byte. This allows us to handle all cases except UNEQ and
15871 LTGT. */
15872 switch (comp_code)
15874 case GE: return ARM_GE;
15875 case GT: return ARM_GT;
15876 case LE: return ARM_LS;
15877 case LT: return ARM_MI;
15878 case NE: return ARM_NE;
15879 case EQ: return ARM_EQ;
15880 case ORDERED: return ARM_VC;
15881 case UNORDERED: return ARM_VS;
15882 case UNLT: return ARM_LT;
15883 case UNLE: return ARM_LE;
15884 case UNGT: return ARM_HI;
15885 case UNGE: return ARM_PL;
15886 /* UNEQ and LTGT do not have a representation. */
15887 case UNEQ: /* Fall through. */
15888 case LTGT: /* Fall through. */
15889 default: gcc_unreachable ();
15892 case CC_SWPmode:
15893 switch (comp_code)
15895 case NE: return ARM_NE;
15896 case EQ: return ARM_EQ;
15897 case GE: return ARM_LE;
15898 case GT: return ARM_LT;
15899 case LE: return ARM_GE;
15900 case LT: return ARM_GT;
15901 case GEU: return ARM_LS;
15902 case GTU: return ARM_CC;
15903 case LEU: return ARM_CS;
15904 case LTU: return ARM_HI;
15905 default: gcc_unreachable ();
15908 case CC_Cmode:
15909 switch (comp_code)
15911 case LTU: return ARM_CS;
15912 case GEU: return ARM_CC;
15913 default: gcc_unreachable ();
15916 case CCmode:
15917 switch (comp_code)
15919 case NE: return ARM_NE;
15920 case EQ: return ARM_EQ;
15921 case GE: return ARM_GE;
15922 case GT: return ARM_GT;
15923 case LE: return ARM_LE;
15924 case LT: return ARM_LT;
15925 case GEU: return ARM_CS;
15926 case GTU: return ARM_HI;
15927 case LEU: return ARM_LS;
15928 case LTU: return ARM_CC;
15929 default: gcc_unreachable ();
15932 default: gcc_unreachable ();
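/* Illustrative note (not from the original sources): for example,
   (ge (reg:CC_SWP CC_REGNUM) (const_int 0)) yields ARM_LE above, because
   CC_SWPmode records that the comparison operands were swapped when the
   flags were set, so conditions are swapped rather than inverted.  */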
15936 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
15937 instructions. */
15938 void
15939 thumb2_final_prescan_insn (rtx insn)
15941 rtx first_insn = insn;
15942 rtx body = PATTERN (insn);
15943 rtx predicate;
15944 enum arm_cond_code code;
15945 int n;
15946 int mask;
15948 /* Remove the previous insn from the count of insns to be output. */
15949 if (arm_condexec_count)
15950 arm_condexec_count--;
15952 /* Nothing to do if we are already inside a conditional block. */
15953 if (arm_condexec_count)
15954 return;
15956 if (GET_CODE (body) != COND_EXEC)
15957 return;
15959 /* Conditional jumps are implemented directly. */
15960 if (GET_CODE (insn) == JUMP_INSN)
15961 return;
15963 predicate = COND_EXEC_TEST (body);
15964 arm_current_cc = get_arm_condition_code (predicate);
15966 n = get_attr_ce_count (insn);
15967 arm_condexec_count = 1;
15968 arm_condexec_mask = (1 << n) - 1;
15969 arm_condexec_masklen = n;
15970 /* See if subsequent instructions can be combined into the same block. */
15971 for (;;)
15973 insn = next_nonnote_insn (insn);
15975 /* Jumping into the middle of an IT block is illegal, so a label or
15976 barrier terminates the block. */
15977 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
15978 break;
15980 body = PATTERN (insn);
15981 /* USE and CLOBBER aren't really insns, so just skip them. */
15982 if (GET_CODE (body) == USE
15983 || GET_CODE (body) == CLOBBER)
15984 continue;
15986 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
15987 if (GET_CODE (body) != COND_EXEC)
15988 break;
15989 /* Allow up to 4 conditionally executed instructions in a block. */
15990 n = get_attr_ce_count (insn);
15991 if (arm_condexec_masklen + n > 4)
15992 break;
15994 predicate = COND_EXEC_TEST (body);
15995 code = get_arm_condition_code (predicate);
15996 mask = (1 << n) - 1;
15997 if (arm_current_cc == code)
15998 arm_condexec_mask |= (mask << arm_condexec_masklen);
15999 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
16000 break;
16002 arm_condexec_count++;
16003 arm_condexec_masklen += n;
16005 /* A jump must be the last instruction in a conditional block. */
16006 if (GET_CODE(insn) == JUMP_INSN)
16007 break;
16009 /* Restore recog_data (getting the attributes of other insns can
16010 destroy this array, but final.c assumes that it remains intact
16011 across this call). */
16012 extract_constrain_insn_cached (first_insn);
16015 void
16016 arm_final_prescan_insn (rtx insn)
16018 /* BODY will hold the body of INSN. */
16019 rtx body = PATTERN (insn);
16021 /* This will be 1 if we are trying to repeat the trick (see the state 3
16022 handling below), and things need to be reversed if it appears to fail. */
16023 int reverse = 0;
16025 /* If we start with a return insn, we only succeed if we find another one. */
16026 int seeking_return = 0;
16028 /* START_INSN will hold the insn from where we start looking. This is the
16029 first insn after the following code_label if REVERSE is true. */
16030 rtx start_insn = insn;
16032 /* If in state 4, check if the target branch is reached, in order to
16033 change back to state 0. */
16034 if (arm_ccfsm_state == 4)
16036 if (insn == arm_target_insn)
16038 arm_target_insn = NULL;
16039 arm_ccfsm_state = 0;
16041 return;
16044 /* If in state 3, it is possible to repeat the trick, if this insn is an
16045 unconditional branch to a label, and immediately following this branch
16046 is the previous target label which is only used once, and the label this
16047 branch jumps to is not too far off. */
16048 if (arm_ccfsm_state == 3)
16050 if (simplejump_p (insn))
16052 start_insn = next_nonnote_insn (start_insn);
16053 if (GET_CODE (start_insn) == BARRIER)
16055 /* XXX Isn't this always a barrier? */
16056 start_insn = next_nonnote_insn (start_insn);
16058 if (GET_CODE (start_insn) == CODE_LABEL
16059 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16060 && LABEL_NUSES (start_insn) == 1)
16061 reverse = TRUE;
16062 else
16063 return;
16065 else if (GET_CODE (body) == RETURN)
16067 start_insn = next_nonnote_insn (start_insn);
16068 if (GET_CODE (start_insn) == BARRIER)
16069 start_insn = next_nonnote_insn (start_insn);
16070 if (GET_CODE (start_insn) == CODE_LABEL
16071 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16072 && LABEL_NUSES (start_insn) == 1)
16074 reverse = TRUE;
16075 seeking_return = 1;
16077 else
16078 return;
16080 else
16081 return;
16084 gcc_assert (!arm_ccfsm_state || reverse);
16085 if (GET_CODE (insn) != JUMP_INSN)
16086 return;
16088 /* This jump might be paralleled with a clobber of the condition codes;
16089 the jump should always come first. */
16090 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
16091 body = XVECEXP (body, 0, 0);
16093 if (reverse
16094 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
16095 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
16097 int insns_skipped;
16098 int fail = FALSE, succeed = FALSE;
16099 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
16100 int then_not_else = TRUE;
16101 rtx this_insn = start_insn, label = 0;
16103 /* Register the insn jumped to. */
16104 if (reverse)
16106 if (!seeking_return)
16107 label = XEXP (SET_SRC (body), 0);
16109 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
16110 label = XEXP (XEXP (SET_SRC (body), 1), 0);
16111 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
16113 label = XEXP (XEXP (SET_SRC (body), 2), 0);
16114 then_not_else = FALSE;
16116 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
16117 seeking_return = 1;
16118 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
16120 seeking_return = 1;
16121 then_not_else = FALSE;
16123 else
16124 gcc_unreachable ();
16126 /* See how many insns this branch skips, and what kind of insns. If all
16127 insns are okay, and the label or unconditional branch to the same
16128 label is not too far away, succeed. */
16129 for (insns_skipped = 0;
16130 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
16132 rtx scanbody;
16134 this_insn = next_nonnote_insn (this_insn);
16135 if (!this_insn)
16136 break;
16138 switch (GET_CODE (this_insn))
16140 case CODE_LABEL:
16141 /* Succeed if it is the target label, otherwise fail since
16142 control falls in from somewhere else. */
16143 if (this_insn == label)
16145 arm_ccfsm_state = 1;
16146 succeed = TRUE;
16148 else
16149 fail = TRUE;
16150 break;
16152 case BARRIER:
16153 /* Succeed if the following insn is the target label.
16154 Otherwise fail.
16155 If return insns are used then the last insn in a function
16156 will be a barrier. */
16157 this_insn = next_nonnote_insn (this_insn);
16158 if (this_insn && this_insn == label)
16160 arm_ccfsm_state = 1;
16161 succeed = TRUE;
16163 else
16164 fail = TRUE;
16165 break;
16167 case CALL_INSN:
16168 /* The AAPCS says that conditional calls should not be
16169 used since they make interworking inefficient (the
16170 linker can't transform BL<cond> into BLX). That's
16171 only a problem if the machine has BLX. */
16172 if (arm_arch5)
16174 fail = TRUE;
16175 break;
16178 /* Succeed if the following insn is the target label, or
16179 if the following two insns are a barrier and the
16180 target label. */
16181 this_insn = next_nonnote_insn (this_insn);
16182 if (this_insn && GET_CODE (this_insn) == BARRIER)
16183 this_insn = next_nonnote_insn (this_insn);
16185 if (this_insn && this_insn == label
16186 && insns_skipped < max_insns_skipped)
16188 arm_ccfsm_state = 1;
16189 succeed = TRUE;
16191 else
16192 fail = TRUE;
16193 break;
16195 case JUMP_INSN:
16196 /* If this is an unconditional branch to the same label, succeed.
16197 If it is to another label, do nothing. If it is conditional,
16198 fail. */
16199 /* XXX Probably, the tests for SET and the PC are
16200 unnecessary. */
16202 scanbody = PATTERN (this_insn);
16203 if (GET_CODE (scanbody) == SET
16204 && GET_CODE (SET_DEST (scanbody)) == PC)
16206 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
16207 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
16209 arm_ccfsm_state = 2;
16210 succeed = TRUE;
16212 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
16213 fail = TRUE;
16215 /* Fail if a conditional return is undesirable (e.g. on a
16216 StrongARM), but still allow this if optimizing for size. */
16217 else if (GET_CODE (scanbody) == RETURN
16218 && !use_return_insn (TRUE, NULL)
16219 && !optimize_size)
16220 fail = TRUE;
16221 else if (GET_CODE (scanbody) == RETURN
16222 && seeking_return)
16224 arm_ccfsm_state = 2;
16225 succeed = TRUE;
16227 else if (GET_CODE (scanbody) == PARALLEL)
16229 switch (get_attr_conds (this_insn))
16231 case CONDS_NOCOND:
16232 break;
16233 default:
16234 fail = TRUE;
16235 break;
16238 else
16239 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
16241 break;
16243 case INSN:
16244 /* Instructions using or affecting the condition codes make it
16245 fail. */
16246 scanbody = PATTERN (this_insn);
16247 if (!(GET_CODE (scanbody) == SET
16248 || GET_CODE (scanbody) == PARALLEL)
16249 || get_attr_conds (this_insn) != CONDS_NOCOND)
16250 fail = TRUE;
16252 /* A conditional Cirrus instruction must be followed by
16253 a non-Cirrus instruction. However, since we
16254 conditionalize instructions in this function, and since by
16255 the time we get here we can't add instructions
16256 (nops) because shorten_branches() has already been
16257 called, we simply disable conditionalizing Cirrus
16258 instructions to be safe. */
16259 if (GET_CODE (scanbody) != USE
16260 && GET_CODE (scanbody) != CLOBBER
16261 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
16262 fail = TRUE;
16263 break;
16265 default:
16266 break;
16269 if (succeed)
16271 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
16272 arm_target_label = CODE_LABEL_NUMBER (label);
16273 else
16275 gcc_assert (seeking_return || arm_ccfsm_state == 2);
16277 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
16279 this_insn = next_nonnote_insn (this_insn);
16280 gcc_assert (!this_insn
16281 || (GET_CODE (this_insn) != BARRIER
16282 && GET_CODE (this_insn) != CODE_LABEL));
16284 if (!this_insn)
16286 /* Oh dear!  We ran off the end; give up. */
16287 extract_constrain_insn_cached (insn);
16288 arm_ccfsm_state = 0;
16289 arm_target_insn = NULL;
16290 return;
16292 arm_target_insn = this_insn;
16295 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
16296 what it was. */
16297 if (!reverse)
16298 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
16300 if (reverse || then_not_else)
16301 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
16304 /* Restore recog_data (getting the attributes of other insns can
16305 destroy this array, but final.c assumes that it remains intact
16306 across this call). */
16307 extract_constrain_insn_cached (insn);
16311 /* Output IT instructions. */
16312 void
16313 thumb2_asm_output_opcode (FILE * stream)
16315 char buff[5];
16316 int n;
16318 if (arm_condexec_mask)
16320 for (n = 0; n < arm_condexec_masklen; n++)
16321 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
16322 buff[n] = 0;
16323 asm_fprintf(stream, "i%s\t%s\n\t", buff,
16324 arm_condition_codes[arm_current_cc]);
16325 arm_condexec_mask = 0;
16329 /* Returns true if REGNO is a valid register
16330 for holding a quantity of type MODE. */
16331 int
16332 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
16334 if (GET_MODE_CLASS (mode) == MODE_CC)
16335 return (regno == CC_REGNUM
16336 || (TARGET_HARD_FLOAT && TARGET_VFP
16337 && regno == VFPCC_REGNUM));
16339 if (TARGET_THUMB1)
16340 /* For the Thumb we only allow values bigger than SImode in
16341 registers 0 - 6, so that there is always a second low
16342 register available to hold the upper part of the value.
16343 We probably ought to ensure that the register is the
16344 start of an even numbered register pair. */
16345 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
16347 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
16348 && IS_CIRRUS_REGNUM (regno))
16349 /* We have outlawed SI values in Cirrus registers because they
16350 reside in the lower 32 bits, but SF values reside in the
16351 upper 32 bits. This causes GCC all sorts of grief. We can't
16352 even split the registers into pairs because Cirrus SI values
16353 get sign-extended to 64 bits. -- aldyh. */
16354 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
16356 if (TARGET_HARD_FLOAT && TARGET_VFP
16357 && IS_VFP_REGNUM (regno))
16359 if (mode == SFmode || mode == SImode)
16360 return VFP_REGNO_OK_FOR_SINGLE (regno);
16362 if (mode == DFmode)
16363 return VFP_REGNO_OK_FOR_DOUBLE (regno);
16365 /* VFP registers can hold HFmode values, but there is no point in
16366 putting them there unless we have hardware conversion insns. */
16367 if (mode == HFmode)
16368 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
16370 if (TARGET_NEON)
16371 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
16372 || (VALID_NEON_QREG_MODE (mode)
16373 && NEON_REGNO_OK_FOR_QUAD (regno))
16374 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
16375 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
16376 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
16377 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
16378 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
16380 return FALSE;
16383 if (TARGET_REALLY_IWMMXT)
16385 if (IS_IWMMXT_GR_REGNUM (regno))
16386 return mode == SImode;
16388 if (IS_IWMMXT_REGNUM (regno))
16389 return VALID_IWMMXT_REG_MODE (mode);
16392 /* We allow almost any value to be stored in the general registers.
16393 Restrict doubleword quantities to even register pairs so that we can
16394 use ldrd. Do not allow very large Neon structure opaque modes in
16395 general registers; they would use too many. */
16396 if (regno <= LAST_ARM_REGNUM)
16397 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
16398 && ARM_NUM_REGS (mode) <= 4;
16400 if (regno == FRAME_POINTER_REGNUM
16401 || regno == ARG_POINTER_REGNUM)
16402 /* We only allow integers in the fake hard registers. */
16403 return GET_MODE_CLASS (mode) == MODE_INT;
16405 /* The only registers left are the FPA registers
16406 which we only allow to hold FP values. */
16407 return (TARGET_HARD_FLOAT && TARGET_FPA
16408 && GET_MODE_CLASS (mode) == MODE_FLOAT
16409 && regno >= FIRST_FPA_REGNUM
16410 && regno <= LAST_FPA_REGNUM);
16413 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
16414 not used in ARM mode. */
16416 enum reg_class
16417 arm_regno_class (int regno)
16419 if (TARGET_THUMB1)
16421 if (regno == STACK_POINTER_REGNUM)
16422 return STACK_REG;
16423 if (regno == CC_REGNUM)
16424 return CC_REG;
16425 if (regno < 8)
16426 return LO_REGS;
16427 return HI_REGS;
16430 if (TARGET_THUMB2 && regno < 8)
16431 return LO_REGS;
16433 if ( regno <= LAST_ARM_REGNUM
16434 || regno == FRAME_POINTER_REGNUM
16435 || regno == ARG_POINTER_REGNUM)
16436 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16438 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16439 return TARGET_THUMB2 ? CC_REG : NO_REGS;
16441 if (IS_CIRRUS_REGNUM (regno))
16442 return CIRRUS_REGS;
16444 if (IS_VFP_REGNUM (regno))
16446 if (regno <= D7_VFP_REGNUM)
16447 return VFP_D0_D7_REGS;
16448 else if (regno <= LAST_LO_VFP_REGNUM)
16449 return VFP_LO_REGS;
16450 else
16451 return VFP_HI_REGS;
16454 if (IS_IWMMXT_REGNUM (regno))
16455 return IWMMXT_REGS;
16457 if (IS_IWMMXT_GR_REGNUM (regno))
16458 return IWMMXT_GR_REGS;
16460 return FPA_REGS;
16463 /* Handle a special case when computing the offset
16464 of an argument from the frame pointer. */
16465 int
16466 arm_debugger_arg_offset (int value, rtx addr)
16468 rtx insn;
16470 /* We are only interested if dbxout_parms() failed to compute the offset. */
16471 if (value != 0)
16472 return 0;
16474 /* We can only cope with the case where the address is held in a register. */
16475 if (GET_CODE (addr) != REG)
16476 return 0;
16478 /* If we are using the frame pointer to point at the argument, then
16479 an offset of 0 is correct. */
16480 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
16481 return 0;
16483 /* If we are using the stack pointer to point at the
16484 argument, then an offset of 0 is correct. */
16485 /* ??? Check this is consistent with thumb2 frame layout. */
16486 if ((TARGET_THUMB || !frame_pointer_needed)
16487 && REGNO (addr) == SP_REGNUM)
16488 return 0;
16490 /* Oh dear. The argument is pointed to by a register rather
16491 than being held in a register, or being stored at a known
16492 offset from the frame pointer. Since GDB only understands
16493 those two kinds of argument we must translate the address
16494 held in the register into an offset from the frame pointer.
16495 We do this by searching through the insns for the function
16496 looking to see where this register gets its value. If the
16497 register is initialized from the frame pointer plus an offset
16498 then we are in luck and we can continue, otherwise we give up.
16500 This code is exercised by producing debugging information
16501 for a function with arguments like this:
16503 double func (double a, double b, int c, double d) {return d;}
16505 Without this code the stab for parameter 'd' will be set to
16506 an offset of 0 from the frame pointer, rather than 8. */
16508 /* The if() statement says:
16510 If the insn is a normal instruction
16511 and if the insn is setting the value in a register
16512 and if the register being set is the register holding the address of the argument
16513 and if the address is computed by an addition
16514 that involves adding to a register
16515 which is the frame pointer
16516 a constant integer
16518 then... */
16520 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16522 if ( GET_CODE (insn) == INSN
16523 && GET_CODE (PATTERN (insn)) == SET
16524 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
16525 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
16526 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
16527 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
16528 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
16531 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
16533 break;
16537 if (value == 0)
16539 debug_rtx (addr);
16540 warning (0, "unable to compute real location of stacked parameter");
16541 value = 8; /* XXX magic hack */
16544 return value;
16547 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
16548 do \
16550 if ((MASK) & insn_flags) \
16551 add_builtin_function ((NAME), (TYPE), (CODE), \
16552 BUILT_IN_MD, NULL, NULL_TREE); \
16554 while (0)
16556 struct builtin_description
16558 const unsigned int mask;
16559 const enum insn_code icode;
16560 const char * const name;
16561 const enum arm_builtins code;
16562 const enum rtx_code comparison;
16563 const unsigned int flag;
16566 static const struct builtin_description bdesc_2arg[] =
16568 #define IWMMXT_BUILTIN(code, string, builtin) \
16569 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
16570 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
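/* For instance, the first entry below,
     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   expands to
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },
   i.e. one builtin_description record per builtin (expansion shown for
   illustration only).  */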
16572 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
16573 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
16574 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
16575 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
16576 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
16577 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
16578 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
16579 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
16580 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
16581 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
16582 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
16583 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
16584 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
16585 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
16586 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
16587 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
16588 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
16589 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
16590 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
16591 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
16592 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
16593 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
16594 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
16595 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
16596 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
16597 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
16598 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
16599 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
16600 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
16601 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
16602 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
16603 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
16604 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
16605 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
16606 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
16607 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
16608 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
16609 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
16610 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
16611 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
16612 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
16613 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
16614 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
16615 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
16616 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
16617 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
16618 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
16619 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
16620 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
16621 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
16622 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
16623 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
16624 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
16625 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
16626 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
16627 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
16628 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
16629 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
16631 #define IWMMXT_BUILTIN2(code, builtin) \
16632 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16634 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
16635 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
16636 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
16637 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
16638 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
16639 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
16640 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
16641 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
16642 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
16643 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
16644 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
16645 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
16646 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
16647 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
16648 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
16649 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
16650 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
16651 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
16652 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
16653 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
16654 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
16655 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
16656 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
16657 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
16658 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
16659 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
16660 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
16661 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
16662 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
16663 IWMMXT_BUILTIN2 (rordi3, WRORDI)
16664 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
16665 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
16668 static const struct builtin_description bdesc_1arg[] =
16670 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
16671 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
16672 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
16673 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
16674 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
16675 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
16676 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
16677 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
16678 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
16679 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
16680 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
16681 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
16682 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
16683 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
16684 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
16685 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
16686 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
16687 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
16690 /* Set up all the iWMMXt builtins. This is
16691 not called if TARGET_IWMMXT is zero. */
16693 static void
16694 arm_init_iwmmxt_builtins (void)
16696 const struct builtin_description * d;
16697 size_t i;
16698 tree endlink = void_list_node;
16700 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16701 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16702 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
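/* Each *_ftype_* tree below describes one builtin signature, named
   result_ftype_arg1_arg2...; e.g. v4hi_ftype_v4hi_int is the type of a
   function taking a V4HI vector and an int and returning a V4HI vector.
   The argument lists are tree_cons chains terminated by endlink
   (void_list_node), which marks the signatures as non-variadic.  */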
16704 tree int_ftype_int
16705 = build_function_type (integer_type_node,
16706 tree_cons (NULL_TREE, integer_type_node, endlink));
16707 tree v8qi_ftype_v8qi_v8qi_int
16708 = build_function_type (V8QI_type_node,
16709 tree_cons (NULL_TREE, V8QI_type_node,
16710 tree_cons (NULL_TREE, V8QI_type_node,
16711 tree_cons (NULL_TREE,
16712 integer_type_node,
16713 endlink))));
16714 tree v4hi_ftype_v4hi_int
16715 = build_function_type (V4HI_type_node,
16716 tree_cons (NULL_TREE, V4HI_type_node,
16717 tree_cons (NULL_TREE, integer_type_node,
16718 endlink)));
16719 tree v2si_ftype_v2si_int
16720 = build_function_type (V2SI_type_node,
16721 tree_cons (NULL_TREE, V2SI_type_node,
16722 tree_cons (NULL_TREE, integer_type_node,
16723 endlink)));
16724 tree v2si_ftype_di_di
16725 = build_function_type (V2SI_type_node,
16726 tree_cons (NULL_TREE, long_long_integer_type_node,
16727 tree_cons (NULL_TREE, long_long_integer_type_node,
16728 endlink)));
16729 tree di_ftype_di_int
16730 = build_function_type (long_long_integer_type_node,
16731 tree_cons (NULL_TREE, long_long_integer_type_node,
16732 tree_cons (NULL_TREE, integer_type_node,
16733 endlink)));
16734 tree di_ftype_di_int_int
16735 = build_function_type (long_long_integer_type_node,
16736 tree_cons (NULL_TREE, long_long_integer_type_node,
16737 tree_cons (NULL_TREE, integer_type_node,
16738 tree_cons (NULL_TREE,
16739 integer_type_node,
16740 endlink))));
16741 tree int_ftype_v8qi
16742 = build_function_type (integer_type_node,
16743 tree_cons (NULL_TREE, V8QI_type_node,
16744 endlink));
16745 tree int_ftype_v4hi
16746 = build_function_type (integer_type_node,
16747 tree_cons (NULL_TREE, V4HI_type_node,
16748 endlink));
16749 tree int_ftype_v2si
16750 = build_function_type (integer_type_node,
16751 tree_cons (NULL_TREE, V2SI_type_node,
16752 endlink));
16753 tree int_ftype_v8qi_int
16754 = build_function_type (integer_type_node,
16755 tree_cons (NULL_TREE, V8QI_type_node,
16756 tree_cons (NULL_TREE, integer_type_node,
16757 endlink)));
16758 tree int_ftype_v4hi_int
16759 = build_function_type (integer_type_node,
16760 tree_cons (NULL_TREE, V4HI_type_node,
16761 tree_cons (NULL_TREE, integer_type_node,
16762 endlink)));
16763 tree int_ftype_v2si_int
16764 = build_function_type (integer_type_node,
16765 tree_cons (NULL_TREE, V2SI_type_node,
16766 tree_cons (NULL_TREE, integer_type_node,
16767 endlink)));
16768 tree v8qi_ftype_v8qi_int_int
16769 = build_function_type (V8QI_type_node,
16770 tree_cons (NULL_TREE, V8QI_type_node,
16771 tree_cons (NULL_TREE, integer_type_node,
16772 tree_cons (NULL_TREE,
16773 integer_type_node,
16774 endlink))));
16775 tree v4hi_ftype_v4hi_int_int
16776 = build_function_type (V4HI_type_node,
16777 tree_cons (NULL_TREE, V4HI_type_node,
16778 tree_cons (NULL_TREE, integer_type_node,
16779 tree_cons (NULL_TREE,
16780 integer_type_node,
16781 endlink))));
16782 tree v2si_ftype_v2si_int_int
16783 = build_function_type (V2SI_type_node,
16784 tree_cons (NULL_TREE, V2SI_type_node,
16785 tree_cons (NULL_TREE, integer_type_node,
16786 tree_cons (NULL_TREE,
16787 integer_type_node,
16788 endlink))));
16789 /* Miscellaneous. */
16790 tree v8qi_ftype_v4hi_v4hi
16791 = build_function_type (V8QI_type_node,
16792 tree_cons (NULL_TREE, V4HI_type_node,
16793 tree_cons (NULL_TREE, V4HI_type_node,
16794 endlink)));
16795 tree v4hi_ftype_v2si_v2si
16796 = build_function_type (V4HI_type_node,
16797 tree_cons (NULL_TREE, V2SI_type_node,
16798 tree_cons (NULL_TREE, V2SI_type_node,
16799 endlink)));
16800 tree v2si_ftype_v4hi_v4hi
16801 = build_function_type (V2SI_type_node,
16802 tree_cons (NULL_TREE, V4HI_type_node,
16803 tree_cons (NULL_TREE, V4HI_type_node,
16804 endlink)));
16805 tree v2si_ftype_v8qi_v8qi
16806 = build_function_type (V2SI_type_node,
16807 tree_cons (NULL_TREE, V8QI_type_node,
16808 tree_cons (NULL_TREE, V8QI_type_node,
16809 endlink)));
16810 tree v4hi_ftype_v4hi_di
16811 = build_function_type (V4HI_type_node,
16812 tree_cons (NULL_TREE, V4HI_type_node,
16813 tree_cons (NULL_TREE,
16814 long_long_integer_type_node,
16815 endlink)));
16816 tree v2si_ftype_v2si_di
16817 = build_function_type (V2SI_type_node,
16818 tree_cons (NULL_TREE, V2SI_type_node,
16819 tree_cons (NULL_TREE,
16820 long_long_integer_type_node,
16821 endlink)));
16822 tree void_ftype_int_int
16823 = build_function_type (void_type_node,
16824 tree_cons (NULL_TREE, integer_type_node,
16825 tree_cons (NULL_TREE, integer_type_node,
16826 endlink)));
16827 tree di_ftype_void
16828 = build_function_type (long_long_unsigned_type_node, endlink);
16829 tree di_ftype_v8qi
16830 = build_function_type (long_long_integer_type_node,
16831 tree_cons (NULL_TREE, V8QI_type_node,
16832 endlink));
16833 tree di_ftype_v4hi
16834 = build_function_type (long_long_integer_type_node,
16835 tree_cons (NULL_TREE, V4HI_type_node,
16836 endlink));
16837 tree di_ftype_v2si
16838 = build_function_type (long_long_integer_type_node,
16839 tree_cons (NULL_TREE, V2SI_type_node,
16840 endlink));
16841 tree v2si_ftype_v4hi
16842 = build_function_type (V2SI_type_node,
16843 tree_cons (NULL_TREE, V4HI_type_node,
16844 endlink));
16845 tree v4hi_ftype_v8qi
16846 = build_function_type (V4HI_type_node,
16847 tree_cons (NULL_TREE, V8QI_type_node,
16848 endlink));
16850 tree di_ftype_di_v4hi_v4hi
16851 = build_function_type (long_long_unsigned_type_node,
16852 tree_cons (NULL_TREE,
16853 long_long_unsigned_type_node,
16854 tree_cons (NULL_TREE, V4HI_type_node,
16855 tree_cons (NULL_TREE,
16856 V4HI_type_node,
16857 endlink))));
16859 tree di_ftype_v4hi_v4hi
16860 = build_function_type (long_long_unsigned_type_node,
16861 tree_cons (NULL_TREE, V4HI_type_node,
16862 tree_cons (NULL_TREE, V4HI_type_node,
16863 endlink)));
16865 /* Normal vector binops. */
16866 tree v8qi_ftype_v8qi_v8qi
16867 = build_function_type (V8QI_type_node,
16868 tree_cons (NULL_TREE, V8QI_type_node,
16869 tree_cons (NULL_TREE, V8QI_type_node,
16870 endlink)));
16871 tree v4hi_ftype_v4hi_v4hi
16872 = build_function_type (V4HI_type_node,
16873 tree_cons (NULL_TREE, V4HI_type_node,
16874 tree_cons (NULL_TREE, V4HI_type_node,
16875 endlink)));
16876 tree v2si_ftype_v2si_v2si
16877 = build_function_type (V2SI_type_node,
16878 tree_cons (NULL_TREE, V2SI_type_node,
16879 tree_cons (NULL_TREE, V2SI_type_node,
16880 endlink)));
16881 tree di_ftype_di_di
16882 = build_function_type (long_long_unsigned_type_node,
16883 tree_cons (NULL_TREE, long_long_unsigned_type_node,
16884 tree_cons (NULL_TREE,
16885 long_long_unsigned_type_node,
16886 endlink)));
16888 /* Add all builtins that are more or less simple operations on two
16889 operands. */
16890 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16892 /* Use one of the operands; the target can have a different mode for
16893 mask-generating compares. */
16894 enum machine_mode mode;
16895 tree type;
16897 if (d->name == 0)
16898 continue;
16900 mode = insn_data[d->icode].operand[1].mode;
16902 switch (mode)
16904 case V8QImode:
16905 type = v8qi_ftype_v8qi_v8qi;
16906 break;
16907 case V4HImode:
16908 type = v4hi_ftype_v4hi_v4hi;
16909 break;
16910 case V2SImode:
16911 type = v2si_ftype_v2si_v2si;
16912 break;
16913 case DImode:
16914 type = di_ftype_di_di;
16915 break;
16917 default:
16918 gcc_unreachable ();
16921 def_mbuiltin (d->mask, d->name, type, d->code);
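/* As an illustration of one iteration: for the "waddb" entry, operand 1
   of CODE_FOR_addv8qi3 has mode V8QImode, so TYPE is
   v8qi_ftype_v8qi_v8qi and the loop registers __builtin_arm_waddb as a
   V8QI (V8QI, V8QI) builtin, provided FL_IWMMXT is set in insn_flags.  */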
16924 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
16925 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
16926 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
16927 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
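/* Usage sketch (illustrative only; the argument values are arbitrary and
   the types follow the signatures registered just above):

     unsigned long long z = __builtin_arm_wzero ();    di_ftype_void
     __builtin_arm_setwcx (0, 3);                       void_ftype_int_int
     int cr = __builtin_arm_getwcx (0);                 int_ftype_int

   User code normally reaches these through the wrappers in mmintrin.h
   rather than calling the builtins directly.  */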
16929 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
16930 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
16931 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
16932 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
16933 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
16934 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
16936 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
16937 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
16938 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
16939 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
16940 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
16941 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
16943 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
16944 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
16945 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
16946 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
16947 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
16948 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
16950 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
16951 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
16952 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
16953 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
16954 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
16955 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
16957 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
16959 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
16960 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
16961 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
16962 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
16964 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
16965 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
16966 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
16967 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
16968 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
16969 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
16970 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
16971 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
16972 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
16974 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
16975 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
16976 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
16978 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
16979 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
16980 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
16982 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
16983 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
16984 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
16985 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
16986 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
16987 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
16989 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
16990 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
16991 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
16992 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
16993 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
16994 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
16995 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
16996 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
16997 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
16998 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
16999 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
17000 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
17002 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
17003 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
17004 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
17005 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
17007 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
17008 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
17009 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
17010 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
17011 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
17012 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
17013 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
17016 static void
17017 arm_init_tls_builtins (void)
17019 tree ftype, decl;
17021 ftype = build_function_type (ptr_type_node, void_list_node);
17022 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
17023 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
17024 NULL, NULL_TREE);
17025 TREE_NOTHROW (decl) = 1;
17026 TREE_READONLY (decl) = 1;
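/* Minimal usage sketch (illustrative, not part of this file):

     void *tp = __builtin_thread_pointer ();

   returns the TLS thread pointer; the decl is marked nothrow and
   readonly above so repeated calls can be combined.  */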
17029 enum neon_builtin_type_bits {
17030 T_V8QI = 0x0001,
17031 T_V4HI = 0x0002,
17032 T_V2SI = 0x0004,
17033 T_V2SF = 0x0008,
17034 T_DI = 0x0010,
17035 T_V16QI = 0x0020,
17036 T_V8HI = 0x0040,
17037 T_V4SI = 0x0080,
17038 T_V4SF = 0x0100,
17039 T_V2DI = 0x0200,
17040 T_TI = 0x0400,
17041 T_EI = 0x0800,
17042 T_OI = 0x1000
17045 #define v8qi_UP T_V8QI
17046 #define v4hi_UP T_V4HI
17047 #define v2si_UP T_V2SI
17048 #define v2sf_UP T_V2SF
17049 #define di_UP T_DI
17050 #define v16qi_UP T_V16QI
17051 #define v8hi_UP T_V8HI
17052 #define v4si_UP T_V4SI
17053 #define v4sf_UP T_V4SF
17054 #define v2di_UP T_V2DI
17055 #define ti_UP T_TI
17056 #define ei_UP T_EI
17057 #define oi_UP T_OI
17059 #define UP(X) X##_UP
17061 #define T_MAX 13
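/* The T_* values above form a bitmask describing which vector modes a
   given NEON builtin is provided for, and the *_UP macros let the VARn
   table entries below name those modes directly; e.g.
   UP (v8qi) | UP (v16qi) == T_V8QI | T_V16QI selects the 64-bit and
   128-bit QI-element variants.  T_MAX is the number of distinct type
   bits and hence the size of the codes[] array in each table entry.  */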
17063 typedef enum {
17064 NEON_BINOP,
17065 NEON_TERNOP,
17066 NEON_UNOP,
17067 NEON_GETLANE,
17068 NEON_SETLANE,
17069 NEON_CREATE,
17070 NEON_DUP,
17071 NEON_DUPLANE,
17072 NEON_COMBINE,
17073 NEON_SPLIT,
17074 NEON_LANEMUL,
17075 NEON_LANEMULL,
17076 NEON_LANEMULH,
17077 NEON_LANEMAC,
17078 NEON_SCALARMUL,
17079 NEON_SCALARMULL,
17080 NEON_SCALARMULH,
17081 NEON_SCALARMAC,
17082 NEON_CONVERT,
17083 NEON_FIXCONV,
17084 NEON_SELECT,
17085 NEON_RESULTPAIR,
17086 NEON_REINTERP,
17087 NEON_VTBL,
17088 NEON_VTBX,
17089 NEON_LOAD1,
17090 NEON_LOAD1LANE,
17091 NEON_STORE1,
17092 NEON_STORE1LANE,
17093 NEON_LOADSTRUCT,
17094 NEON_LOADSTRUCTLANE,
17095 NEON_STORESTRUCT,
17096 NEON_STORESTRUCTLANE,
17097 NEON_LOGICBINOP,
17098 NEON_SHIFTINSERT,
17099 NEON_SHIFTIMM,
17100 NEON_SHIFTACC
17101 } neon_itype;
17103 typedef struct {
17104 const char *name;
17105 const neon_itype itype;
17106 const int bits;
17107 const enum insn_code codes[T_MAX];
17108 const unsigned int num_vars;
17109 unsigned int base_fcode;
17110 } neon_builtin_datum;
17112 #define CF(N,X) CODE_FOR_neon_##N##X
17114 #define VAR1(T, N, A) \
17115 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
17116 #define VAR2(T, N, A, B) \
17117 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
17118 #define VAR3(T, N, A, B, C) \
17119 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
17120 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
17121 #define VAR4(T, N, A, B, C, D) \
17122 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
17123 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
17124 #define VAR5(T, N, A, B, C, D, E) \
17125 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
17126 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
17127 #define VAR6(T, N, A, B, C, D, E, F) \
17128 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
17129 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
17130 #define VAR7(T, N, A, B, C, D, E, F, G) \
17131 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
17132 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17133 CF (N, G) }, 7, 0
17134 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17135 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17136 | UP (H), \
17137 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17138 CF (N, G), CF (N, H) }, 8, 0
17139 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17140 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17141 | UP (H) | UP (I), \
17142 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17143 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
17144 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17145 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17146 | UP (H) | UP (I) | UP (J), \
17147 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17148 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
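/* For example (expansion shown for illustration only), the table entry
     { VAR2 (BINOP, vrecps, v2sf, v4sf) }
   becomes
     { "vrecps", NEON_BINOP, T_V2SF | T_V4SF,
       { CODE_FOR_neon_vrecpsv2sf, CODE_FOR_neon_vrecpsv4sf }, 2, 0 },
   i.e. one neon_builtin_datum covering both the D- and Q-register forms
   of vrecps.  */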
17150 /* The mode entries in the following table correspond to the "key" type of the
17151 instruction variant, i.e. equivalent to that which would be specified after
17152 the assembler mnemonic, which usually refers to the last vector operand.
17153 (Signed/unsigned/polynomial types are not distinguished here, though; they
17154 are all mapped onto the same mode for a given element size.) The modes
17155 listed per instruction should be the same as those defined for that
17156 instruction's pattern in neon.md.
17157 WARNING: Variants should be listed in the same increasing order as
17158 neon_builtin_type_bits. */
17160 static neon_builtin_datum neon_builtin_data[] =
17162 { VAR10 (BINOP, vadd,
17163 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17164 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
17165 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
17166 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17167 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17168 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
17169 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17170 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17171 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
17172 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17173 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
17174 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
17175 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
17176 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
17177 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
17178 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
17179 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
17180 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
17181 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
17182 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
17183 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
17184 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
17185 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17186 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17187 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17188 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
17189 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
17190 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
17191 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17192 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17193 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17194 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
17195 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17196 { VAR10 (BINOP, vsub,
17197 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17198 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
17199 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
17200 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17201 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17202 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
17203 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17204 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17205 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17206 { VAR2 (BINOP, vcage, v2sf, v4sf) },
17207 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
17208 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17209 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17210 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
17211 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17212 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
17213 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17214 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17215 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
17216 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17217 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17218 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
17219 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
17220 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
17221 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
17222 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17223 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17224 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17225 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17226 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17227 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17228 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17229 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17230 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
17231 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
17232 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
17233 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17234 /* FIXME: vget_lane supports more variants than this! */
17235 { VAR10 (GETLANE, vget_lane,
17236 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17237 { VAR10 (SETLANE, vset_lane,
17238 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17239 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
17240 { VAR10 (DUP, vdup_n,
17241 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17242 { VAR10 (DUPLANE, vdup_lane,
17243 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17244 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
17245 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
17246 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
17247 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
17248 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
17249 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
17250 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
17251 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17252 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17253 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
17254 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
17255 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17256 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
17257 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
17258 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17259 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17260 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
17261 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
17262 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17263 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
17264 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
17265 { VAR10 (BINOP, vext,
17266 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17267 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17268 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
17269 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
17270 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
17271 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
17272 { VAR10 (SELECT, vbsl,
17273 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17274 { VAR1 (VTBL, vtbl1, v8qi) },
17275 { VAR1 (VTBL, vtbl2, v8qi) },
17276 { VAR1 (VTBL, vtbl3, v8qi) },
17277 { VAR1 (VTBL, vtbl4, v8qi) },
17278 { VAR1 (VTBX, vtbx1, v8qi) },
17279 { VAR1 (VTBX, vtbx2, v8qi) },
17280 { VAR1 (VTBX, vtbx3, v8qi) },
17281 { VAR1 (VTBX, vtbx4, v8qi) },
17282 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17283 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17284 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17285 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
17286 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
17287 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
17288 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
17289 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
17290 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
17291 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
17292 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
17293 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
17294 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
17295 { VAR10 (LOAD1, vld1,
17296 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17297 { VAR10 (LOAD1LANE, vld1_lane,
17298 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17299 { VAR10 (LOAD1, vld1_dup,
17300 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17301 { VAR10 (STORE1, vst1,
17302 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17303 { VAR10 (STORE1LANE, vst1_lane,
17304 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17305 { VAR9 (LOADSTRUCT,
17306 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17307 { VAR7 (LOADSTRUCTLANE, vld2_lane,
17308 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17309 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
17310 { VAR9 (STORESTRUCT, vst2,
17311 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17312 { VAR7 (STORESTRUCTLANE, vst2_lane,
17313 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17314 { VAR9 (LOADSTRUCT,
17315 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17316 { VAR7 (LOADSTRUCTLANE, vld3_lane,
17317 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17318 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
17319 { VAR9 (STORESTRUCT, vst3,
17320 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17321 { VAR7 (STORESTRUCTLANE, vst3_lane,
17322 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17323 { VAR9 (LOADSTRUCT, vld4,
17324 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17325 { VAR7 (LOADSTRUCTLANE, vld4_lane,
17326 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17327 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
17328 { VAR9 (STORESTRUCT, vst4,
17329 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17330 { VAR7 (STORESTRUCTLANE, vst4_lane,
17331 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17332 { VAR10 (LOGICBINOP, vand,
17333 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17334 { VAR10 (LOGICBINOP, vorr,
17335 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17336 { VAR10 (BINOP, veor,
17337 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17338 { VAR10 (LOGICBINOP, vbic,
17339 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17340 { VAR10 (LOGICBINOP, vorn,
17341 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
17344 #undef CF
17345 #undef VAR1
17346 #undef VAR2
17347 #undef VAR3
17348 #undef VAR4
17349 #undef VAR5
17350 #undef VAR6
17351 #undef VAR7
17352 #undef VAR8
17353 #undef VAR9
17354 #undef VAR10
17356 static void
17357 arm_init_neon_builtins (void)
17359 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
17361 tree neon_intQI_type_node;
17362 tree neon_intHI_type_node;
17363 tree neon_polyQI_type_node;
17364 tree neon_polyHI_type_node;
17365 tree neon_intSI_type_node;
17366 tree neon_intDI_type_node;
17367 tree neon_float_type_node;
17369 tree intQI_pointer_node;
17370 tree intHI_pointer_node;
17371 tree intSI_pointer_node;
17372 tree intDI_pointer_node;
17373 tree float_pointer_node;
17375 tree const_intQI_node;
17376 tree const_intHI_node;
17377 tree const_intSI_node;
17378 tree const_intDI_node;
17379 tree const_float_node;
17381 tree const_intQI_pointer_node;
17382 tree const_intHI_pointer_node;
17383 tree const_intSI_pointer_node;
17384 tree const_intDI_pointer_node;
17385 tree const_float_pointer_node;
17387 tree V8QI_type_node;
17388 tree V4HI_type_node;
17389 tree V2SI_type_node;
17390 tree V2SF_type_node;
17391 tree V16QI_type_node;
17392 tree V8HI_type_node;
17393 tree V4SI_type_node;
17394 tree V4SF_type_node;
17395 tree V2DI_type_node;
17397 tree intUQI_type_node;
17398 tree intUHI_type_node;
17399 tree intUSI_type_node;
17400 tree intUDI_type_node;
17402 tree intEI_type_node;
17403 tree intOI_type_node;
17404 tree intCI_type_node;
17405 tree intXI_type_node;
17407 tree V8QI_pointer_node;
17408 tree V4HI_pointer_node;
17409 tree V2SI_pointer_node;
17410 tree V2SF_pointer_node;
17411 tree V16QI_pointer_node;
17412 tree V8HI_pointer_node;
17413 tree V4SI_pointer_node;
17414 tree V4SF_pointer_node;
17415 tree V2DI_pointer_node;
17417 tree void_ftype_pv8qi_v8qi_v8qi;
17418 tree void_ftype_pv4hi_v4hi_v4hi;
17419 tree void_ftype_pv2si_v2si_v2si;
17420 tree void_ftype_pv2sf_v2sf_v2sf;
17421 tree void_ftype_pdi_di_di;
17422 tree void_ftype_pv16qi_v16qi_v16qi;
17423 tree void_ftype_pv8hi_v8hi_v8hi;
17424 tree void_ftype_pv4si_v4si_v4si;
17425 tree void_ftype_pv4sf_v4sf_v4sf;
17426 tree void_ftype_pv2di_v2di_v2di;
17428 tree reinterp_ftype_dreg[5][5];
17429 tree reinterp_ftype_qreg[5][5];
17430 tree dreg_types[5], qreg_types[5];
17432 /* Create distinguished type nodes for NEON vector element types,
17433 and pointers to values of such types, so we can detect them later. */
17434 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17435 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17436 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17437 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17438 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
17439 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
17440 neon_float_type_node = make_node (REAL_TYPE);
17441 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
17442 layout_type (neon_float_type_node);
17444 /* Define typedefs which exactly correspond to the modes we are basing vector
17445 types on. If you change these names you'll need to change
17446 the table used by arm_mangle_type too. */
17447 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
17448 "__builtin_neon_qi");
17449 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
17450 "__builtin_neon_hi");
17451 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
17452 "__builtin_neon_si");
17453 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
17454 "__builtin_neon_sf");
17455 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
17456 "__builtin_neon_di");
17457 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
17458 "__builtin_neon_poly8");
17459 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
17460 "__builtin_neon_poly16");
17462 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
17463 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
17464 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
17465 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
17466 float_pointer_node = build_pointer_type (neon_float_type_node);
17468 /* Next create constant-qualified versions of the above types. */
17469 const_intQI_node = build_qualified_type (neon_intQI_type_node,
17470 TYPE_QUAL_CONST);
17471 const_intHI_node = build_qualified_type (neon_intHI_type_node,
17472 TYPE_QUAL_CONST);
17473 const_intSI_node = build_qualified_type (neon_intSI_type_node,
17474 TYPE_QUAL_CONST);
17475 const_intDI_node = build_qualified_type (neon_intDI_type_node,
17476 TYPE_QUAL_CONST);
17477 const_float_node = build_qualified_type (neon_float_type_node,
17478 TYPE_QUAL_CONST);
17480 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
17481 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
17482 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
17483 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
17484 const_float_pointer_node = build_pointer_type (const_float_node);
17486 /* Now create vector types based on our NEON element types. */
17487 /* 64-bit vectors. */
17488 V8QI_type_node =
17489 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
17490 V4HI_type_node =
17491 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
17492 V2SI_type_node =
17493 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
17494 V2SF_type_node =
17495 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
17496 /* 128-bit vectors. */
17497 V16QI_type_node =
17498 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
17499 V8HI_type_node =
17500 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
17501 V4SI_type_node =
17502 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
17503 V4SF_type_node =
17504 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
17505 V2DI_type_node =
17506 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
17508 /* Unsigned integer types for various mode sizes. */
17509 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
17510 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
17511 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
17512 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
17514 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
17515 "__builtin_neon_uqi");
17516 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
17517 "__builtin_neon_uhi");
17518 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
17519 "__builtin_neon_usi");
17520 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
17521 "__builtin_neon_udi");
17523 /* Opaque integer types for structures of vectors. */
17524 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
17525 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
17526 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
17527 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
17529 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
17530 "__builtin_neon_ti");
17531 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
17532 "__builtin_neon_ei");
17533 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
17534 "__builtin_neon_oi");
17535 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
17536 "__builtin_neon_ci");
17537 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
17538 "__builtin_neon_xi");
17540 /* Pointers to vector types. */
17541 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
17542 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
17543 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
17544 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
17545 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
17546 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
17547 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
17548 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
17549 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
17551 /* Operations which return results as pairs. */
17552 void_ftype_pv8qi_v8qi_v8qi =
17553 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
17554 V8QI_type_node, NULL);
17555 void_ftype_pv4hi_v4hi_v4hi =
17556 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
17557 V4HI_type_node, NULL);
17558 void_ftype_pv2si_v2si_v2si =
17559 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
17560 V2SI_type_node, NULL);
17561 void_ftype_pv2sf_v2sf_v2sf =
17562 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
17563 V2SF_type_node, NULL);
17564 void_ftype_pdi_di_di =
17565 build_function_type_list (void_type_node, intDI_pointer_node,
17566 neon_intDI_type_node, neon_intDI_type_node, NULL);
17567 void_ftype_pv16qi_v16qi_v16qi =
17568 build_function_type_list (void_type_node, V16QI_pointer_node,
17569 V16QI_type_node, V16QI_type_node, NULL);
17570 void_ftype_pv8hi_v8hi_v8hi =
17571 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
17572 V8HI_type_node, NULL);
17573 void_ftype_pv4si_v4si_v4si =
17574 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
17575 V4SI_type_node, NULL);
17576 void_ftype_pv4sf_v4sf_v4sf =
17577 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
17578 V4SF_type_node, NULL);
17579 void_ftype_pv2di_v2di_v2di =
17580 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
17581 V2DI_type_node, NULL);
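/* For illustration (the intrinsic names below come from general NEON
   knowledge, not from this excerpt of the table): these "pointer plus two
   vectors" signatures are the types used for NEON_RESULTPAIR builtins such
   as the vtrn/vzip/vuzp families, whose intrinsics produce two vectors at
   once.  A builtin of this shape, e.g.

     void __builtin_neon_vtrnv8qi (int8x8_t *dest, int8x8_t a, int8x8_t b);

   stores both result vectors through DEST; see neon_emit_pair_result_insn
   further below for how the two halves are written out.  */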
17583 dreg_types[0] = V8QI_type_node;
17584 dreg_types[1] = V4HI_type_node;
17585 dreg_types[2] = V2SI_type_node;
17586 dreg_types[3] = V2SF_type_node;
17587 dreg_types[4] = neon_intDI_type_node;
17589 qreg_types[0] = V16QI_type_node;
17590 qreg_types[1] = V8HI_type_node;
17591 qreg_types[2] = V4SI_type_node;
17592 qreg_types[3] = V4SF_type_node;
17593 qreg_types[4] = V2DI_type_node;
17595 for (i = 0; i < 5; i++)
17597 int j;
17598 for (j = 0; j < 5; j++)
17600 reinterp_ftype_dreg[i][j]
17601 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
17602 reinterp_ftype_qreg[i][j]
17603 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
17607 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
17609 neon_builtin_datum *d = &neon_builtin_data[i];
17610 unsigned int j, codeidx = 0;
17612 d->base_fcode = fcode;
17614 for (j = 0; j < T_MAX; j++)
17616 const char* const modenames[] = {
17617 "v8qi", "v4hi", "v2si", "v2sf", "di",
17618 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
17620 char namebuf[60];
17621 tree ftype = NULL;
17622 enum insn_code icode;
17623 int is_load = 0, is_store = 0;
17625 if ((d->bits & (1 << j)) == 0)
17626 continue;
17628 icode = d->codes[codeidx++];
17630 switch (d->itype)
17632 case NEON_LOAD1:
17633 case NEON_LOAD1LANE:
17634 case NEON_LOADSTRUCT:
17635 case NEON_LOADSTRUCTLANE:
17636 is_load = 1;
17637 /* Fall through. */
17638 case NEON_STORE1:
17639 case NEON_STORE1LANE:
17640 case NEON_STORESTRUCT:
17641 case NEON_STORESTRUCTLANE:
17642 if (!is_load)
17643 is_store = 1;
17644 /* Fall through. */
17645 case NEON_UNOP:
17646 case NEON_BINOP:
17647 case NEON_LOGICBINOP:
17648 case NEON_SHIFTINSERT:
17649 case NEON_TERNOP:
17650 case NEON_GETLANE:
17651 case NEON_SETLANE:
17652 case NEON_CREATE:
17653 case NEON_DUP:
17654 case NEON_DUPLANE:
17655 case NEON_SHIFTIMM:
17656 case NEON_SHIFTACC:
17657 case NEON_COMBINE:
17658 case NEON_SPLIT:
17659 case NEON_CONVERT:
17660 case NEON_FIXCONV:
17661 case NEON_LANEMUL:
17662 case NEON_LANEMULL:
17663 case NEON_LANEMULH:
17664 case NEON_LANEMAC:
17665 case NEON_SCALARMUL:
17666 case NEON_SCALARMULL:
17667 case NEON_SCALARMULH:
17668 case NEON_SCALARMAC:
17669 case NEON_SELECT:
17670 case NEON_VTBL:
17671 case NEON_VTBX:
17673 int k;
17674 tree return_type = void_type_node, args = void_list_node;
17676 /* Build a function type directly from the insn_data for this
17677 builtin. The build_function_type() function takes care of
17678 removing duplicates for us. */
17679 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
17681 tree eltype;
17683 if (is_load && k == 1)
17685 /* Neon load patterns always have the memory operand
17686 (a SImode pointer) in the operand 1 position. We
17687 want a const pointer to the element type in that
17688 position. */
17689 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17691 switch (1 << j)
17693 case T_V8QI:
17694 case T_V16QI:
17695 eltype = const_intQI_pointer_node;
17696 break;
17698 case T_V4HI:
17699 case T_V8HI:
17700 eltype = const_intHI_pointer_node;
17701 break;
17703 case T_V2SI:
17704 case T_V4SI:
17705 eltype = const_intSI_pointer_node;
17706 break;
17708 case T_V2SF:
17709 case T_V4SF:
17710 eltype = const_float_pointer_node;
17711 break;
17713 case T_DI:
17714 case T_V2DI:
17715 eltype = const_intDI_pointer_node;
17716 break;
17718 default: gcc_unreachable ();
17721 else if (is_store && k == 0)
17723 /* Similarly, Neon store patterns use operand 0 as
17724 the memory location to store to (a SImode pointer).
17725 Use a pointer to the element type of the store in
17726 that position. */
17727 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17729 switch (1 << j)
17731 case T_V8QI:
17732 case T_V16QI:
17733 eltype = intQI_pointer_node;
17734 break;
17736 case T_V4HI:
17737 case T_V8HI:
17738 eltype = intHI_pointer_node;
17739 break;
17741 case T_V2SI:
17742 case T_V4SI:
17743 eltype = intSI_pointer_node;
17744 break;
17746 case T_V2SF:
17747 case T_V4SF:
17748 eltype = float_pointer_node;
17749 break;
17751 case T_DI:
17752 case T_V2DI:
17753 eltype = intDI_pointer_node;
17754 break;
17756 default: gcc_unreachable ();
17759 else
17761 switch (insn_data[icode].operand[k].mode)
17763 case VOIDmode: eltype = void_type_node; break;
17764 /* Scalars. */
17765 case QImode: eltype = neon_intQI_type_node; break;
17766 case HImode: eltype = neon_intHI_type_node; break;
17767 case SImode: eltype = neon_intSI_type_node; break;
17768 case SFmode: eltype = neon_float_type_node; break;
17769 case DImode: eltype = neon_intDI_type_node; break;
17770 case TImode: eltype = intTI_type_node; break;
17771 case EImode: eltype = intEI_type_node; break;
17772 case OImode: eltype = intOI_type_node; break;
17773 case CImode: eltype = intCI_type_node; break;
17774 case XImode: eltype = intXI_type_node; break;
17775 /* 64-bit vectors. */
17776 case V8QImode: eltype = V8QI_type_node; break;
17777 case V4HImode: eltype = V4HI_type_node; break;
17778 case V2SImode: eltype = V2SI_type_node; break;
17779 case V2SFmode: eltype = V2SF_type_node; break;
17780 /* 128-bit vectors. */
17781 case V16QImode: eltype = V16QI_type_node; break;
17782 case V8HImode: eltype = V8HI_type_node; break;
17783 case V4SImode: eltype = V4SI_type_node; break;
17784 case V4SFmode: eltype = V4SF_type_node; break;
17785 case V2DImode: eltype = V2DI_type_node; break;
17786 default: gcc_unreachable ();
17790 if (k == 0 && !is_store)
17791 return_type = eltype;
17792 else
17793 args = tree_cons (NULL_TREE, eltype, args);
17796 ftype = build_function_type (return_type, args);
17798 break;
17800 case NEON_RESULTPAIR:
17802 switch (insn_data[icode].operand[1].mode)
17804 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
17805 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
17806 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
17807 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
17808 case DImode: ftype = void_ftype_pdi_di_di; break;
17809 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
17810 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
17811 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
17812 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
17813 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
17814 default: gcc_unreachable ();
17817 break;
17819 case NEON_REINTERP:
17821 /* We iterate over 5 doubleword types, then 5 quadword
17822 types. */
17823 int rhs = j % 5;
17824 switch (insn_data[icode].operand[0].mode)
17826 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
17827 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
17828 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
17829 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
17830 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
17831 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
17832 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
17833 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
17834 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
17835 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
17836 default: gcc_unreachable ();
17839 break;
17841 default:
17842 gcc_unreachable ();
17845 gcc_assert (ftype != NULL);
17847 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
17849 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
17850 NULL_TREE);
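/* For illustration: the sprintf above builds the user-visible name from the
   table entry's base name plus the per-variant mode suffix, so the LOAD1
   entry for "vld1" with its v8qi bit set produces a builtin named
   __builtin_neon_vld1v8qi, registered with the next free function code and
   with a function type derived from the corresponding insn pattern's
   operands.  Each (entry, mode) pair therefore gets its own builtin.  */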
17855 static void
17856 arm_init_fp16_builtins (void)
17858 tree fp16_type = make_node (REAL_TYPE);
17859 TYPE_PRECISION (fp16_type) = 16;
17860 layout_type (fp16_type);
17861 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
17864 static void
17865 arm_init_builtins (void)
17867 arm_init_tls_builtins ();
17869 if (TARGET_REALLY_IWMMXT)
17870 arm_init_iwmmxt_builtins ();
17872 if (TARGET_NEON)
17873 arm_init_neon_builtins ();
17875 if (arm_fp16_format)
17876 arm_init_fp16_builtins ();
17879 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
17881 static const char *
17882 arm_invalid_parameter_type (const_tree t)
17884 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17885 return N_("function parameters cannot have __fp16 type");
17886 return NULL;
17889 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
17891 static const char *
17892 arm_invalid_return_type (const_tree t)
17894 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17895 return N_("functions cannot return __fp16 type");
17896 return NULL;
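/* For illustration: with the two hooks above in effect, declarations such
   as "__fp16 f (void);" or "void g (__fp16 x);" are rejected with the
   messages above; __fp16 values must be promoted to float before they can
   cross a function-call boundary.  */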
17899 /* Implement TARGET_PROMOTED_TYPE. */
17901 static tree
17902 arm_promoted_type (const_tree t)
17904 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17905 return float_type_node;
17906 return NULL_TREE;
17909 /* Implement TARGET_CONVERT_TO_TYPE.
17910 Specifically, this hook implements the peculiarity of the ARM
17911 half-precision floating-point C semantics that requires conversions between
17912 __fp16 and double to go through an intermediate conversion to float. */
17914 static tree
17915 arm_convert_to_type (tree type, tree expr)
17917 tree fromtype = TREE_TYPE (expr);
17918 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
17919 return NULL_TREE;
17920 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
17921 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
17922 return convert (type, convert (float_type_node, expr));
17923 return NULL_TREE;
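/* For illustration (a sketch of the resulting C semantics, assuming a
   target where __fp16 is enabled):

     __fp16 h = 1.0;
     double d = h + h;    arithmetic promotes each h to float first
     __fp16 r = d;        narrowing goes double -> float -> __fp16

   i.e. arm_promoted_type supplies the promotion to float and
   arm_convert_to_type inserts the intermediate float step, so there is
   never a direct __fp16 <-> double conversion.  */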
17926 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
17927 This simply adds HFmode as a supported mode; even though we don't
17928 implement arithmetic on this type directly, it's supported by
17929 optabs conversions, much the way the double-word arithmetic is
17930 special-cased in the default hook. */
17932 static bool
17933 arm_scalar_mode_supported_p (enum machine_mode mode)
17935 if (mode == HFmode)
17936 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
17937 else
17938 return default_scalar_mode_supported_p (mode);
17941 /* Errors in the source file can cause expand_expr to return const0_rtx
17942 where we expect a vector. To avoid crashing, use one of the vector
17943 clear instructions. */
17945 static rtx
17946 safe_vector_operand (rtx x, enum machine_mode mode)
17948 if (x != const0_rtx)
17949 return x;
17950 x = gen_reg_rtx (mode);
17952 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
17953 : gen_rtx_SUBREG (DImode, x, 0)));
17954 return x;
17957 /* Subroutine of arm_expand_builtin to take care of binop insns. */
17959 static rtx
17960 arm_expand_binop_builtin (enum insn_code icode,
17961 tree exp, rtx target)
17963 rtx pat;
17964 tree arg0 = CALL_EXPR_ARG (exp, 0);
17965 tree arg1 = CALL_EXPR_ARG (exp, 1);
17966 rtx op0 = expand_normal (arg0);
17967 rtx op1 = expand_normal (arg1);
17968 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17969 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17970 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17972 if (VECTOR_MODE_P (mode0))
17973 op0 = safe_vector_operand (op0, mode0);
17974 if (VECTOR_MODE_P (mode1))
17975 op1 = safe_vector_operand (op1, mode1);
17977 if (! target
17978 || GET_MODE (target) != tmode
17979 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17980 target = gen_reg_rtx (tmode);
17982 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
17984 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17985 op0 = copy_to_mode_reg (mode0, op0);
17986 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17987 op1 = copy_to_mode_reg (mode1, op1);
17989 pat = GEN_FCN (icode) (target, op0, op1);
17990 if (! pat)
17991 return 0;
17992 emit_insn (pat);
17993 return target;
17996 /* Subroutine of arm_expand_builtin to take care of unop insns. */
17998 static rtx
17999 arm_expand_unop_builtin (enum insn_code icode,
18000 tree exp, rtx target, int do_load)
18002 rtx pat;
18003 tree arg0 = CALL_EXPR_ARG (exp, 0);
18004 rtx op0 = expand_normal (arg0);
18005 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18006 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18008 if (! target
18009 || GET_MODE (target) != tmode
18010 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18011 target = gen_reg_rtx (tmode);
18012 if (do_load)
18013 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18014 else
18016 if (VECTOR_MODE_P (mode0))
18017 op0 = safe_vector_operand (op0, mode0);
18019 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18020 op0 = copy_to_mode_reg (mode0, op0);
18023 pat = GEN_FCN (icode) (target, op0);
18024 if (! pat)
18025 return 0;
18026 emit_insn (pat);
18027 return target;
18030 static int
18031 neon_builtin_compare (const void *a, const void *b)
18033 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
18034 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
18035 unsigned int soughtcode = key->base_fcode;
18037 if (soughtcode >= memb->base_fcode
18038 && soughtcode < memb->base_fcode + memb->num_vars)
18039 return 0;
18040 else if (soughtcode < memb->base_fcode)
18041 return -1;
18042 else
18043 return 1;
18046 static enum insn_code
18047 locate_neon_builtin_icode (int fcode, neon_itype *itype)
18049 neon_builtin_datum key, *found;
18050 int idx;
18052 key.base_fcode = fcode;
18053 found = (neon_builtin_datum *)
18054 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
18055 sizeof (neon_builtin_data[0]), neon_builtin_compare);
18056 gcc_assert (found);
18057 idx = fcode - (int) found->base_fcode;
18058 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
18060 if (itype)
18061 *itype = found->itype;
18063 return found->codes[idx];
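/* For illustration: each table entry owns a contiguous block of function
   codes starting at base_fcode, one per enabled mode variant, so the
   bsearch above locates the entry whose [base_fcode, base_fcode + num_vars)
   range contains FCODE, and the offset into that range selects the insn
   code.  E.g. (hypothetical numbers) an entry with base_fcode 100 and ten
   variants maps fcode 103 to codes[3].  */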
18066 typedef enum {
18067 NEON_ARG_COPY_TO_REG,
18068 NEON_ARG_CONSTANT,
18069 NEON_ARG_STOP
18070 } builtin_arg;
18072 #define NEON_MAX_BUILTIN_ARGS 5
18074 /* Expand a Neon builtin. */
18075 static rtx
18076 arm_expand_neon_args (rtx target, int icode, int have_retval,
18077 tree exp, ...)
18079 va_list ap;
18080 rtx pat;
18081 tree arg[NEON_MAX_BUILTIN_ARGS];
18082 rtx op[NEON_MAX_BUILTIN_ARGS];
18083 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18084 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
18085 int argc = 0;
18087 if (have_retval
18088 && (!target
18089 || GET_MODE (target) != tmode
18090 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
18091 target = gen_reg_rtx (tmode);
18093 va_start (ap, exp);
18095 for (;;)
18097 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
18099 if (thisarg == NEON_ARG_STOP)
18100 break;
18101 else
18103 arg[argc] = CALL_EXPR_ARG (exp, argc);
18104 op[argc] = expand_normal (arg[argc]);
18105 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
18107 switch (thisarg)
18109 case NEON_ARG_COPY_TO_REG:
18110 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
18111 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18112 (op[argc], mode[argc]))
18113 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
18114 break;
18116 case NEON_ARG_CONSTANT:
18117 /* FIXME: This error message is somewhat unhelpful. */
18118 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18119 (op[argc], mode[argc]))
18120 error ("argument must be a constant");
18121 break;
18123 case NEON_ARG_STOP:
18124 gcc_unreachable ();
18127 argc++;
18131 va_end (ap);
18133 if (have_retval)
18134 switch (argc)
18136 case 1:
18137 pat = GEN_FCN (icode) (target, op[0]);
18138 break;
18140 case 2:
18141 pat = GEN_FCN (icode) (target, op[0], op[1]);
18142 break;
18144 case 3:
18145 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
18146 break;
18148 case 4:
18149 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
18150 break;
18152 case 5:
18153 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
18154 break;
18156 default:
18157 gcc_unreachable ();
18159 else
18160 switch (argc)
18162 case 1:
18163 pat = GEN_FCN (icode) (op[0]);
18164 break;
18166 case 2:
18167 pat = GEN_FCN (icode) (op[0], op[1]);
18168 break;
18170 case 3:
18171 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
18172 break;
18174 case 4:
18175 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
18176 break;
18178 case 5:
18179 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
18180 break;
18182 default:
18183 gcc_unreachable ();
18186 if (!pat)
18187 return 0;
18189 emit_insn (pat);
18191 return target;
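/* For illustration: callers describe each builtin operand with the markers
   above.  A typical two-input operation that also takes a lane or shift
   immediate is expanded as

     arm_expand_neon_args (target, icode, 1, exp,
                           NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
                           NEON_ARG_CONSTANT, NEON_ARG_STOP);

   which forces the first two operands into registers, checks the third
   against the insn's constant predicate, and emits the insn with a fresh
   register for the result when TARGET is unsuitable.  */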
18194 /* Expand a Neon builtin. These are "special" because they don't have symbolic
18195 constants defined per-instruction or per instruction-variant. Instead, the
18196 required info is looked up in the table neon_builtin_data. */
18197 static rtx
18198 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
18200 neon_itype itype;
18201 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
18203 switch (itype)
18205 case NEON_UNOP:
18206 case NEON_CONVERT:
18207 case NEON_DUPLANE:
18208 return arm_expand_neon_args (target, icode, 1, exp,
18209 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18211 case NEON_BINOP:
18212 case NEON_SETLANE:
18213 case NEON_SCALARMUL:
18214 case NEON_SCALARMULL:
18215 case NEON_SCALARMULH:
18216 case NEON_SHIFTINSERT:
18217 case NEON_LOGICBINOP:
18218 return arm_expand_neon_args (target, icode, 1, exp,
18219 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18220 NEON_ARG_STOP);
18222 case NEON_TERNOP:
18223 return arm_expand_neon_args (target, icode, 1, exp,
18224 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18225 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18227 case NEON_GETLANE:
18228 case NEON_FIXCONV:
18229 case NEON_SHIFTIMM:
18230 return arm_expand_neon_args (target, icode, 1, exp,
18231 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
18232 NEON_ARG_STOP);
18234 case NEON_CREATE:
18235 return arm_expand_neon_args (target, icode, 1, exp,
18236 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18238 case NEON_DUP:
18239 case NEON_SPLIT:
18240 case NEON_REINTERP:
18241 return arm_expand_neon_args (target, icode, 1, exp,
18242 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18244 case NEON_COMBINE:
18245 case NEON_VTBL:
18246 return arm_expand_neon_args (target, icode, 1, exp,
18247 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18249 case NEON_RESULTPAIR:
18250 return arm_expand_neon_args (target, icode, 0, exp,
18251 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18252 NEON_ARG_STOP);
18254 case NEON_LANEMUL:
18255 case NEON_LANEMULL:
18256 case NEON_LANEMULH:
18257 return arm_expand_neon_args (target, icode, 1, exp,
18258 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18259 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18261 case NEON_LANEMAC:
18262 return arm_expand_neon_args (target, icode, 1, exp,
18263 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18264 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18266 case NEON_SHIFTACC:
18267 return arm_expand_neon_args (target, icode, 1, exp,
18268 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18269 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18271 case NEON_SCALARMAC:
18272 return arm_expand_neon_args (target, icode, 1, exp,
18273 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18274 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18276 case NEON_SELECT:
18277 case NEON_VTBX:
18278 return arm_expand_neon_args (target, icode, 1, exp,
18279 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18280 NEON_ARG_STOP);
18282 case NEON_LOAD1:
18283 case NEON_LOADSTRUCT:
18284 return arm_expand_neon_args (target, icode, 1, exp,
18285 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18287 case NEON_LOAD1LANE:
18288 case NEON_LOADSTRUCTLANE:
18289 return arm_expand_neon_args (target, icode, 1, exp,
18290 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18291 NEON_ARG_STOP);
18293 case NEON_STORE1:
18294 case NEON_STORESTRUCT:
18295 return arm_expand_neon_args (target, icode, 0, exp,
18296 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18298 case NEON_STORE1LANE:
18299 case NEON_STORESTRUCTLANE:
18300 return arm_expand_neon_args (target, icode, 0, exp,
18301 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18302 NEON_ARG_STOP);
18305 gcc_unreachable ();
18308 /* Emit code to reinterpret one Neon type as another, without altering bits. */
18309 void
18310 neon_reinterpret (rtx dest, rtx src)
18312 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
18315 /* Emit code to place a Neon pair result in memory locations (with equal
18316 registers). */
18317 void
18318 neon_emit_pair_result_insn (enum machine_mode mode,
18319 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
18320 rtx op1, rtx op2)
18322 rtx mem = gen_rtx_MEM (mode, destaddr);
18323 rtx tmp1 = gen_reg_rtx (mode);
18324 rtx tmp2 = gen_reg_rtx (mode);
18326 emit_insn (intfn (tmp1, op1, tmp2, op2));
18328 emit_move_insn (mem, tmp1);
18329 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
18330 emit_move_insn (mem, tmp2);
18333 /* Set up operands for a register copy from src to dest, taking care not to
18334 clobber registers in the process.
18335 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
18336 be called with a large N, so that should be OK. */
18338 void
18339 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
18341 unsigned int copied = 0, opctr = 0;
18342 unsigned int done = (1 << count) - 1;
18343 unsigned int i, j;
18345 while (copied != done)
18347 for (i = 0; i < count; i++)
18349 int good = 1;
18351 for (j = 0; good && j < count; j++)
18352 if (i != j && (copied & (1 << j)) == 0
18353 && reg_overlap_mentioned_p (src[j], dest[i]))
18354 good = 0;
18356 if (good)
18358 operands[opctr++] = dest[i];
18359 operands[opctr++] = src[i];
18360 copied |= 1 << i;
18365 gcc_assert (opctr == count * 2);
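/* For illustration (register names hypothetical): with dest = {q1, q2} and
   src = {q0, q1}, copying into q1 first would clobber the still-needed
   source q1, so the loop above orders the pairs as (q2 <- q1) then
   (q1 <- q0), always picking a destination that does not overlap any source
   that has yet to be consumed.  */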
18368 /* Expand an expression EXP that calls a built-in function,
18369 with result going to TARGET if that's convenient
18370 (and in mode MODE if that's convenient).
18371 SUBTARGET may be used as the target for computing one of EXP's operands.
18372 IGNORE is nonzero if the value is to be ignored. */
18374 static rtx
18375 arm_expand_builtin (tree exp,
18376 rtx target,
18377 rtx subtarget ATTRIBUTE_UNUSED,
18378 enum machine_mode mode ATTRIBUTE_UNUSED,
18379 int ignore ATTRIBUTE_UNUSED)
18381 const struct builtin_description * d;
18382 enum insn_code icode;
18383 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18384 tree arg0;
18385 tree arg1;
18386 tree arg2;
18387 rtx op0;
18388 rtx op1;
18389 rtx op2;
18390 rtx pat;
18391 int fcode = DECL_FUNCTION_CODE (fndecl);
18392 size_t i;
18393 enum machine_mode tmode;
18394 enum machine_mode mode0;
18395 enum machine_mode mode1;
18396 enum machine_mode mode2;
18398 if (fcode >= ARM_BUILTIN_NEON_BASE)
18399 return arm_expand_neon_builtin (fcode, exp, target);
18401 switch (fcode)
18403 case ARM_BUILTIN_TEXTRMSB:
18404 case ARM_BUILTIN_TEXTRMUB:
18405 case ARM_BUILTIN_TEXTRMSH:
18406 case ARM_BUILTIN_TEXTRMUH:
18407 case ARM_BUILTIN_TEXTRMSW:
18408 case ARM_BUILTIN_TEXTRMUW:
18409 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
18410 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
18411 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
18412 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
18413 : CODE_FOR_iwmmxt_textrmw);
18415 arg0 = CALL_EXPR_ARG (exp, 0);
18416 arg1 = CALL_EXPR_ARG (exp, 1);
18417 op0 = expand_normal (arg0);
18418 op1 = expand_normal (arg1);
18419 tmode = insn_data[icode].operand[0].mode;
18420 mode0 = insn_data[icode].operand[1].mode;
18421 mode1 = insn_data[icode].operand[2].mode;
18423 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18424 op0 = copy_to_mode_reg (mode0, op0);
18425 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18427 /* @@@ better error message */
18428 error ("selector must be an immediate");
18429 return gen_reg_rtx (tmode);
18431 if (target == 0
18432 || GET_MODE (target) != tmode
18433 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18434 target = gen_reg_rtx (tmode);
18435 pat = GEN_FCN (icode) (target, op0, op1);
18436 if (! pat)
18437 return 0;
18438 emit_insn (pat);
18439 return target;
18441 case ARM_BUILTIN_TINSRB:
18442 case ARM_BUILTIN_TINSRH:
18443 case ARM_BUILTIN_TINSRW:
18444 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
18445 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
18446 : CODE_FOR_iwmmxt_tinsrw);
18447 arg0 = CALL_EXPR_ARG (exp, 0);
18448 arg1 = CALL_EXPR_ARG (exp, 1);
18449 arg2 = CALL_EXPR_ARG (exp, 2);
18450 op0 = expand_normal (arg0);
18451 op1 = expand_normal (arg1);
18452 op2 = expand_normal (arg2);
18453 tmode = insn_data[icode].operand[0].mode;
18454 mode0 = insn_data[icode].operand[1].mode;
18455 mode1 = insn_data[icode].operand[2].mode;
18456 mode2 = insn_data[icode].operand[3].mode;
18458 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18459 op0 = copy_to_mode_reg (mode0, op0);
18460 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18461 op1 = copy_to_mode_reg (mode1, op1);
18462 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18464 /* @@@ better error message */
18465 error ("selector must be an immediate");
18466 return const0_rtx;
18468 if (target == 0
18469 || GET_MODE (target) != tmode
18470 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18471 target = gen_reg_rtx (tmode);
18472 pat = GEN_FCN (icode) (target, op0, op1, op2);
18473 if (! pat)
18474 return 0;
18475 emit_insn (pat);
18476 return target;
18478 case ARM_BUILTIN_SETWCX:
18479 arg0 = CALL_EXPR_ARG (exp, 0);
18480 arg1 = CALL_EXPR_ARG (exp, 1);
18481 op0 = force_reg (SImode, expand_normal (arg0));
18482 op1 = expand_normal (arg1);
18483 emit_insn (gen_iwmmxt_tmcr (op1, op0));
18484 return 0;
18486 case ARM_BUILTIN_GETWCX:
18487 arg0 = CALL_EXPR_ARG (exp, 0);
18488 op0 = expand_normal (arg0);
18489 target = gen_reg_rtx (SImode);
18490 emit_insn (gen_iwmmxt_tmrc (target, op0));
18491 return target;
18493 case ARM_BUILTIN_WSHUFH:
18494 icode = CODE_FOR_iwmmxt_wshufh;
18495 arg0 = CALL_EXPR_ARG (exp, 0);
18496 arg1 = CALL_EXPR_ARG (exp, 1);
18497 op0 = expand_normal (arg0);
18498 op1 = expand_normal (arg1);
18499 tmode = insn_data[icode].operand[0].mode;
18500 mode1 = insn_data[icode].operand[1].mode;
18501 mode2 = insn_data[icode].operand[2].mode;
18503 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18504 op0 = copy_to_mode_reg (mode1, op0);
18505 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18507 /* @@@ better error message */
18508 error ("mask must be an immediate");
18509 return const0_rtx;
18511 if (target == 0
18512 || GET_MODE (target) != tmode
18513 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18514 target = gen_reg_rtx (tmode);
18515 pat = GEN_FCN (icode) (target, op0, op1);
18516 if (! pat)
18517 return 0;
18518 emit_insn (pat);
18519 return target;
18521 case ARM_BUILTIN_WSADB:
18522 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
18523 case ARM_BUILTIN_WSADH:
18524 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
18525 case ARM_BUILTIN_WSADBZ:
18526 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
18527 case ARM_BUILTIN_WSADHZ:
18528 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
18530 /* Several three-argument builtins. */
18531 case ARM_BUILTIN_WMACS:
18532 case ARM_BUILTIN_WMACU:
18533 case ARM_BUILTIN_WALIGN:
18534 case ARM_BUILTIN_TMIA:
18535 case ARM_BUILTIN_TMIAPH:
18536 case ARM_BUILTIN_TMIATT:
18537 case ARM_BUILTIN_TMIATB:
18538 case ARM_BUILTIN_TMIABT:
18539 case ARM_BUILTIN_TMIABB:
18540 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
18541 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
18542 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
18543 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
18544 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
18545 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
18546 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
18547 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
18548 : CODE_FOR_iwmmxt_walign);
18549 arg0 = CALL_EXPR_ARG (exp, 0);
18550 arg1 = CALL_EXPR_ARG (exp, 1);
18551 arg2 = CALL_EXPR_ARG (exp, 2);
18552 op0 = expand_normal (arg0);
18553 op1 = expand_normal (arg1);
18554 op2 = expand_normal (arg2);
18555 tmode = insn_data[icode].operand[0].mode;
18556 mode0 = insn_data[icode].operand[1].mode;
18557 mode1 = insn_data[icode].operand[2].mode;
18558 mode2 = insn_data[icode].operand[3].mode;
18560 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18561 op0 = copy_to_mode_reg (mode0, op0);
18562 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18563 op1 = copy_to_mode_reg (mode1, op1);
18564 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18565 op2 = copy_to_mode_reg (mode2, op2);
18566 if (target == 0
18567 || GET_MODE (target) != tmode
18568 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18569 target = gen_reg_rtx (tmode);
18570 pat = GEN_FCN (icode) (target, op0, op1, op2);
18571 if (! pat)
18572 return 0;
18573 emit_insn (pat);
18574 return target;
18576 case ARM_BUILTIN_WZERO:
18577 target = gen_reg_rtx (DImode);
18578 emit_insn (gen_iwmmxt_clrdi (target));
18579 return target;
18581 case ARM_BUILTIN_THREAD_POINTER:
18582 return arm_load_tp (target);
18584 default:
18585 break;
18588 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18589 if (d->code == (const enum arm_builtins) fcode)
18590 return arm_expand_binop_builtin (d->icode, exp, target);
18592 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18593 if (d->code == (const enum arm_builtins) fcode)
18594 return arm_expand_unop_builtin (d->icode, exp, target, 0);
18596 /* @@@ Should really do something sensible here. */
18597 return NULL_RTX;
18600 /* Return the number (counting from 0) of
18601 the least significant set bit in MASK. */
18603 inline static int
18604 number_of_first_bit_set (unsigned mask)
18606 int bit;
18608 for (bit = 0;
18609 (mask & (1 << bit)) == 0;
18610 ++bit)
18611 continue;
18613 return bit;
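/* For illustration: number_of_first_bit_set (0x28) returns 3, since 0x28 is
   binary 101000 and bit 3 is its lowest set bit.  MASK must be nonzero,
   otherwise the loop above never terminates.  */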
18616 /* Emit code to push or pop registers to or from the stack. F is the
18617 assembly file. MASK is the registers to push or pop. PUSH is
18618 nonzero if we should push, and zero if we should pop. For debugging
18619 output, if pushing, adjust CFA_OFFSET by the amount of space added
18620 to the stack. REAL_REGS should have the same number of bits set as
18621 MASK, and will be used instead (in the same order) to describe which
18622 registers were saved - this is used to mark the save slots when we
18623 push high registers after moving them to low registers. */
18624 static void
18625 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
18626 unsigned long real_regs)
18628 int regno;
18629 int lo_mask = mask & 0xFF;
18630 int pushed_words = 0;
18632 gcc_assert (mask);
18634 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
18636 /* Special case. Do not generate a POP PC statement here, do it in
18637 thumb_exit ().  */
18638 thumb_exit (f, -1);
18639 return;
18642 if (ARM_EABI_UNWIND_TABLES && push)
18644 fprintf (f, "\t.save\t{");
18645 for (regno = 0; regno < 15; regno++)
18647 if (real_regs & (1 << regno))
18649 if (real_regs & ((1 << regno) -1))
18650 fprintf (f, ", ");
18651 asm_fprintf (f, "%r", regno);
18654 fprintf (f, "}\n");
18657 fprintf (f, "\t%s\t{", push ? "push" : "pop");
18659 /* Look at the low registers first. */
18660 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
18662 if (lo_mask & 1)
18664 asm_fprintf (f, "%r", regno);
18666 if ((lo_mask & ~1) != 0)
18667 fprintf (f, ", ");
18669 pushed_words++;
18673 if (push && (mask & (1 << LR_REGNUM)))
18675 /* Catch pushing the LR. */
18676 if (mask & 0xFF)
18677 fprintf (f, ", ");
18679 asm_fprintf (f, "%r", LR_REGNUM);
18681 pushed_words++;
18683 else if (!push && (mask & (1 << PC_REGNUM)))
18685 /* Catch popping the PC. */
18686 if (TARGET_INTERWORK || TARGET_BACKTRACE
18687 || crtl->calls_eh_return)
18689 /* The PC is never popped directly; instead
18690 it is popped into r3 and then BX is used. */
18691 fprintf (f, "}\n");
18693 thumb_exit (f, -1);
18695 return;
18697 else
18699 if (mask & 0xFF)
18700 fprintf (f, ", ");
18702 asm_fprintf (f, "%r", PC_REGNUM);
18706 fprintf (f, "}\n");
18708 if (push && pushed_words && dwarf2out_do_frame ())
18710 char *l = dwarf2out_cfi_label (false);
18711 int pushed_mask = real_regs;
18713 *cfa_offset += pushed_words * 4;
18714 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
18716 pushed_words = 0;
18717 pushed_mask = real_regs;
18718 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
18720 if (pushed_mask & 1)
18721 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
18726 /* Generate code to return from a thumb function.
18727 If 'reg_containing_return_addr' is -1, then the return address is
18728 actually on the stack, at the stack pointer. */
18729 static void
18730 thumb_exit (FILE *f, int reg_containing_return_addr)
18732 unsigned regs_available_for_popping;
18733 unsigned regs_to_pop;
18734 int pops_needed;
18735 unsigned available;
18736 unsigned required;
18737 int mode;
18738 int size;
18739 int restore_a4 = FALSE;
18741 /* Compute the registers we need to pop. */
18742 regs_to_pop = 0;
18743 pops_needed = 0;
18745 if (reg_containing_return_addr == -1)
18747 regs_to_pop |= 1 << LR_REGNUM;
18748 ++pops_needed;
18751 if (TARGET_BACKTRACE)
18753 /* Restore the (ARM) frame pointer and stack pointer. */
18754 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
18755 pops_needed += 2;
18758 /* If there is nothing to pop then just emit the BX instruction and
18759 return. */
18760 if (pops_needed == 0)
18762 if (crtl->calls_eh_return)
18763 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18765 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18766 return;
18768 /* Otherwise if we are not supporting interworking and we have not created
18769 a backtrace structure and the function was not entered in ARM mode then
18770 just pop the return address straight into the PC. */
18771 else if (!TARGET_INTERWORK
18772 && !TARGET_BACKTRACE
18773 && !is_called_in_ARM_mode (current_function_decl)
18774 && !crtl->calls_eh_return)
18776 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
18777 return;
18780 /* Find out how many of the (return) argument registers we can corrupt. */
18781 regs_available_for_popping = 0;
18783 /* If returning via __builtin_eh_return, the bottom three registers
18784 all contain information needed for the return. */
18785 if (crtl->calls_eh_return)
18786 size = 12;
18787 else
18789 /* Deduce the registers used from the function's
18790 return value.  This is more reliable than examining
18791 df_regs_ever_live_p () because that will be set if the register is
18792 ever used in the function, not just if the register is used
18793 to hold a return value. */
18795 if (crtl->return_rtx != 0)
18796 mode = GET_MODE (crtl->return_rtx);
18797 else
18798 mode = DECL_MODE (DECL_RESULT (current_function_decl));
18800 size = GET_MODE_SIZE (mode);
18802 if (size == 0)
18804 /* In a void function we can use any argument register.
18805 In a function that returns a structure on the stack
18806 we can use the second and third argument registers. */
18807 if (mode == VOIDmode)
18808 regs_available_for_popping =
18809 (1 << ARG_REGISTER (1))
18810 | (1 << ARG_REGISTER (2))
18811 | (1 << ARG_REGISTER (3));
18812 else
18813 regs_available_for_popping =
18814 (1 << ARG_REGISTER (2))
18815 | (1 << ARG_REGISTER (3));
18817 else if (size <= 4)
18818 regs_available_for_popping =
18819 (1 << ARG_REGISTER (2))
18820 | (1 << ARG_REGISTER (3));
18821 else if (size <= 8)
18822 regs_available_for_popping =
18823 (1 << ARG_REGISTER (3));
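/* For illustration: a void function can reuse r0-r2 for popping, a function
   returning a 4-byte int (in r0) can reuse r1 and r2, and a function
   returning an 8-byte value (in r0/r1) can reuse only r2; r3 is held in
   reserve and is only pressed into service further below if these are not
   enough.  */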
18826 /* Match registers to be popped with registers into which we pop them. */
18827 for (available = regs_available_for_popping,
18828 required = regs_to_pop;
18829 required != 0 && available != 0;
18830 available &= ~(available & - available),
18831 required &= ~(required & - required))
18832 -- pops_needed;
18834 /* If we have any popping registers left over, remove them. */
18835 if (available > 0)
18836 regs_available_for_popping &= ~available;
18838 /* Otherwise if we need another popping register we can use
18839 the fourth argument register. */
18840 else if (pops_needed)
18842 /* If we have not found any free argument registers and
18843 reg a4 contains the return address, we must move it. */
18844 if (regs_available_for_popping == 0
18845 && reg_containing_return_addr == LAST_ARG_REGNUM)
18847 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18848 reg_containing_return_addr = LR_REGNUM;
18850 else if (size > 12)
18852 /* Register a4 is being used to hold part of the return value,
18853 but we have dire need of a free, low register. */
18854 restore_a4 = TRUE;
18856 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
18859 if (reg_containing_return_addr != LAST_ARG_REGNUM)
18861 /* The fourth argument register is available. */
18862 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
18864 --pops_needed;
18868 /* Pop as many registers as we can. */
18869 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18870 regs_available_for_popping);
18872 /* Process the registers we popped. */
18873 if (reg_containing_return_addr == -1)
18875 /* The return address was popped into the lowest numbered register. */
18876 regs_to_pop &= ~(1 << LR_REGNUM);
18878 reg_containing_return_addr =
18879 number_of_first_bit_set (regs_available_for_popping);
18881 /* Remove this register from the mask of available registers, so that
18882 the return address will not be corrupted by further pops. */
18883 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
18886 /* If we popped other registers then handle them here. */
18887 if (regs_available_for_popping)
18889 int frame_pointer;
18891 /* Work out which register currently contains the frame pointer. */
18892 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
18894 /* Move it into the correct place. */
18895 asm_fprintf (f, "\tmov\t%r, %r\n",
18896 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
18898 /* (Temporarily) remove it from the mask of popped registers. */
18899 regs_available_for_popping &= ~(1 << frame_pointer);
18900 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
18902 if (regs_available_for_popping)
18904 int stack_pointer;
18906 /* We popped the stack pointer as well,
18907 find the register that contains it. */
18908 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
18910 /* Move it into the stack register. */
18911 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
18913 /* At this point we have popped all necessary registers, so
18914 do not worry about restoring regs_available_for_popping
18915 to its correct value:
18917 assert (pops_needed == 0)
18918 assert (regs_available_for_popping == (1 << frame_pointer))
18919 assert (regs_to_pop == (1 << STACK_POINTER)) */
18921 else
18923 /* Since we have just moved the popped value into the frame
18924 pointer, the popping register is available for reuse, and
18925 we know that we still have the stack pointer left to pop. */
18926 regs_available_for_popping |= (1 << frame_pointer);
18930 /* If we still have registers left on the stack, but we no longer have
18931 any registers into which we can pop them, then we must move the return
18932 address into the link register and make available the register that
18933 contained it. */
18934 if (regs_available_for_popping == 0 && pops_needed > 0)
18936 regs_available_for_popping |= 1 << reg_containing_return_addr;
18938 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
18939 reg_containing_return_addr);
18941 reg_containing_return_addr = LR_REGNUM;
18944 /* If we have registers left on the stack then pop some more.
18945 We know that at most we will want to pop FP and SP. */
18946 if (pops_needed > 0)
18948 int popped_into;
18949 int move_to;
18951 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18952 regs_available_for_popping);
18954 /* We have popped either FP or SP.
18955 Move whichever one it is into the correct register. */
18956 popped_into = number_of_first_bit_set (regs_available_for_popping);
18957 move_to = number_of_first_bit_set (regs_to_pop);
18959 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
18961 regs_to_pop &= ~(1 << move_to);
18963 --pops_needed;
18966 /* If we still have not popped everything then we must have only
18967 had one register available to us and we are now popping the SP. */
18968 if (pops_needed > 0)
18970 int popped_into;
18972 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18973 regs_available_for_popping);
18975 popped_into = number_of_first_bit_set (regs_available_for_popping);
18977 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
18978 /*
18979 assert (regs_to_pop == (1 << STACK_POINTER))
18980 assert (pops_needed == 1)
18981 */
18984 /* If necessary restore the a4 register. */
18985 if (restore_a4)
18987 if (reg_containing_return_addr != LR_REGNUM)
18989 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18990 reg_containing_return_addr = LR_REGNUM;
18993 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
18996 if (crtl->calls_eh_return)
18997 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18999 /* Return to caller. */
19000 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19004 void
19005 thumb1_final_prescan_insn (rtx insn)
19007 if (flag_print_asm_name)
19008 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
19009 INSN_ADDRESSES (INSN_UID (insn)));
19012 int
19013 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
19015 unsigned HOST_WIDE_INT mask = 0xff;
19016 int i;
19018 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
19019 if (val == 0) /* XXX */
19020 return 0;
19022 for (i = 0; i < 25; i++)
19023 if ((val & (mask << i)) == val)
19024 return 1;
19026 return 0;
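/* For illustration: thumb_shiftable_const (0x000001fe) and
   thumb_shiftable_const (0xff000000) both return 1 (0xff shifted left by 1
   and by 24 respectively), while thumb_shiftable_const (0x00000101) returns
   0 because its set bits span more than eight contiguous bit positions.  */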
19029 /* Returns nonzero if the current function contains,
19030 or might contain a far jump. */
19031 static int
19032 thumb_far_jump_used_p (void)
19034 rtx insn;
19036 /* This test is only important for leaf functions. */
19037 /* assert (!leaf_function_p ()); */
19039 /* If we have already decided that far jumps may be used,
19040 do not bother checking again, and always return true even if
19041 it turns out that they are not being used. Once we have made
19042 the decision that far jumps are present (and that hence the link
19043 register will be pushed onto the stack) we cannot go back on it. */
19044 if (cfun->machine->far_jump_used)
19045 return 1;
19047 /* If this function is not being called from the prologue/epilogue
19048 generation code then it must be being called from the
19049 INITIAL_ELIMINATION_OFFSET macro. */
19050 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
19052 /* In this case we know that we are being asked about the elimination
19053 of the arg pointer register. If that register is not being used,
19054 then there are no arguments on the stack, and we do not have to
19055 worry that a far jump might force the prologue to push the link
19056 register, changing the stack offsets. In this case we can just
19057 return false, since the presence of far jumps in the function will
19058 not affect stack offsets.
19060 If the arg pointer is live (or if it was live, but has now been
19061 eliminated and so set to dead) then we do have to test to see if
19062 the function might contain a far jump. This test can lead to some
19063 false negatives, since before reload is completed, the length of
19064 branch instructions is not known, so gcc defaults to returning their
19065 longest length, which in turn sets the far jump attribute to true.
19067 A false negative will not result in bad code being generated, but it
19068 will result in a needless push and pop of the link register. We
19069 hope that this does not occur too often.
19071 If we need doubleword stack alignment this could affect the other
19072 elimination offsets so we can't risk getting it wrong. */
19073 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
19074 cfun->machine->arg_pointer_live = 1;
19075 else if (!cfun->machine->arg_pointer_live)
19076 return 0;
19079 /* Check to see if the function contains a branch
19080 insn with the far jump attribute set. */
19081 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19083 if (GET_CODE (insn) == JUMP_INSN
19084 /* Ignore tablejump patterns. */
19085 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19086 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
19087 && get_attr_far_jump (insn) == FAR_JUMP_YES
19090 /* Record the fact that we have decided that
19091 the function does use far jumps. */
19092 cfun->machine->far_jump_used = 1;
19093 return 1;
19097 return 0;
19100 /* Return nonzero if FUNC must be entered in ARM mode. */
19101 int
19102 is_called_in_ARM_mode (tree func)
19104 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
19106 /* Ignore the problem about functions whose address is taken. */
19107 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
19108 return TRUE;
19110 #ifdef ARM_PE
19111 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
19112 #else
19113 return FALSE;
19114 #endif
19117 /* The bits which aren't usefully expanded as rtl. */
19118 const char *
19119 thumb_unexpanded_epilogue (void)
19121 arm_stack_offsets *offsets;
19122 int regno;
19123 unsigned long live_regs_mask = 0;
19124 int high_regs_pushed = 0;
19125 int had_to_push_lr;
19126 int size;
19128 if (cfun->machine->return_used_this_function != 0)
19129 return "";
19131 if (IS_NAKED (arm_current_func_type ()))
19132 return "";
19134 offsets = arm_get_frame_offsets ();
19135 live_regs_mask = offsets->saved_regs_mask;
19136 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19138 /* Deduce the registers used from the function's return value.
19139 This is more reliable than examining df_regs_ever_live_p () because that
19140 will be set if the register is ever used in the function, not just if
19141 the register is used to hold a return value. */
19142 size = arm_size_return_regs ();
19144 /* The prolog may have pushed some high registers to use as
19145 work registers. e.g. the testsuite file:
19146 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
19147 compiles to produce:
19148 push {r4, r5, r6, r7, lr}
19149 mov r7, r9
19150 mov r6, r8
19151 push {r6, r7}
19152 as part of the prolog. We have to undo that pushing here. */
19154 if (high_regs_pushed)
19156 unsigned long mask = live_regs_mask & 0xff;
19157 int next_hi_reg;
19159 /* The available low registers depend on the size of the value we are
19160 returning. */
19161 if (size <= 12)
19162 mask |= 1 << 3;
19163 if (size <= 8)
19164 mask |= 1 << 2;
19166 if (mask == 0)
19167 /* Oh dear! We have no low registers into which we can pop
19168 high registers! */
19169 internal_error
19170 ("no low registers available for popping high registers");
19172 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
19173 if (live_regs_mask & (1 << next_hi_reg))
19174 break;
19176 while (high_regs_pushed)
19178 /* Find lo register(s) into which the high register(s) can
19179 be popped. */
19180 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19182 if (mask & (1 << regno))
19183 high_regs_pushed--;
19184 if (high_regs_pushed == 0)
19185 break;
19188 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
19190 /* Pop the values into the low register(s). */
19191 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
19193 /* Move the value(s) into the high registers. */
19194 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19196 if (mask & (1 << regno))
19198 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
19199 regno);
19201 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
19202 if (live_regs_mask & (1 << next_hi_reg))
19203 break;
19207 live_regs_mask &= ~0x0f00;
19210 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
19211 live_regs_mask &= 0xff;
19213 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
19215 /* Pop the return address into the PC. */
19216 if (had_to_push_lr)
19217 live_regs_mask |= 1 << PC_REGNUM;
19219 /* Either no argument registers were pushed or a backtrace
19220 structure was created which includes an adjusted stack
19221 pointer, so just pop everything. */
19222 if (live_regs_mask)
19223 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19224 live_regs_mask);
19226 /* We have either just popped the return address into the
19227 PC or it was kept in LR for the entire function. */
19228 if (!had_to_push_lr)
19229 thumb_exit (asm_out_file, LR_REGNUM);
19231 else
19233 /* Pop everything but the return address. */
19234 if (live_regs_mask)
19235 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19236 live_regs_mask);
19238 if (had_to_push_lr)
19240 if (size > 12)
19242 /* We have no free low regs, so save one. */
19243 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
19244 LAST_ARG_REGNUM);
19247 /* Get the return address into a temporary register. */
19248 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
19249 1 << LAST_ARG_REGNUM);
19251 if (size > 12)
19253 /* Move the return address to lr. */
19254 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
19255 LAST_ARG_REGNUM);
19256 /* Restore the low register. */
19257 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
19258 IP_REGNUM);
19259 regno = LR_REGNUM;
19261 else
19262 regno = LAST_ARG_REGNUM;
19264 else
19265 regno = LR_REGNUM;
19267 /* Remove the argument registers that were pushed onto the stack. */
19268 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
19269 SP_REGNUM, SP_REGNUM,
19270 crtl->args.pretend_args_size);
19272 thumb_exit (asm_out_file, regno);
19275 return "";
19278 /* Functions to save and restore machine-specific function data. */
19279 static struct machine_function *
19280 arm_init_machine_status (void)
19282 struct machine_function *machine;
19283 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
19285 #if ARM_FT_UNKNOWN != 0
19286 machine->func_type = ARM_FT_UNKNOWN;
19287 #endif
19288 return machine;
19291 /* Return an RTX indicating where the return address to the
19292 calling function can be found. */
19294 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
19296 if (count != 0)
19297 return NULL_RTX;
19299 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
19302 /* Do anything needed before RTL is emitted for each function. */
19303 void
19304 arm_init_expanders (void)
19306 /* Arrange to initialize and mark the machine per-function status. */
19307 init_machine_status = arm_init_machine_status;
19309 /* This is to stop the combine pass optimizing away the alignment
19310 adjustment of va_arg. */
19311 /* ??? It is claimed that this should not be necessary. */
19312 if (cfun)
19313 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
19317 /* Like arm_compute_initial_elimination_offset. Simpler because there
19318 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
19319 to point at the base of the local variables after static stack
19320 space for a function has been allocated. */
19322 HOST_WIDE_INT
19323 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19325 arm_stack_offsets *offsets;
19327 offsets = arm_get_frame_offsets ();
19329 switch (from)
19331 case ARG_POINTER_REGNUM:
19332 switch (to)
19334 case STACK_POINTER_REGNUM:
19335 return offsets->outgoing_args - offsets->saved_args;
19337 case FRAME_POINTER_REGNUM:
19338 return offsets->soft_frame - offsets->saved_args;
19340 case ARM_HARD_FRAME_POINTER_REGNUM:
19341 return offsets->saved_regs - offsets->saved_args;
19343 case THUMB_HARD_FRAME_POINTER_REGNUM:
19344 return offsets->locals_base - offsets->saved_args;
19346 default:
19347 gcc_unreachable ();
19349 break;
19351 case FRAME_POINTER_REGNUM:
19352 switch (to)
19354 case STACK_POINTER_REGNUM:
19355 return offsets->outgoing_args - offsets->soft_frame;
19357 case ARM_HARD_FRAME_POINTER_REGNUM:
19358 return offsets->saved_regs - offsets->soft_frame;
19360 case THUMB_HARD_FRAME_POINTER_REGNUM:
19361 return offsets->locals_base - offsets->soft_frame;
19363 default:
19364 gcc_unreachable ();
19366 break;
19368 default:
19369 gcc_unreachable ();
19373 /* Given the stack offsets and register mask in OFFSETS, decide
19374 how many additional registers to push instead of subtracting
19375 a constant from SP. */
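/* Illustration: when optimizing for speed this only fires for a frame of
   exactly 512 bytes.  "sub sp, #508" is the largest single Thumb-1 stack
   adjustment, so pushing one otherwise unused low register (r0, say)
   shrinks the remaining adjustment to 508 bytes and avoids loading the
   constant from the literal pool.  When optimizing for size the checks
   below also allow a small adjustment to be replaced entirely by extra
   pushes.  */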
19376 static int
19377 thumb1_extra_regs_pushed (arm_stack_offsets *offsets)
19379 HOST_WIDE_INT amount = offsets->outgoing_args - offsets->saved_regs;
19380 unsigned long live_regs_mask = offsets->saved_regs_mask;
19381 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19382 unsigned long l_mask = live_regs_mask & 0x40ff;
19383 /* Then count how many other high registers will need to be pushed. */
19384 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19385 int n_free;
19387 /* If the stack frame size is 512 exactly, we can save one load
19388 instruction, which should make this a win even when optimizing
19389 for speed. */
19390 if (!optimize_size && amount != 512)
19391 return 0;
19393 /* Can't do this if there are high registers to push, or if we
19394 are not going to do a push at all. */
19395 if (high_regs_pushed != 0 || l_mask == 0)
19396 return 0;
19398 /* Don't do this if thumb1_expand_prologue wants to emit instructions
19399 between the push and the stack frame allocation. */
19400 if ((flag_pic && arm_pic_register != INVALID_REGNUM)
19401 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0))
19402 return 0;
19404 for (n_free = 0; n_free < 8 && !(live_regs_mask & 1); live_regs_mask >>= 1)
19405 n_free++;
19407 if (n_free == 0)
19408 return 0;
19409 gcc_assert (amount / 4 * 4 == amount);
19411 if (amount >= 512 && (amount - n_free * 4) < 512)
19412 return (amount - 508) / 4;
19413 if (amount <= n_free * 4)
19414 return amount / 4;
19415 return 0;
19418 /* Generate the rest of a function's prologue. */
19419 void
19420 thumb1_expand_prologue (void)
19422 rtx insn, dwarf;
19424 HOST_WIDE_INT amount;
19425 arm_stack_offsets *offsets;
19426 unsigned long func_type;
19427 int regno;
19428 unsigned long live_regs_mask;
19430 func_type = arm_current_func_type ();
19432 /* Naked functions don't have prologues. */
19433 if (IS_NAKED (func_type))
19434 return;
19436 if (IS_INTERRUPT (func_type))
19438 error ("interrupt Service Routines cannot be coded in Thumb mode");
19439 return;
19442 offsets = arm_get_frame_offsets ();
19443 live_regs_mask = offsets->saved_regs_mask;
19444 /* Load the pic register before setting the frame pointer,
19445 so we can use r7 as a temporary work register. */
19446 if (flag_pic && arm_pic_register != INVALID_REGNUM)
19447 arm_load_pic_register (live_regs_mask);
19449 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19450 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
19451 stack_pointer_rtx);
19453 amount = offsets->outgoing_args - offsets->saved_regs;
19454 amount -= 4 * thumb1_extra_regs_pushed (offsets);
19455 if (amount)
19457 if (amount < 512)
19459 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19460 GEN_INT (- amount)));
19461 RTX_FRAME_RELATED_P (insn) = 1;
19463 else
19465 rtx reg;
19467 /* The stack decrement is too big for an immediate value in a single
19468 insn. In theory we could issue multiple subtracts, but after
19469 three of them it becomes more space efficient to place the full
19470 value in the constant pool and load into a register. (Also the
19471 ARM debugger really likes to see only one stack decrement per
19472 function). So instead we look for a scratch register into which
19473 we can load the decrement, and then we subtract this from the
19474 stack pointer. Unfortunately on the thumb the only available
19475 scratch registers are the argument registers, and we cannot use
19476 these as they may hold arguments to the function. Instead we
19477 attempt to locate a call preserved register which is used by this
19478 function. If we can find one, then we know that it will have
19479 been pushed at the start of the prologue and so we can corrupt
19480 it now. */
19481 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
19482 if (live_regs_mask & (1 << regno))
19483 break;
19485 gcc_assert(regno <= LAST_LO_REGNUM);
19487 reg = gen_rtx_REG (SImode, regno);
19489 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
19491 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19492 stack_pointer_rtx, reg));
19493 RTX_FRAME_RELATED_P (insn) = 1;
19494 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19495 plus_constant (stack_pointer_rtx,
19496 -amount));
19497 RTX_FRAME_RELATED_P (dwarf) = 1;
19498 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19502 if (frame_pointer_needed)
19503 thumb_set_frame_pointer (offsets);
19505 /* If we are profiling, make sure no instructions are scheduled before
19506 the call to mcount. Similarly if the user has requested no
19507 scheduling in the prolog. Similarly if we want non-call exceptions
19508 using the EABI unwinder, to prevent faulting instructions from being
19509 swapped with a stack adjustment. */
19510 if (crtl->profile || !TARGET_SCHED_PROLOG
19511 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
19512 emit_insn (gen_blockage ());
19514 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
19515 if (live_regs_mask & 0xff)
19516 cfun->machine->lr_save_eliminated = 0;
19520 void
19521 thumb1_expand_epilogue (void)
19523 HOST_WIDE_INT amount;
19524 arm_stack_offsets *offsets;
19525 int regno;
19527 /* Naked functions don't have epilogues either. */
19528 if (IS_NAKED (arm_current_func_type ()))
19529 return;
19531 offsets = arm_get_frame_offsets ();
19532 amount = offsets->outgoing_args - offsets->saved_regs;
19534 if (frame_pointer_needed)
19536 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
19537 amount = offsets->locals_base - offsets->saved_regs;
19540 gcc_assert (amount >= 0);
19541 if (amount)
19543 if (amount < 512)
19544 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19545 GEN_INT (amount)));
19546 else
19548 /* r3 is always free in the epilogue. */
19549 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
19551 emit_insn (gen_movsi (reg, GEN_INT (amount)));
19552 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
19556 /* Emit a USE (stack_pointer_rtx), so that
19557 the stack adjustment will not be deleted. */
19558 emit_insn (gen_prologue_use (stack_pointer_rtx));
19560 if (crtl->profile || !TARGET_SCHED_PROLOG)
19561 emit_insn (gen_blockage ());
19563 /* Emit a clobber for each register that will be restored in the epilogue,
19564 so that flow2 will get register lifetimes correct. */
19565 for (regno = 0; regno < 13; regno++)
19566 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
19567 emit_clobber (gen_rtx_REG (SImode, regno));
19569 if (! df_regs_ever_live_p (LR_REGNUM))
19570 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
19573 static void
19574 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
19576 arm_stack_offsets *offsets;
19577 unsigned long live_regs_mask = 0;
19578 unsigned long l_mask;
19579 unsigned high_regs_pushed = 0;
19580 int cfa_offset = 0;
19581 int regno;
19583 if (IS_NAKED (arm_current_func_type ()))
19584 return;
19586 if (is_called_in_ARM_mode (current_function_decl))
19588 const char * name;
19590 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
19591 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
19592 == SYMBOL_REF);
19593 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
19595 /* Generate code sequence to switch us into Thumb mode. */
19596 /* The .code 32 directive has already been emitted by
19597 ASM_DECLARE_FUNCTION_NAME. */
19598 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
19599 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
19601 /* Generate a label, so that the debugger will notice the
19602 change in instruction sets. This label is also used by
19603 the assembler to bypass the ARM code when this function
19604 is called from a Thumb encoded function elsewhere in the
19605 same file. Hence the definition of STUB_NAME here must
19606 agree with the definition in gas/config/tc-arm.c. */
19608 #define STUB_NAME ".real_start_of"
19610 fprintf (f, "\t.code\t16\n");
19611 #ifdef ARM_PE
19612 if (arm_dllexport_name_p (name))
19613 name = arm_strip_name_encoding (name);
19614 #endif
19615 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
19616 fprintf (f, "\t.thumb_func\n");
19617 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
19620 if (crtl->args.pretend_args_size)
19622 /* Output unwind directive for the stack adjustment. */
19623 if (ARM_EABI_UNWIND_TABLES)
19624 fprintf (f, "\t.pad #%d\n",
19625 crtl->args.pretend_args_size);
19627 if (cfun->machine->uses_anonymous_args)
19629 int num_pushes;
19631 fprintf (f, "\tpush\t{");
19633 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
19635 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
19636 regno <= LAST_ARG_REGNUM;
19637 regno++)
19638 asm_fprintf (f, "%r%s", regno,
19639 regno == LAST_ARG_REGNUM ? "" : ", ");
19641 fprintf (f, "}\n");
19643 else
19644 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
19645 SP_REGNUM, SP_REGNUM,
19646 crtl->args.pretend_args_size);
19648 /* We don't need to record the stores for unwinding (would it
19649 help the debugger any if we did?), but record the change in
19650 the stack pointer. */
19651 if (dwarf2out_do_frame ())
19653 char *l = dwarf2out_cfi_label (false);
19655 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
19656 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19660 /* Get the registers we are going to push. */
19661 offsets = arm_get_frame_offsets ();
19662 live_regs_mask = offsets->saved_regs_mask;
19663 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19664 l_mask = live_regs_mask & 0x40ff;
19665 /* Then count how many other high registers will need to be pushed. */
19666 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19668 if (TARGET_BACKTRACE)
19670 unsigned offset;
19671 unsigned work_register;
19673 /* We have been asked to create a stack backtrace structure.
19674 The code looks like this:
19676 0 .align 2
19677 0 func:
19678 0 sub SP, #16 Reserve space for 4 registers.
19679 2 push {R7} Push low registers.
19680 4 add R7, SP, #20 Get the stack pointer before the push.
19681 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
19682 8 mov R7, PC Get hold of the start of this code plus 12.
19683 10 str R7, [SP, #16] Store it.
19684 12 mov R7, FP Get hold of the current frame pointer.
19685 14 str R7, [SP, #4] Store it.
19686 16 mov R7, LR Get hold of the current return address.
19687 18 str R7, [SP, #12] Store it.
19688 20 add R7, SP, #16 Point at the start of the backtrace structure.
19689 22 mov FP, R7 Put this value into the frame pointer. */
19691 work_register = thumb_find_work_register (live_regs_mask);
19693 if (ARM_EABI_UNWIND_TABLES)
19694 asm_fprintf (f, "\t.pad #16\n");
19696 asm_fprintf
19697 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
19698 SP_REGNUM, SP_REGNUM);
19700 if (dwarf2out_do_frame ())
19702 char *l = dwarf2out_cfi_label (false);
19704 cfa_offset = cfa_offset + 16;
19705 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19708 if (l_mask)
19710 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19711 offset = bit_count (l_mask) * UNITS_PER_WORD;
19713 else
19714 offset = 0;
19716 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19717 offset + 16 + crtl->args.pretend_args_size);
19719 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19720 offset + 4);
19722 /* Make sure that the instruction fetching the PC is in the right place
19723 to calculate "start of backtrace creation code + 12". */
19724 if (l_mask)
19726 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19727 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19728 offset + 12);
19729 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19730 ARM_HARD_FRAME_POINTER_REGNUM);
19731 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19732 offset);
19734 else
19736 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19737 ARM_HARD_FRAME_POINTER_REGNUM);
19738 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19739 offset);
19740 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19741 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19742 offset + 12);
19745 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
19746 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19747 offset + 8);
19748 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19749 offset + 12);
19750 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
19751 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
19753 /* Optimization: If we are not pushing any low registers but we are going
19754 to push some high registers then delay our first push. This will just
19755 be a push of LR and we can combine it with the push of the first high
19756 register. */
19757 else if ((l_mask & 0xff) != 0
19758 || (high_regs_pushed == 0 && l_mask))
19760 unsigned long mask = l_mask;
19761 mask |= (1 << thumb1_extra_regs_pushed (offsets)) - 1;
19762 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
19765 if (high_regs_pushed)
19767 unsigned pushable_regs;
19768 unsigned next_hi_reg;
19770 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
19771 if (live_regs_mask & (1 << next_hi_reg))
19772 break;
19774 pushable_regs = l_mask & 0xff;
19776 if (pushable_regs == 0)
19777 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
19779 while (high_regs_pushed > 0)
19781 unsigned long real_regs_mask = 0;
19783 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
19785 if (pushable_regs & (1 << regno))
19787 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
19789 high_regs_pushed --;
19790 real_regs_mask |= (1 << next_hi_reg);
19792 if (high_regs_pushed)
19794 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
19795 next_hi_reg --)
19796 if (live_regs_mask & (1 << next_hi_reg))
19797 break;
19799 else
19801 pushable_regs &= ~((1 << regno) - 1);
19802 break;
19807 /* If we had to find a work register and we have not yet
19808 saved the LR then add it to the list of regs to push. */
19809 if (l_mask == (1 << LR_REGNUM))
19811 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
19812 1, &cfa_offset,
19813 real_regs_mask | (1 << LR_REGNUM));
19814 l_mask = 0;
19816 else
19817 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
19822 /* Handle the case of a double word load into a low register from
19823 a computed memory address. The computed address may involve a
19824 register which is overwritten by the load. */
19825 const char *
19826 thumb_load_double_from_address (rtx *operands)
19828 rtx addr;
19829 rtx base;
19830 rtx offset;
19831 rtx arg1;
19832 rtx arg2;
19834 gcc_assert (GET_CODE (operands[0]) == REG);
19835 gcc_assert (GET_CODE (operands[1]) == MEM);
19837 /* Get the memory address. */
19838 addr = XEXP (operands[1], 0);
19840 /* Work out how the memory address is computed. */
19841 switch (GET_CODE (addr))
19843 case REG:
19844 operands[2] = adjust_address (operands[1], SImode, 4);
19846 if (REGNO (operands[0]) == REGNO (addr))
19848 output_asm_insn ("ldr\t%H0, %2", operands);
19849 output_asm_insn ("ldr\t%0, %1", operands);
19851 else
19853 output_asm_insn ("ldr\t%0, %1", operands);
19854 output_asm_insn ("ldr\t%H0, %2", operands);
19856 break;
19858 case CONST:
19859 /* Compute <address> + 4 for the high order load. */
19860 operands[2] = adjust_address (operands[1], SImode, 4);
19862 output_asm_insn ("ldr\t%0, %1", operands);
19863 output_asm_insn ("ldr\t%H0, %2", operands);
19864 break;
19866 case PLUS:
19867 arg1 = XEXP (addr, 0);
19868 arg2 = XEXP (addr, 1);
19870 if (CONSTANT_P (arg1))
19871 base = arg2, offset = arg1;
19872 else
19873 base = arg1, offset = arg2;
19875 gcc_assert (GET_CODE (base) == REG);
19877 /* Catch the case of <address> = <reg> + <reg> */
19878 if (GET_CODE (offset) == REG)
19880 int reg_offset = REGNO (offset);
19881 int reg_base = REGNO (base);
19882 int reg_dest = REGNO (operands[0]);
19884 /* Add the base and offset registers together into the
19885 higher destination register. */
19886 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
19887 reg_dest + 1, reg_base, reg_offset);
19889 /* Load the lower destination register from the address in
19890 the higher destination register. */
19891 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
19892 reg_dest, reg_dest + 1);
19894 /* Load the higher destination register from its own address
19895 plus 4. */
19896 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
19897 reg_dest + 1, reg_dest + 1);
19899 else
19901 /* Compute <address> + 4 for the high order load. */
19902 operands[2] = adjust_address (operands[1], SImode, 4);
19904 /* If the computed address is held in the low order register
19905 then load the high order register first, otherwise always
19906 load the low order register first. */
19907 if (REGNO (operands[0]) == REGNO (base))
19909 output_asm_insn ("ldr\t%H0, %2", operands);
19910 output_asm_insn ("ldr\t%0, %1", operands);
19912 else
19914 output_asm_insn ("ldr\t%0, %1", operands);
19915 output_asm_insn ("ldr\t%H0, %2", operands);
19918 break;
19920 case LABEL_REF:
19921 /* With no registers to worry about we can just load the value
19922 directly. */
19923 operands[2] = adjust_address (operands[1], SImode, 4);
19925 output_asm_insn ("ldr\t%H0, %2", operands);
19926 output_asm_insn ("ldr\t%0, %1", operands);
19927 break;
19929 default:
19930 gcc_unreachable ();
19933 return "";
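/* Output a block copy of N (2 or 3) words using auto-incrementing
   ldmia/stmia.  The swaps below sort the scratch registers so that the
   printed register lists are in the ascending order the assembler
   expects.  */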
19936 const char *
19937 thumb_output_move_mem_multiple (int n, rtx *operands)
19939 rtx tmp;
19941 switch (n)
19943 case 2:
19944 if (REGNO (operands[4]) > REGNO (operands[5]))
19946 tmp = operands[4];
19947 operands[4] = operands[5];
19948 operands[5] = tmp;
19950 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
19951 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
19952 break;
19954 case 3:
19955 if (REGNO (operands[4]) > REGNO (operands[5]))
19957 tmp = operands[4];
19958 operands[4] = operands[5];
19959 operands[5] = tmp;
19961 if (REGNO (operands[5]) > REGNO (operands[6]))
19963 tmp = operands[5];
19964 operands[5] = operands[6];
19965 operands[6] = tmp;
19967 if (REGNO (operands[4]) > REGNO (operands[5]))
19969 tmp = operands[4];
19970 operands[4] = operands[5];
19971 operands[5] = tmp;
19974 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
19975 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
19976 break;
19978 default:
19979 gcc_unreachable ();
19982 return "";
19985 /* Output a call-via instruction for thumb state. */
19986 const char *
19987 thumb_call_via_reg (rtx reg)
19989 int regno = REGNO (reg);
19990 rtx *labelp;
19992 gcc_assert (regno < LR_REGNUM);
19994 /* If we are in the normal text section we can use a single instance
19995 per compilation unit. If we are doing function sections, then we need
19996 an entry per section, since we can't rely on reachability. */
19997 if (in_section == text_section)
19999 thumb_call_reg_needed = 1;
20001 if (thumb_call_via_label[regno] == NULL)
20002 thumb_call_via_label[regno] = gen_label_rtx ();
20003 labelp = thumb_call_via_label + regno;
20005 else
20007 if (cfun->machine->call_via[regno] == NULL)
20008 cfun->machine->call_via[regno] = gen_label_rtx ();
20009 labelp = cfun->machine->call_via + regno;
20012 output_asm_insn ("bl\t%a0", labelp);
20013 return "";
20016 /* Routines for generating rtl. */
20017 void
20018 thumb_expand_movmemqi (rtx *operands)
20020 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
20021 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
20022 HOST_WIDE_INT len = INTVAL (operands[2]);
20023 HOST_WIDE_INT offset = 0;
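/* Peel off the largest chunks first: 12- and 8-byte ldmia/stmia block
   moves (which also advance the pointer registers), then a word, a
   halfword and finally a single byte at the accumulated OFFSET.  For
   instance, a 23-byte copy becomes one 12-byte block move, one 8-byte
   block move, a halfword and a byte.  */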
20025 while (len >= 12)
20027 emit_insn (gen_movmem12b (out, in, out, in));
20028 len -= 12;
20031 if (len >= 8)
20033 emit_insn (gen_movmem8b (out, in, out, in));
20034 len -= 8;
20037 if (len >= 4)
20039 rtx reg = gen_reg_rtx (SImode);
20040 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
20041 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
20042 len -= 4;
20043 offset += 4;
20046 if (len >= 2)
20048 rtx reg = gen_reg_rtx (HImode);
20049 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
20050 plus_constant (in, offset))));
20051 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
20052 reg));
20053 len -= 2;
20054 offset += 2;
20057 if (len)
20059 rtx reg = gen_reg_rtx (QImode);
20060 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
20061 plus_constant (in, offset))));
20062 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
20063 reg));
20067 void
20068 thumb_reload_out_hi (rtx *operands)
20070 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
20073 /* Handle reading a half-word from memory during reload. */
20074 void
20075 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
20077 gcc_unreachable ();
20080 /* Return the length of a function name prefix
20081 that starts with the character 'c'. */
20082 static int
20083 arm_get_strip_length (int c)
20085 switch (c)
20087 ARM_NAME_ENCODING_LENGTHS
20088 default: return 0;
20092 /* Return a pointer to a function's name with any
20093 and all prefix encodings stripped from it. */
20094 const char *
20095 arm_strip_name_encoding (const char *name)
20097 int skip;
20099 while ((skip = arm_get_strip_length (* name)))
20100 name += skip;
20102 return name;
20105 /* If there is a '*' anywhere in the name's prefix, then
20106 emit the stripped name verbatim, otherwise prepend an
20107 underscore if leading underscores are being used. */
20108 void
20109 arm_asm_output_labelref (FILE *stream, const char *name)
20111 int skip;
20112 int verbatim = 0;
20114 while ((skip = arm_get_strip_length (* name)))
20116 verbatim |= (*name == '*');
20117 name += skip;
20120 if (verbatim)
20121 fputs (name, stream);
20122 else
20123 asm_fprintf (stream, "%U%s", name);
20126 static void
20127 arm_file_start (void)
20129 int val;
20131 if (TARGET_UNIFIED_ASM)
20132 asm_fprintf (asm_out_file, "\t.syntax unified\n");
20134 if (TARGET_BPABI)
20136 const char *fpu_name;
20137 if (arm_selected_arch)
20138 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
20139 else
20140 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
20142 if (TARGET_SOFT_FLOAT)
20144 if (TARGET_VFP)
20145 fpu_name = "softvfp";
20146 else
20147 fpu_name = "softfpa";
20149 else
20151 fpu_name = arm_fpu_desc->name;
20152 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
20154 if (TARGET_HARD_FLOAT)
20155 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
20156 if (TARGET_HARD_FLOAT_ABI)
20157 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
20160 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
20162 /* Some of these attributes only apply when the corresponding features
20163 are used. However we don't have any easy way of figuring this out.
20164 Conservatively record the setting that would have been used. */
20166 /* Tag_ABI_FP_rounding. */
20167 if (flag_rounding_math)
20168 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
20169 if (!flag_unsafe_math_optimizations)
20172 /* Tag_ABI_FP_denormal. */
20172 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
20173 /* Tag_ABI_FP_exceptions. */
20174 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
20176 /* Tag_ABI_FP_user_exceptions. */
20177 if (flag_signaling_nans)
20178 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
20179 /* Tag_ABI_FP_number_model. */
20180 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
20181 flag_finite_math_only ? 1 : 3);
20183 /* Tag_ABI_align8_needed. */
20184 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
20185 /* Tag_ABI_align8_preserved. */
20186 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
20187 /* Tag_ABI_enum_size. */
20188 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
20189 flag_short_enums ? 1 : 2);
20191 /* Tag_ABI_optimization_goals. */
20192 if (optimize_size)
20193 val = 4;
20194 else if (optimize >= 2)
20195 val = 2;
20196 else if (optimize)
20197 val = 1;
20198 else
20199 val = 6;
20200 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
20202 /* Tag_ABI_FP_16bit_format. */
20203 if (arm_fp16_format)
20204 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
20205 (int)arm_fp16_format);
20207 if (arm_lang_output_object_attributes_hook)
20208 arm_lang_output_object_attributes_hook();
20210 default_file_start();
20213 static void
20214 arm_file_end (void)
20216 int regno;
20218 if (NEED_INDICATE_EXEC_STACK)
20219 /* Add .note.GNU-stack. */
20220 file_end_indicate_exec_stack ();
20222 if (! thumb_call_reg_needed)
20223 return;
20225 switch_to_section (text_section);
20226 asm_fprintf (asm_out_file, "\t.code 16\n");
20227 ASM_OUTPUT_ALIGN (asm_out_file, 1);
20229 for (regno = 0; regno < LR_REGNUM; regno++)
20231 rtx label = thumb_call_via_label[regno];
20233 if (label != 0)
20235 targetm.asm_out.internal_label (asm_out_file, "L",
20236 CODE_LABEL_NUMBER (label));
20237 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20242 #ifndef ARM_PE
20243 /* Symbols in the text segment can be accessed without indirecting via the
20244 constant pool; it may take an extra binary operation, but this is still
20245 faster than indirecting via memory. Don't do this when not optimizing,
20246 since we won't be calculating all of the offsets necessary to do this
20247 simplification. */
20249 static void
20250 arm_encode_section_info (tree decl, rtx rtl, int first)
20252 if (optimize > 0 && TREE_CONSTANT (decl))
20253 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
20255 default_encode_section_info (decl, rtl, first);
20257 #endif /* !ARM_PE */
20259 static void
20260 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
20262 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
20263 && !strcmp (prefix, "L"))
20265 arm_ccfsm_state = 0;
20266 arm_target_insn = NULL;
20268 default_internal_label (stream, prefix, labelno);
20271 /* Output code to add DELTA to the first argument, and then jump
20272 to FUNCTION. Used for C++ multiple inheritance. */
20273 static void
20274 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
20275 HOST_WIDE_INT delta,
20276 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
20277 tree function)
20279 static int thunk_label = 0;
20280 char label[256];
20281 char labelpc[256];
20282 int mi_delta = delta;
20283 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
20284 int shift = 0;
20285 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
20286 ? 1 : 0);
20287 if (mi_delta < 0)
20288 mi_delta = - mi_delta;
20290 if (TARGET_THUMB1)
20292 int labelno = thunk_label++;
20293 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
20294 /* Thunks are entered in ARM mode when available. */
20295 if (TARGET_THUMB1_ONLY)
20297 /* push r3 so we can use it as a temporary. */
20298 /* TODO: Omit this save if r3 is not used. */
20299 fputs ("\tpush {r3}\n", file);
20300 fputs ("\tldr\tr3, ", file);
20302 else
20304 fputs ("\tldr\tr12, ", file);
20306 assemble_name (file, label);
20307 fputc ('\n', file);
20308 if (flag_pic)
20310 /* If we are generating PIC, the ldr instruction below loads
20311 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
20312 the address of the add + 8, so we have:
20314 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
20315 = target + 1.
20317 Note that we have "+ 1" because some versions of GNU ld
20318 don't set the low bit of the result for R_ARM_REL32
20319 relocations against thumb function symbols.
20320 On ARMv6M this is +4, not +8. */
20321 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
20322 assemble_name (file, labelpc);
20323 fputs (":\n", file);
20324 if (TARGET_THUMB1_ONLY)
20326 /* This is 2 insns after the start of the thunk, so we know it
20327 is 4-byte aligned. */
20328 fputs ("\tadd\tr3, pc, r3\n", file);
20329 fputs ("\tmov r12, r3\n", file);
20331 else
20332 fputs ("\tadd\tr12, pc, r12\n", file);
20334 else if (TARGET_THUMB1_ONLY)
20335 fputs ("\tmov r12, r3\n", file);
20337 if (TARGET_THUMB1_ONLY)
20339 if (mi_delta > 255)
20341 fputs ("\tldr\tr3, ", file);
20342 assemble_name (file, label);
20343 fputs ("+4\n", file);
20344 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
20345 mi_op, this_regno, this_regno);
20347 else if (mi_delta != 0)
20349 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20350 mi_op, this_regno, this_regno,
20351 mi_delta);
20354 else
20356 /* TODO: Use movw/movt for large constants when available. */
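/* The loop below peels the delta into 8-bit chunks at even bit
   positions, each of which is a valid ARM immediate.  For example a
   delta of 0x1234 comes out as "add r0, r0, #564" followed by
   "add r0, r0, #4096" (assuming the this pointer is in r0).  */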
20357 while (mi_delta != 0)
20359 if ((mi_delta & (3 << shift)) == 0)
20360 shift += 2;
20361 else
20363 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20364 mi_op, this_regno, this_regno,
20365 mi_delta & (0xff << shift));
20366 mi_delta &= ~(0xff << shift);
20367 shift += 8;
20371 if (TARGET_THUMB1)
20373 if (TARGET_THUMB1_ONLY)
20374 fputs ("\tpop\t{r3}\n", file);
20376 fprintf (file, "\tbx\tr12\n");
20377 ASM_OUTPUT_ALIGN (file, 2);
20378 assemble_name (file, label);
20379 fputs (":\n", file);
20380 if (flag_pic)
20382 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
20383 rtx tem = XEXP (DECL_RTL (function), 0);
20384 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
20385 tem = gen_rtx_MINUS (GET_MODE (tem),
20386 tem,
20387 gen_rtx_SYMBOL_REF (Pmode,
20388 ggc_strdup (labelpc)));
20389 assemble_integer (tem, 4, BITS_PER_WORD, 1);
20391 else
20392 /* Output ".word .LTHUNKn". */
20393 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
20395 if (TARGET_THUMB1_ONLY && mi_delta > 255)
20396 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
20398 else
20400 fputs ("\tb\t", file);
20401 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
20402 if (NEED_PLT_RELOC)
20403 fputs ("(PLT)", file);
20404 fputc ('\n', file);
20409 arm_emit_vector_const (FILE *file, rtx x)
20411 int i;
20412 const char * pattern;
20414 gcc_assert (GET_CODE (x) == CONST_VECTOR);
20416 switch (GET_MODE (x))
20418 case V2SImode: pattern = "%08x"; break;
20419 case V4HImode: pattern = "%04x"; break;
20420 case V8QImode: pattern = "%02x"; break;
20421 default: gcc_unreachable ();
20424 fprintf (file, "0x");
20425 for (i = CONST_VECTOR_NUNITS (x); i--;)
20427 rtx element;
20429 element = CONST_VECTOR_ELT (x, i);
20430 fprintf (file, pattern, INTVAL (element));
20433 return 1;
20436 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20437 HFmode constant pool entries are actually loaded with ldr. */
20438 void
20439 arm_emit_fp16_const (rtx c)
20441 REAL_VALUE_TYPE r;
20442 long bits;
20444 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
20445 bits = real_to_target (NULL, &r, HFmode);
20446 if (WORDS_BIG_ENDIAN)
20447 assemble_zeros (2);
20448 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
20449 if (!WORDS_BIG_ENDIAN)
20450 assemble_zeros (2);
20453 const char *
20454 arm_output_load_gr (rtx *operands)
20456 rtx reg;
20457 rtx offset;
20458 rtx wcgr;
20459 rtx sum;
20461 if (GET_CODE (operands [1]) != MEM
20462 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
20463 || GET_CODE (reg = XEXP (sum, 0)) != REG
20464 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
20465 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
20466 return "wldrw%?\t%0, %1";
20468 /* Fix up an out-of-range load of a GR register. */
20469 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
20470 wcgr = operands[0];
20471 operands[0] = reg;
20472 output_asm_insn ("ldr%?\t%0, %1", operands);
20474 operands[0] = wcgr;
20475 operands[1] = reg;
20476 output_asm_insn ("tmcr%?\t%0, %1", operands);
20477 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
20479 return "";
20482 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
20484 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
20485 named arg and all anonymous args onto the stack.
20486 XXX I know the prologue shouldn't be pushing registers, but it is faster
20487 that way. */
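/* For example, a variadic function whose named arguments occupy r0 and r1
   gets *pretend_size = 8, so the prologue pushes r2 and r3 immediately
   below the caller's stack arguments and the whole variable argument list
   ends up contiguous in memory.  */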
20489 static void
20490 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
20491 enum machine_mode mode,
20492 tree type,
20493 int *pretend_size,
20494 int second_time ATTRIBUTE_UNUSED)
20496 int nregs;
20498 cfun->machine->uses_anonymous_args = 1;
20499 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
20501 nregs = pcum->aapcs_ncrn;
20502 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
20503 nregs++;
20505 else
20506 nregs = pcum->nregs;
20508 if (nregs < NUM_ARG_REGS)
20509 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
20512 /* Return nonzero if the CONSUMER instruction (a store) does not need
20513 PRODUCER's value to calculate the address. */
20516 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
20518 rtx value = PATTERN (producer);
20519 rtx addr = PATTERN (consumer);
20521 if (GET_CODE (value) == COND_EXEC)
20522 value = COND_EXEC_CODE (value);
20523 if (GET_CODE (value) == PARALLEL)
20524 value = XVECEXP (value, 0, 0);
20525 value = XEXP (value, 0);
20526 if (GET_CODE (addr) == COND_EXEC)
20527 addr = COND_EXEC_CODE (addr);
20528 if (GET_CODE (addr) == PARALLEL)
20529 addr = XVECEXP (addr, 0, 0);
20530 addr = XEXP (addr, 0);
20532 return !reg_overlap_mentioned_p (value, addr);
20535 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20536 have an early register shift value or amount dependency on the
20537 result of PRODUCER. */
20540 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
20542 rtx value = PATTERN (producer);
20543 rtx op = PATTERN (consumer);
20544 rtx early_op;
20546 if (GET_CODE (value) == COND_EXEC)
20547 value = COND_EXEC_CODE (value);
20548 if (GET_CODE (value) == PARALLEL)
20549 value = XVECEXP (value, 0, 0);
20550 value = XEXP (value, 0);
20551 if (GET_CODE (op) == COND_EXEC)
20552 op = COND_EXEC_CODE (op);
20553 if (GET_CODE (op) == PARALLEL)
20554 op = XVECEXP (op, 0, 0);
20555 op = XEXP (op, 1);
20557 early_op = XEXP (op, 0);
20558 /* This is either an actual independent shift, or a shift applied to
20559 the first operand of another operation. We want the whole shift
20560 operation. */
20561 if (GET_CODE (early_op) == REG)
20562 early_op = op;
20564 return !reg_overlap_mentioned_p (value, early_op);
20567 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20568 have an early register shift value dependency on the result of
20569 PRODUCER. */
20572 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
20574 rtx value = PATTERN (producer);
20575 rtx op = PATTERN (consumer);
20576 rtx early_op;
20578 if (GET_CODE (value) == COND_EXEC)
20579 value = COND_EXEC_CODE (value);
20580 if (GET_CODE (value) == PARALLEL)
20581 value = XVECEXP (value, 0, 0);
20582 value = XEXP (value, 0);
20583 if (GET_CODE (op) == COND_EXEC)
20584 op = COND_EXEC_CODE (op);
20585 if (GET_CODE (op) == PARALLEL)
20586 op = XVECEXP (op, 0, 0);
20587 op = XEXP (op, 1);
20589 early_op = XEXP (op, 0);
20591 /* This is either an actual independent shift, or a shift applied to
20592 the first operand of another operation. We want the value being
20593 shifted, in either case. */
20594 if (GET_CODE (early_op) != REG)
20595 early_op = XEXP (early_op, 0);
20597 return !reg_overlap_mentioned_p (value, early_op);
20600 /* Return nonzero if the CONSUMER (a mul or mac op) does not
20601 have an early register mult dependency on the result of
20602 PRODUCER. */
20605 arm_no_early_mul_dep (rtx producer, rtx consumer)
20607 rtx value = PATTERN (producer);
20608 rtx op = PATTERN (consumer);
20610 if (GET_CODE (value) == COND_EXEC)
20611 value = COND_EXEC_CODE (value);
20612 if (GET_CODE (value) == PARALLEL)
20613 value = XVECEXP (value, 0, 0);
20614 value = XEXP (value, 0);
20615 if (GET_CODE (op) == COND_EXEC)
20616 op = COND_EXEC_CODE (op);
20617 if (GET_CODE (op) == PARALLEL)
20618 op = XVECEXP (op, 0, 0);
20619 op = XEXP (op, 1);
20621 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
20623 if (GET_CODE (XEXP (op, 0)) == MULT)
20624 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
20625 else
20626 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
20629 return 0;
20632 /* We can't rely on the caller doing the proper promotion when
20633 using APCS or ATPCS. */
20635 static bool
20636 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
20638 return !TARGET_AAPCS_BASED;
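/* Promote integer arguments and return values narrower than a word to
   SImode, so e.g. a "short" travels in a full 32-bit register.  */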
20641 static enum machine_mode
20642 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
20643 enum machine_mode mode,
20644 int *punsignedp ATTRIBUTE_UNUSED,
20645 const_tree fntype ATTRIBUTE_UNUSED,
20646 int for_return ATTRIBUTE_UNUSED)
20648 if (GET_MODE_CLASS (mode) == MODE_INT
20649 && GET_MODE_SIZE (mode) < 4)
20650 return SImode;
20652 return mode;
20655 /* AAPCS based ABIs use short enums by default. */
20657 static bool
20658 arm_default_short_enums (void)
20660 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
20664 /* AAPCS requires that anonymous bitfields affect structure alignment. */
20666 static bool
20667 arm_align_anon_bitfield (void)
20669 return TARGET_AAPCS_BASED;
20673 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
20675 static tree
20676 arm_cxx_guard_type (void)
20678 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
20681 /* Return non-zero if the consumer (a multiply-accumulate instruction)
20682 has an accumulator dependency on the result of the producer (a
20683 multiplication instruction) and no other dependency on that result. */
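/* For example, "mul r1, r2, r3" followed by "mla r4, r5, r6, r1" returns
   nonzero (the accumulator r1 is exactly the mul result), whereas a
   following "mla r4, r1, r6, r7" returns zero because r1 feeds a multiply
   operand instead.  */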
20685 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
20687 rtx mul = PATTERN (producer);
20688 rtx mac = PATTERN (consumer);
20689 rtx mul_result;
20690 rtx mac_op0, mac_op1, mac_acc;
20692 if (GET_CODE (mul) == COND_EXEC)
20693 mul = COND_EXEC_CODE (mul);
20694 if (GET_CODE (mac) == COND_EXEC)
20695 mac = COND_EXEC_CODE (mac);
20697 /* Check that mul is of the form (set (...) (mult ...))
20698 and mla is of the form (set (...) (plus (mult ...) (...))). */
20699 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
20700 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
20701 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
20702 return 0;
20704 mul_result = XEXP (mul, 0);
20705 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
20706 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
20707 mac_acc = XEXP (XEXP (mac, 1), 1);
20709 return (reg_overlap_mentioned_p (mul_result, mac_acc)
20710 && !reg_overlap_mentioned_p (mul_result, mac_op0)
20711 && !reg_overlap_mentioned_p (mul_result, mac_op1));
20715 /* The EABI says test the least significant bit of a guard variable. */
20717 static bool
20718 arm_cxx_guard_mask_bit (void)
20720 return TARGET_AAPCS_BASED;
20724 /* The EABI specifies that all array cookies are 8 bytes long. */
20726 static tree
20727 arm_get_cookie_size (tree type)
20729 tree size;
20731 if (!TARGET_AAPCS_BASED)
20732 return default_cxx_get_cookie_size (type);
20734 size = build_int_cst (sizetype, 8);
20735 return size;
20739 /* The EABI says that array cookies should also contain the element size. */
20741 static bool
20742 arm_cookie_has_size (void)
20744 return TARGET_AAPCS_BASED;
20748 /* The EABI says constructors and destructors should return a pointer to
20749 the object constructed/destroyed. */
20751 static bool
20752 arm_cxx_cdtor_returns_this (void)
20754 return TARGET_AAPCS_BASED;
20757 /* The EABI says that an inline function may never be the key
20758 method. */
20760 static bool
20761 arm_cxx_key_method_may_be_inline (void)
20763 return !TARGET_AAPCS_BASED;
20766 static void
20767 arm_cxx_determine_class_data_visibility (tree decl)
20769 if (!TARGET_AAPCS_BASED
20770 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
20771 return;
20773 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
20774 is exported. However, on systems without dynamic vague linkage,
20775 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
20776 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
20777 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
20778 else
20779 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
20780 DECL_VISIBILITY_SPECIFIED (decl) = 1;
20783 static bool
20784 arm_cxx_class_data_always_comdat (void)
20786 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
20787 vague linkage if the class has no key function. */
20788 return !TARGET_AAPCS_BASED;
20792 /* The EABI says __aeabi_atexit should be used to register static
20793 destructors. */
20795 static bool
20796 arm_cxx_use_aeabi_atexit (void)
20798 return TARGET_AAPCS_BASED;
20802 void
20803 arm_set_return_address (rtx source, rtx scratch)
20805 arm_stack_offsets *offsets;
20806 HOST_WIDE_INT delta;
20807 rtx addr;
20808 unsigned long saved_regs;
20810 offsets = arm_get_frame_offsets ();
20811 saved_regs = offsets->saved_regs_mask;
20813 if ((saved_regs & (1 << LR_REGNUM)) == 0)
20814 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20815 else
20817 if (frame_pointer_needed)
20818 addr = plus_constant(hard_frame_pointer_rtx, -4);
20819 else
20821 /* LR will be the first saved register. */
20822 delta = offsets->outgoing_args - (offsets->frame + 4);
20825 if (delta >= 4096)
20827 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
20828 GEN_INT (delta & ~4095)));
20829 addr = scratch;
20830 delta &= 4095;
20832 else
20833 addr = stack_pointer_rtx;
20835 addr = plus_constant (addr, delta);
20837 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20842 void
20843 thumb_set_return_address (rtx source, rtx scratch)
20845 arm_stack_offsets *offsets;
20846 HOST_WIDE_INT delta;
20847 HOST_WIDE_INT limit;
20848 int reg;
20849 rtx addr;
20850 unsigned long mask;
20852 emit_use (source);
20854 offsets = arm_get_frame_offsets ();
20855 mask = offsets->saved_regs_mask;
20856 if (mask & (1 << LR_REGNUM))
20858 limit = 1024;
20859 /* Find the saved regs. */
20860 if (frame_pointer_needed)
20862 delta = offsets->soft_frame - offsets->saved_args;
20863 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
20864 if (TARGET_THUMB1)
20865 limit = 128;
20867 else
20869 delta = offsets->outgoing_args - offsets->saved_args;
20870 reg = SP_REGNUM;
20872 /* Allow for the stack frame. */
20873 if (TARGET_THUMB1 && TARGET_BACKTRACE)
20874 delta -= 16;
20875 /* The link register is always the first saved register. */
20876 delta -= 4;
20878 /* Construct the address. */
20879 addr = gen_rtx_REG (SImode, reg);
20880 if (delta > limit)
20882 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
20883 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
20884 addr = scratch;
20886 else
20887 addr = plus_constant (addr, delta);
20889 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20891 else
20892 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20895 /* Implements target hook vector_mode_supported_p. */
20896 bool
20897 arm_vector_mode_supported_p (enum machine_mode mode)
20899 /* Neon also supports V2SImode, etc. listed in the clause below. */
20900 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
20901 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
20902 return true;
20904 if ((TARGET_NEON || TARGET_IWMMXT)
20905 && ((mode == V2SImode)
20906 || (mode == V4HImode)
20907 || (mode == V8QImode)))
20908 return true;
20910 return false;
20913 /* Implements target hook small_register_classes_for_mode_p. */
20914 bool
20915 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
20917 return TARGET_THUMB1;
20920 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
20921 ARM insns and therefore guarantee that the shift count is modulo 256.
20922 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
20923 guarantee no particular behavior for out-of-range counts. */
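/* Returning 255 lets the middle end drop an explicit AND of an SImode
   shift count with 0xff, e.g. in "x << (n & 0xff)"; the zero return for
   DImode promises nothing and keeps such masking explicit.  */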
20925 static unsigned HOST_WIDE_INT
20926 arm_shift_truncation_mask (enum machine_mode mode)
20928 return mode == SImode ? 255 : 0;
20932 /* Map internal gcc register numbers to DWARF2 register numbers. */
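/* For example r0-r15 map straight through, the register holding s5 maps
   to 64 + 5 = 69 under the legacy single-precision numbering, and d20
   (which has no single-precision alias) maps to 256 + 20 = 276.  */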
20934 unsigned int
20935 arm_dbx_register_number (unsigned int regno)
20937 if (regno < 16)
20938 return regno;
20940 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
20941 compatibility. The EABI defines them as registers 96-103. */
20942 if (IS_FPA_REGNUM (regno))
20943 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
20945 if (IS_VFP_REGNUM (regno))
20947 /* See comment in arm_dwarf_register_span. */
20948 if (VFP_REGNO_OK_FOR_SINGLE (regno))
20949 return 64 + regno - FIRST_VFP_REGNUM;
20950 else
20951 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
20954 if (IS_IWMMXT_GR_REGNUM (regno))
20955 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
20957 if (IS_IWMMXT_REGNUM (regno))
20958 return 112 + regno - FIRST_IWMMXT_REGNUM;
20960 gcc_unreachable ();
20963 /* Dwarf models VFPv3 registers as 32 64-bit registers.
20964 GCC models them as 64 32-bit registers, so we need to describe this to
20965 the DWARF generation code. Other registers can use the default. */
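/* For instance, a 16-byte value living in d20-d21 is described as a
   PARALLEL of two DImode registers, one per D register, so each 64-bit
   half gets its own location description.  */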
20966 static rtx
20967 arm_dwarf_register_span (rtx rtl)
20969 unsigned regno;
20970 int nregs;
20971 int i;
20972 rtx p;
20974 regno = REGNO (rtl);
20975 if (!IS_VFP_REGNUM (regno))
20976 return NULL_RTX;
20978 /* XXX FIXME: The EABI defines two VFP register ranges:
20979 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
20980 256-287: D0-D31
20981 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
20982 corresponding D register. Until GDB supports this, we shall use the
20983 legacy encodings. We also use these encodings for D0-D15 for
20984 compatibility with older debuggers. */
20985 if (VFP_REGNO_OK_FOR_SINGLE (regno))
20986 return NULL_RTX;
20988 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
20989 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
20990 regno = (regno - FIRST_VFP_REGNUM) / 2;
20991 for (i = 0; i < nregs; i++)
20992 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
20994 return p;
20997 #ifdef TARGET_UNWIND_INFO
20998 /* Emit unwind directives for a store-multiple instruction or stack pointer
20999 push during alignment.
21000 These should only ever be generated by the function prologue code, so
21001 expect them to have a particular form. */
21003 static void
21004 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
21006 int i;
21007 HOST_WIDE_INT offset;
21008 HOST_WIDE_INT nregs;
21009 int reg_size;
21010 unsigned reg;
21011 unsigned lastreg;
21012 rtx e;
21014 e = XVECEXP (p, 0, 0);
21015 if (GET_CODE (e) != SET)
21016 abort ();
21018 /* First insn will adjust the stack pointer. */
21019 if (GET_CODE (e) != SET
21020 || GET_CODE (XEXP (e, 0)) != REG
21021 || REGNO (XEXP (e, 0)) != SP_REGNUM
21022 || GET_CODE (XEXP (e, 1)) != PLUS)
21023 abort ();
21025 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
21026 nregs = XVECLEN (p, 0) - 1;
21028 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
21029 if (reg < 16)
21031 /* The function prologue may also push pc, but not annotate it as it is
21032 never restored. We turn this into a stack pointer adjustment. */
21033 if (nregs * 4 == offset - 4)
21035 fprintf (asm_out_file, "\t.pad #4\n");
21036 offset -= 4;
21038 reg_size = 4;
21039 fprintf (asm_out_file, "\t.save {");
21041 else if (IS_VFP_REGNUM (reg))
21043 reg_size = 8;
21044 fprintf (asm_out_file, "\t.vsave {");
21046 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
21048 /* FPA registers are done differently. */
21049 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
21050 return;
21052 else
21053 /* Unknown register type. */
21054 abort ();
21056 /* If the stack increment doesn't match the size of the saved registers,
21057 something has gone horribly wrong. */
21058 if (offset != nregs * reg_size)
21059 abort ();
21061 offset = 0;
21062 lastreg = 0;
21063 /* The remaining insns will describe the stores. */
21064 for (i = 1; i <= nregs; i++)
21066 /* Expect (set (mem <addr>) (reg)).
21067 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
21068 e = XVECEXP (p, 0, i);
21069 if (GET_CODE (e) != SET
21070 || GET_CODE (XEXP (e, 0)) != MEM
21071 || GET_CODE (XEXP (e, 1)) != REG)
21072 abort ();
21074 reg = REGNO (XEXP (e, 1));
21075 if (reg < lastreg)
21076 abort ();
21078 if (i != 1)
21079 fprintf (asm_out_file, ", ");
21080 /* We can't use %r for vfp because we need to use the
21081 double precision register names. */
21082 if (IS_VFP_REGNUM (reg))
21083 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
21084 else
21085 asm_fprintf (asm_out_file, "%r", reg);
21087 #ifdef ENABLE_CHECKING
21088 /* Check that the addresses are consecutive. */
21089 e = XEXP (XEXP (e, 0), 0);
21090 if (GET_CODE (e) == PLUS)
21092 offset += reg_size;
21093 if (GET_CODE (XEXP (e, 0)) != REG
21094 || REGNO (XEXP (e, 0)) != SP_REGNUM
21095 || GET_CODE (XEXP (e, 1)) != CONST_INT
21096 || offset != INTVAL (XEXP (e, 1)))
21097 abort ();
21099 else if (i != 1
21100 || GET_CODE (e) != REG
21101 || REGNO (e) != SP_REGNUM)
21102 abort ();
21103 #endif
21105 fprintf (asm_out_file, "}\n");
21108 /* Emit unwind directives for a SET. */
21110 static void
21111 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
21113 rtx e0;
21114 rtx e1;
21115 unsigned reg;
21117 e0 = XEXP (p, 0);
21118 e1 = XEXP (p, 1);
21119 switch (GET_CODE (e0))
21121 case MEM:
21122 /* Pushing a single register. */
21123 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
21124 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
21125 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
21126 abort ();
21128 asm_fprintf (asm_out_file, "\t.save ");
21129 if (IS_VFP_REGNUM (REGNO (e1)))
21130 asm_fprintf(asm_out_file, "{d%d}\n",
21131 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
21132 else
21133 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
21134 break;
21136 case REG:
21137 if (REGNO (e0) == SP_REGNUM)
21139 /* A stack increment. */
21140 if (GET_CODE (e1) != PLUS
21141 || GET_CODE (XEXP (e1, 0)) != REG
21142 || REGNO (XEXP (e1, 0)) != SP_REGNUM
21143 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21144 abort ();
21146 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
21147 -INTVAL (XEXP (e1, 1)));
21149 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
21151 HOST_WIDE_INT offset;
21153 if (GET_CODE (e1) == PLUS)
21155 if (GET_CODE (XEXP (e1, 0)) != REG
21156 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21157 abort ();
21158 reg = REGNO (XEXP (e1, 0));
21159 offset = INTVAL (XEXP (e1, 1));
21160 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
21161 HARD_FRAME_POINTER_REGNUM, reg,
21162 offset);
21164 else if (GET_CODE (e1) == REG)
21166 reg = REGNO (e1);
21167 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
21168 HARD_FRAME_POINTER_REGNUM, reg);
21170 else
21171 abort ();
21173 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
21175 /* Move from sp to reg. */
21176 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
21178 else if (GET_CODE (e1) == PLUS
21179 && GET_CODE (XEXP (e1, 0)) == REG
21180 && REGNO (XEXP (e1, 0)) == SP_REGNUM
21181 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
21183 /* Set reg to offset from sp. */
21184 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
21185 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
21187 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
21189 /* Stack pointer save before alignment. */
21190 reg = REGNO (e0);
21191 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
21192 reg + 0x90, reg);
21194 else
21195 abort ();
21196 break;
21198 default:
21199 abort ();
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx pat;

  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;

  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
#endif /* TARGET_UNWIND_INFO */
/* Handle UNSPEC DWARF call frame instructions.  These are needed for dynamic
   stack alignment.  */

static void
arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_STACK_ALIGN:
      /* ??? We should set the CFA = (SP & ~7).  At this point we haven't
	 put anything on the stack, so hopefully it won't matter.
	 CFA = SP will be correct after alignment.  */
      dwarf2out_reg_save_reg (label, stack_pointer_rtx,
			      SET_DEST (pattern));
      break;
    default:
      gcc_unreachable ();
    }
}
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs ("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
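/* Output the operand of an UNSPEC_TLS reference: the symbol itself followed
   by the assembler decoration that selects the TLS relocation (for example
   "(tlsgd)" or "(gottpoff)"), plus the PC-relative correction term used by
   the GD, LDM and IE models.  Called from arm_output_addr_const_extra.  */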
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      fputs (" - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
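/* Output ARM-specific forms of constant addresses that the generic
   output_addr_const machinery cannot handle: UNSPECs wrapping TLS
   references, PIC labels, GOT-relative offsets, and vector constants.
   Returns FALSE for anything else.  */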
bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx *operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op (operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT (val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
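/* For example, with unified assembly syntax and a shift operand for which
   shift_op returns "lsl", SET_FLAGS == 0 builds the template
   "lsl%?\t%0, %1, %2"; the '%?', '%.' and '%!' escapes selected via
   FLAG_CHARS are expanded later when the template is printed.  */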
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[0]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
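/* The __gnu_thumb1_case_* routines branched to above are small libgcc
   helpers (see config/arm/lib1funcs.asm) that index the ADDR_DIFF_VEC
   dispatch table placed immediately after the call.  */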
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
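/* For a HImode dispatch table the sequence emitted above is roughly:

	cmp	<index>, <bound>
	bhi	<default label>
	tbh	[pc, <index>, lsl #1]

   while the SImode/PIC case falls back to an adr/ldr/add/bx sequence.  */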
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};

const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi && !in_system_header)
	{
	  warned = true;
	  inform (input_location,
		  "the mangling of %<va_list%> has changed in GCC 4.4");
	}
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
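/* For example, a V8QImode vector whose element type is __builtin_neon_qi
   (the representation used for int8x8_t in arm_neon.h) is mangled as
   "15__simd64_int8_t" per the first table entry, while unrecognized
   vector types keep the default mangling.  */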
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Set default optimization options.  */
void
arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.  */
  if (level > 0)
    flag_section_anchors = 2;
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}
/* Only Thumb-1 lacks conditional execution, so return true unless the
   target is Thumb-1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}

#include "gt-arm.h"