[official-gcc.git] / gcc / config / arm / arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "real.h"
35 #include "insn-config.h"
36 #include "conditions.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "reload.h"
41 #include "function.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "ggc.h"
48 #include "except.h"
49 #include "c-pragma.h"
50 #include "integrate.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "debug.h"
55 #include "langhooks.h"
56 #include "df.h"
57 #include "intl.h"
58 #include "libfuncs.h"
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
64 void (*arm_lang_output_object_attributes_hook)(void);
66 /* Forward function declarations. */
67 static int arm_compute_static_chain_stack_bytes (void);
68 static arm_stack_offsets *arm_get_frame_offsets (void);
69 static void arm_add_gc_roots (void);
70 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
71 HOST_WIDE_INT, rtx, rtx, int, int);
72 static unsigned bit_count (unsigned long);
73 static int arm_address_register_rtx_p (rtx, int);
74 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
75 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
76 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
77 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
78 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
79 inline static int thumb1_index_register_rtx_p (rtx, int);
80 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
81 static int thumb_far_jump_used_p (void);
82 static bool thumb_force_lr_save (void);
83 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
84 static rtx emit_sfm (int, int);
85 static unsigned arm_size_return_regs (void);
86 static bool arm_assemble_integer (rtx, unsigned int, int);
87 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
88 static arm_cc get_arm_condition_code (rtx);
89 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
90 static rtx is_jump_table (rtx);
91 static const char *output_multi_immediate (rtx *, const char *, const char *,
92 int, HOST_WIDE_INT);
93 static const char *shift_op (rtx, HOST_WIDE_INT *);
94 static struct machine_function *arm_init_machine_status (void);
95 static void thumb_exit (FILE *, int);
96 static rtx is_jump_table (rtx);
97 static HOST_WIDE_INT get_jump_table_size (rtx);
98 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
99 static Mnode *add_minipool_forward_ref (Mfix *);
100 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
101 static Mnode *add_minipool_backward_ref (Mfix *);
102 static void assign_minipool_offsets (Mfix *);
103 static void arm_print_value (FILE *, rtx);
104 static void dump_minipool (rtx);
105 static int arm_barrier_cost (rtx);
106 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
107 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
108 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
109 rtx);
110 static void arm_reorg (void);
111 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
112 static unsigned long arm_compute_save_reg0_reg12_mask (void);
113 static unsigned long arm_compute_save_reg_mask (void);
114 static unsigned long arm_isr_value (tree);
115 static unsigned long arm_compute_func_type (void);
116 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
117 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
118 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
119 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
120 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
121 #endif
122 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
123 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
124 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
125 static int arm_comp_type_attributes (const_tree, const_tree);
126 static void arm_set_default_type_attributes (tree);
127 static int arm_adjust_cost (rtx, rtx, rtx, int);
128 static int count_insns_for_constant (HOST_WIDE_INT, int);
129 static int arm_get_strip_length (int);
130 static bool arm_function_ok_for_sibcall (tree, tree);
131 static enum machine_mode arm_promote_function_mode (const_tree,
132 enum machine_mode, int *,
133 const_tree, int);
134 static bool arm_return_in_memory (const_tree, const_tree);
135 static rtx arm_function_value (const_tree, const_tree, bool);
136 static rtx arm_libcall_value (enum machine_mode, const_rtx);
138 static void arm_internal_label (FILE *, const char *, unsigned long);
139 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
140 tree);
141 static bool arm_have_conditional_execution (void);
142 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
143 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
144 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
145 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
146 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
147 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
148 static bool arm_rtx_costs (rtx, int, int, int *, bool);
149 static int arm_address_cost (rtx, bool);
150 static bool arm_memory_load_p (rtx);
151 static bool arm_cirrus_insn_p (rtx);
152 static void cirrus_reorg (rtx);
153 static void arm_init_builtins (void);
154 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
155 static void arm_init_iwmmxt_builtins (void);
156 static rtx safe_vector_operand (rtx, enum machine_mode);
157 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
158 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
159 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
160 static void emit_constant_insn (rtx cond, rtx pattern);
161 static rtx emit_set_insn (rtx, rtx);
162 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
163 tree, bool);
164 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
165 const_tree);
166 static int aapcs_select_return_coproc (const_tree, const_tree);
168 #ifdef OBJECT_FORMAT_ELF
169 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
170 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
171 #endif
172 #ifndef ARM_PE
173 static void arm_encode_section_info (tree, rtx, int);
174 #endif
176 static void arm_file_end (void);
177 static void arm_file_start (void);
179 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
180 tree, int *, int);
181 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
182 enum machine_mode, const_tree, bool);
183 static bool arm_promote_prototypes (const_tree);
184 static bool arm_default_short_enums (void);
185 static bool arm_align_anon_bitfield (void);
186 static bool arm_return_in_msb (const_tree);
187 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
188 static bool arm_return_in_memory (const_tree, const_tree);
189 #ifdef TARGET_UNWIND_INFO
190 static void arm_unwind_emit (FILE *, rtx);
191 static bool arm_output_ttype (rtx);
192 #endif
193 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
194 static rtx arm_dwarf_register_span (rtx);
196 static tree arm_cxx_guard_type (void);
197 static bool arm_cxx_guard_mask_bit (void);
198 static tree arm_get_cookie_size (tree);
199 static bool arm_cookie_has_size (void);
200 static bool arm_cxx_cdtor_returns_this (void);
201 static bool arm_cxx_key_method_may_be_inline (void);
202 static void arm_cxx_determine_class_data_visibility (tree);
203 static bool arm_cxx_class_data_always_comdat (void);
204 static bool arm_cxx_use_aeabi_atexit (void);
205 static void arm_init_libfuncs (void);
206 static tree arm_build_builtin_va_list (void);
207 static void arm_expand_builtin_va_start (tree, rtx);
208 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
209 static bool arm_handle_option (size_t, const char *, int);
210 static void arm_target_help (void);
211 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
212 static bool arm_cannot_copy_insn_p (rtx);
213 static bool arm_tls_symbol_p (rtx x);
214 static int arm_issue_rate (void);
215 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
216 static bool arm_allocate_stack_slots_for_args (void);
217 static const char *arm_invalid_parameter_type (const_tree t);
218 static const char *arm_invalid_return_type (const_tree t);
219 static tree arm_promoted_type (const_tree t);
220 static tree arm_convert_to_type (tree type, tree expr);
221 static bool arm_scalar_mode_supported_p (enum machine_mode);
222 static bool arm_frame_pointer_required (void);
223 static bool arm_can_eliminate (const int, const int);
224 static void arm_asm_trampoline_template (FILE *);
225 static void arm_trampoline_init (rtx, tree, rtx);
226 static rtx arm_trampoline_adjust_address (rtx);
227 static rtx arm_pic_static_addr (rtx orig, rtx reg);
230 /* Table of machine attributes. */
231 static const struct attribute_spec arm_attribute_table[] =
233 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
234 /* Function calls made to this symbol must be done indirectly, because
235 it may lie outside of the 26 bit addressing range of a normal function
236 call. */
237 { "long_call", 0, 0, false, true, true, NULL },
238 /* Whereas these functions are always known to reside within the 26 bit
239 addressing range. */
240 { "short_call", 0, 0, false, true, true, NULL },
241 /* Specify the procedure call conventions for a function. */
242 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
243 /* Interrupt Service Routines have special prologue and epilogue requirements. */
244 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
245 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
246 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
247 #ifdef ARM_PE
248 /* ARM/PE has three new attributes:
249 interfacearm - ?
250 dllexport - for exporting a function/variable that will live in a dll
251 dllimport - for importing a function/variable from a dll
253 Microsoft allows multiple declspecs in one __declspec, separating
254 them with spaces. We do NOT support this. Instead, use __declspec
 255     multiple times.  */
257 { "dllimport", 0, 0, true, false, false, NULL },
258 { "dllexport", 0, 0, true, false, false, NULL },
259 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
260 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
261 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
262 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
263 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
264 #endif
265 { NULL, 0, 0, false, false, false, NULL }
268 /* Initialize the GCC target structure. */
269 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
270 #undef TARGET_MERGE_DECL_ATTRIBUTES
271 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
272 #endif
274 #undef TARGET_LEGITIMIZE_ADDRESS
275 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
277 #undef TARGET_ATTRIBUTE_TABLE
278 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
280 #undef TARGET_ASM_FILE_START
281 #define TARGET_ASM_FILE_START arm_file_start
282 #undef TARGET_ASM_FILE_END
283 #define TARGET_ASM_FILE_END arm_file_end
285 #undef TARGET_ASM_ALIGNED_SI_OP
286 #define TARGET_ASM_ALIGNED_SI_OP NULL
287 #undef TARGET_ASM_INTEGER
288 #define TARGET_ASM_INTEGER arm_assemble_integer
290 #undef TARGET_ASM_FUNCTION_PROLOGUE
291 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
293 #undef TARGET_ASM_FUNCTION_EPILOGUE
294 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
296 #undef TARGET_DEFAULT_TARGET_FLAGS
297 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
298 #undef TARGET_HANDLE_OPTION
299 #define TARGET_HANDLE_OPTION arm_handle_option
300 #undef TARGET_HELP
301 #define TARGET_HELP arm_target_help
303 #undef TARGET_COMP_TYPE_ATTRIBUTES
304 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
306 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
307 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
309 #undef TARGET_SCHED_ADJUST_COST
310 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
312 #undef TARGET_ENCODE_SECTION_INFO
313 #ifdef ARM_PE
314 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
315 #else
316 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
317 #endif
319 #undef TARGET_STRIP_NAME_ENCODING
320 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
322 #undef TARGET_ASM_INTERNAL_LABEL
323 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
325 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
326 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
328 #undef TARGET_FUNCTION_VALUE
329 #define TARGET_FUNCTION_VALUE arm_function_value
331 #undef TARGET_LIBCALL_VALUE
332 #define TARGET_LIBCALL_VALUE arm_libcall_value
334 #undef TARGET_ASM_OUTPUT_MI_THUNK
335 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
336 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
337 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
339 #undef TARGET_RTX_COSTS
340 #define TARGET_RTX_COSTS arm_rtx_costs
341 #undef TARGET_ADDRESS_COST
342 #define TARGET_ADDRESS_COST arm_address_cost
344 #undef TARGET_SHIFT_TRUNCATION_MASK
345 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
346 #undef TARGET_VECTOR_MODE_SUPPORTED_P
347 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
349 #undef TARGET_MACHINE_DEPENDENT_REORG
350 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
352 #undef TARGET_INIT_BUILTINS
353 #define TARGET_INIT_BUILTINS arm_init_builtins
354 #undef TARGET_EXPAND_BUILTIN
355 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
357 #undef TARGET_INIT_LIBFUNCS
358 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
360 #undef TARGET_PROMOTE_FUNCTION_MODE
361 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
362 #undef TARGET_PROMOTE_PROTOTYPES
363 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
364 #undef TARGET_PASS_BY_REFERENCE
365 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
366 #undef TARGET_ARG_PARTIAL_BYTES
367 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
369 #undef TARGET_SETUP_INCOMING_VARARGS
370 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
372 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
373 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
375 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
376 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
377 #undef TARGET_TRAMPOLINE_INIT
378 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
379 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
380 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
382 #undef TARGET_DEFAULT_SHORT_ENUMS
383 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
385 #undef TARGET_ALIGN_ANON_BITFIELD
386 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
388 #undef TARGET_NARROW_VOLATILE_BITFIELD
389 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
391 #undef TARGET_CXX_GUARD_TYPE
392 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
394 #undef TARGET_CXX_GUARD_MASK_BIT
395 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
397 #undef TARGET_CXX_GET_COOKIE_SIZE
398 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
400 #undef TARGET_CXX_COOKIE_HAS_SIZE
401 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
403 #undef TARGET_CXX_CDTOR_RETURNS_THIS
404 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
406 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
407 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
409 #undef TARGET_CXX_USE_AEABI_ATEXIT
410 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
412 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
413 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
414 arm_cxx_determine_class_data_visibility
416 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
417 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
419 #undef TARGET_RETURN_IN_MSB
420 #define TARGET_RETURN_IN_MSB arm_return_in_msb
422 #undef TARGET_RETURN_IN_MEMORY
423 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
425 #undef TARGET_MUST_PASS_IN_STACK
426 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
428 #ifdef TARGET_UNWIND_INFO
429 #undef TARGET_UNWIND_EMIT
430 #define TARGET_UNWIND_EMIT arm_unwind_emit
432 /* EABI unwinding tables use a different format for the typeinfo tables. */
433 #undef TARGET_ASM_TTYPE
434 #define TARGET_ASM_TTYPE arm_output_ttype
436 #undef TARGET_ARM_EABI_UNWINDER
437 #define TARGET_ARM_EABI_UNWINDER true
438 #endif /* TARGET_UNWIND_INFO */
440 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
441 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
443 #undef TARGET_DWARF_REGISTER_SPAN
444 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
446 #undef TARGET_CANNOT_COPY_INSN_P
447 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
449 #ifdef HAVE_AS_TLS
450 #undef TARGET_HAVE_TLS
451 #define TARGET_HAVE_TLS true
452 #endif
454 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
455 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
457 #undef TARGET_CANNOT_FORCE_CONST_MEM
458 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
460 #undef TARGET_MAX_ANCHOR_OFFSET
461 #define TARGET_MAX_ANCHOR_OFFSET 4095
463 /* The minimum is set such that the total size of the block
464 for a particular anchor is -4088 + 1 + 4095 bytes, which is
465 divisible by eight, ensuring natural spacing of anchors. */
466 #undef TARGET_MIN_ANCHOR_OFFSET
467 #define TARGET_MIN_ANCHOR_OFFSET -4088
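/* A worked version of the arithmetic in the comment above: each anchor's
   block covers offsets -4088 through +4095, i.e. 4088 + 1 + 4095 = 8184
   bytes, and 8184 = 8 * 1023, hence the eight-byte natural spacing.  */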
469 #undef TARGET_SCHED_ISSUE_RATE
470 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
472 #undef TARGET_MANGLE_TYPE
473 #define TARGET_MANGLE_TYPE arm_mangle_type
475 #undef TARGET_BUILD_BUILTIN_VA_LIST
476 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
477 #undef TARGET_EXPAND_BUILTIN_VA_START
478 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
479 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
480 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
482 #ifdef HAVE_AS_TLS
483 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
484 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
485 #endif
487 #undef TARGET_LEGITIMATE_ADDRESS_P
488 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
490 #undef TARGET_INVALID_PARAMETER_TYPE
491 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
493 #undef TARGET_INVALID_RETURN_TYPE
494 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
496 #undef TARGET_PROMOTED_TYPE
497 #define TARGET_PROMOTED_TYPE arm_promoted_type
499 #undef TARGET_CONVERT_TO_TYPE
500 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
502 #undef TARGET_SCALAR_MODE_SUPPORTED_P
503 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
505 #undef TARGET_FRAME_POINTER_REQUIRED
506 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
508 #undef TARGET_CAN_ELIMINATE
509 #define TARGET_CAN_ELIMINATE arm_can_eliminate
511 struct gcc_target targetm = TARGET_INITIALIZER;
513 /* Obstack for minipool constant handling. */
514 static struct obstack minipool_obstack;
515 static char * minipool_startobj;
517 /* The maximum number of insns skipped which
518 will be conditionalised if possible. */
519 static int max_insns_skipped = 5;
521 extern FILE * asm_out_file;
523 /* True if we are currently building a constant table. */
524 int making_const_table;
526 /* The processor for which instructions should be scheduled. */
527 enum processor_type arm_tune = arm_none;
529 /* The current tuning set. */
530 const struct tune_params *current_tune;
532 /* The default processor used if not overridden by commandline. */
533 static enum processor_type arm_default_cpu = arm_none;
535 /* Which floating point hardware to schedule for. */
536 int arm_fpu_attr;
 538 /* Which floating point hardware to use.  */
539 const struct arm_fpu_desc *arm_fpu_desc;
541 /* Whether to use floating point hardware. */
542 enum float_abi_type arm_float_abi;
544 /* Which __fp16 format to use. */
545 enum arm_fp16_format_type arm_fp16_format;
547 /* Which ABI to use. */
548 enum arm_abi_type arm_abi;
550 /* Which thread pointer model to use. */
551 enum arm_tp_type target_thread_pointer = TP_AUTO;
553 /* Used to parse -mstructure_size_boundary command line option. */
554 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
556 /* Used for Thumb call_via trampolines. */
557 rtx thumb_call_via_label[14];
558 static int thumb_call_reg_needed;
560 /* Bit values used to identify processor capabilities. */
561 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
562 #define FL_ARCH3M (1 << 1) /* Extended multiply */
563 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
564 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
565 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
566 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
567 #define FL_THUMB (1 << 6) /* Thumb aware */
568 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
569 #define FL_STRONG (1 << 8) /* StrongARM */
570 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
571 #define FL_XSCALE (1 << 10) /* XScale */
572 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
573 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
574 media instructions. */
575 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
576 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
577 Note: ARM6 & 7 derivatives only. */
578 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
579 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
580 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
581 profile. */
582 #define FL_DIV (1 << 18) /* Hardware divide. */
583 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
584 #define FL_NEON (1 << 20) /* Neon instructions. */
585 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
586 architecture. */
588 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
590 #define FL_FOR_ARCH2 FL_NOTM
591 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
592 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
593 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
594 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
595 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
596 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
597 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
598 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
599 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
600 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
601 #define FL_FOR_ARCH6J FL_FOR_ARCH6
602 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
603 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
604 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
605 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
606 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
607 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
608 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
609 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
610 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
611 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
613 /* The bits in this mask specify which
614 instructions we are allowed to generate. */
615 static unsigned long insn_flags = 0;
617 /* The bits in this mask specify which instruction scheduling options should
618 be used. */
619 static unsigned long tune_flags = 0;
621 /* The following are used in the arm.md file as equivalents to bits
622 in the above two flag variables. */
624 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
625 int arm_arch3m = 0;
627 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
628 int arm_arch4 = 0;
630 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
631 int arm_arch4t = 0;
633 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
634 int arm_arch5 = 0;
636 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
637 int arm_arch5e = 0;
639 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
640 int arm_arch6 = 0;
642 /* Nonzero if this chip supports the ARM 6K extensions. */
643 int arm_arch6k = 0;
645 /* Nonzero if instructions not present in the 'M' profile can be used. */
646 int arm_arch_notm = 0;
648 /* Nonzero if instructions present in ARMv7E-M can be used. */
649 int arm_arch7em = 0;
651 /* Nonzero if this chip can benefit from load scheduling. */
652 int arm_ld_sched = 0;
654 /* Nonzero if this chip is a StrongARM. */
655 int arm_tune_strongarm = 0;
657 /* Nonzero if this chip is a Cirrus variant. */
658 int arm_arch_cirrus = 0;
660 /* Nonzero if this chip supports Intel Wireless MMX technology. */
661 int arm_arch_iwmmxt = 0;
663 /* Nonzero if this chip is an XScale. */
664 int arm_arch_xscale = 0;
666 /* Nonzero if tuning for XScale */
667 int arm_tune_xscale = 0;
669 /* Nonzero if we want to tune for stores that access the write-buffer.
670 This typically means an ARM6 or ARM7 with MMU or MPU. */
671 int arm_tune_wbuf = 0;
673 /* Nonzero if tuning for Cortex-A9. */
674 int arm_tune_cortex_a9 = 0;
676 /* Nonzero if generating Thumb instructions. */
677 int thumb_code = 0;
679 /* Nonzero if we should define __THUMB_INTERWORK__ in the
680 preprocessor.
681 XXX This is a bit of a hack, it's intended to help work around
682 problems in GLD which doesn't understand that armv5t code is
683 interworking clean. */
684 int arm_cpp_interwork = 0;
686 /* Nonzero if chip supports Thumb 2. */
687 int arm_arch_thumb2;
689 /* Nonzero if chip supports integer division instruction. */
690 int arm_arch_hwdiv;
692 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
693 must report the mode of the memory reference from PRINT_OPERAND to
694 PRINT_OPERAND_ADDRESS. */
695 enum machine_mode output_memory_reference_mode;
697 /* The register number to be used for the PIC offset register. */
698 unsigned arm_pic_register = INVALID_REGNUM;
700 /* Set to 1 after arm_reorg has started. Reset to start at the start of
701 the next function. */
702 static int after_arm_reorg = 0;
704 static enum arm_pcs arm_pcs_default;
706 /* For an explanation of these variables, see final_prescan_insn below. */
707 int arm_ccfsm_state;
708 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
709 enum arm_cond_code arm_current_cc;
710 rtx arm_target_insn;
711 int arm_target_label;
712 /* The number of conditionally executed insns, including the current insn. */
713 int arm_condexec_count = 0;
714 /* A bitmask specifying the patterns for the IT block.
715 Zero means do not output an IT block before this insn. */
716 int arm_condexec_mask = 0;
717 /* The number of bits used in arm_condexec_mask. */
718 int arm_condexec_masklen = 0;
720 /* The condition codes of the ARM, and the inverse function. */
721 static const char * const arm_condition_codes[] =
723 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
724 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
727 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
728 #define streq(string1, string2) (strcmp (string1, string2) == 0)
730 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
731 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
732 | (1 << PIC_OFFSET_TABLE_REGNUM)))
734 /* Initialization code. */
736 struct processors
738 const char *const name;
739 enum processor_type core;
740 const char *arch;
741 const unsigned long flags;
742 const struct tune_params *const tune;
745 const struct tune_params arm_slowmul_tune =
747 arm_slowmul_rtx_costs,
751 const struct tune_params arm_fastmul_tune =
753 arm_fastmul_rtx_costs,
757 const struct tune_params arm_xscale_tune =
759 arm_xscale_rtx_costs,
763 const struct tune_params arm_9e_tune =
765 arm_9e_rtx_costs,
769 /* Not all of these give usefully different compilation alternatives,
770 but there is no simple way of generalizing them. */
771 static const struct processors all_cores[] =
773 /* ARM Cores */
774 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
775 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
776 #include "arm-cores.def"
777 #undef ARM_CORE
778 {NULL, arm_none, NULL, 0, NULL}
781 static const struct processors all_architectures[] =
783 /* ARM Architectures */
784 /* We don't specify tuning costs here as it will be figured out
785 from the core. */
787 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
788 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
789 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
790 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
791 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
792 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
793 implementations that support it, so we will leave it out for now. */
794 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
795 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
796 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
797 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
798 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
799 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
800 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
801 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
802 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
803 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
804 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
805 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
806 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
807 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
808 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
809 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
810 {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
811 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
812 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
813 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
814 {NULL, arm_none, NULL, 0 , NULL}
817 struct arm_cpu_select
819 const char * string;
820 const char * name;
821 const struct processors * processors;
824 /* This is a magic structure. The 'string' field is magically filled in
825 with a pointer to the value specified by the user on the command line
826 assuming that the user has specified such a value. */
828 static struct arm_cpu_select arm_select[] =
830 /* string name processors */
831 { NULL, "-mcpu=", all_cores },
832 { NULL, "-march=", all_architectures },
833 { NULL, "-mtune=", all_cores }
836 /* Defines representing the indexes into the above table. */
837 #define ARM_OPT_SET_CPU 0
838 #define ARM_OPT_SET_ARCH 1
839 #define ARM_OPT_SET_TUNE 2
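/* For example (the CPU name here is purely illustrative): "-mcpu=cortex-a8"
   reaches arm_handle_option below as OPT_mcpu_, which stores "cortex-a8" in
   arm_select[ARM_OPT_SET_CPU].string; arm_override_options later looks that
   string up in all_cores to set insn_flags and the tuning target.  */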
841 /* The name of the preprocessor macro to define for this architecture. */
843 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
845 /* Available values for -mfpu=. */
847 static const struct arm_fpu_desc all_fpus[] =
849 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
850 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
851 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
852 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
853 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
854 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
855 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
856 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
857 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
858 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
859 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
860 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
861 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
862 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
863 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
864 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
865 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
866 /* Compatibility aliases. */
867 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
871 struct float_abi
873 const char * name;
874 enum float_abi_type abi_type;
878 /* Available values for -mfloat-abi=. */
880 static const struct float_abi all_float_abis[] =
882 {"soft", ARM_FLOAT_ABI_SOFT},
883 {"softfp", ARM_FLOAT_ABI_SOFTFP},
884 {"hard", ARM_FLOAT_ABI_HARD}
888 struct fp16_format
890 const char *name;
891 enum arm_fp16_format_type fp16_format_type;
895 /* Available values for -mfp16-format=. */
897 static const struct fp16_format all_fp16_formats[] =
899 {"none", ARM_FP16_FORMAT_NONE},
900 {"ieee", ARM_FP16_FORMAT_IEEE},
901 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
905 struct abi_name
907 const char *name;
908 enum arm_abi_type abi_type;
912 /* Available values for -mabi=. */
914 static const struct abi_name arm_all_abis[] =
916 {"apcs-gnu", ARM_ABI_APCS},
917 {"atpcs", ARM_ABI_ATPCS},
918 {"aapcs", ARM_ABI_AAPCS},
919 {"iwmmxt", ARM_ABI_IWMMXT},
920 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
923 /* Supported TLS relocations. */
925 enum tls_reloc {
926 TLS_GD32,
927 TLS_LDM32,
928 TLS_LDO32,
929 TLS_IE32,
930 TLS_LE32
933 /* The maximum number of insns to be used when loading a constant. */
934 inline static int
935 arm_constant_limit (bool size_p)
937 return size_p ? 1 : current_tune->constant_limit;
940 /* Emit an insn that's a simple single-set. Both the operands must be known
941 to be valid. */
942 inline static rtx
943 emit_set_insn (rtx x, rtx y)
945 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
948 /* Return the number of bits set in VALUE. */
949 static unsigned
950 bit_count (unsigned long value)
952 unsigned long count = 0;
954 while (value)
956 count++;
957 value &= value - 1; /* Clear the least-significant set bit. */
960 return count;
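/* The loop above relies on value & (value - 1) clearing the lowest set bit,
   so it iterates once per set bit.  For example, bit_count (0x29) steps
   0x29 -> 0x28 -> 0x20 -> 0 and returns 3.  */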
963 /* Set up library functions unique to ARM. */
965 static void
966 arm_init_libfuncs (void)
968 /* There are no special library functions unless we are using the
969 ARM BPABI. */
970 if (!TARGET_BPABI)
971 return;
973 /* The functions below are described in Section 4 of the "Run-Time
974 ABI for the ARM architecture", Version 1.0. */
976 /* Double-precision floating-point arithmetic. Table 2. */
977 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
978 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
979 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
980 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
981 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
983 /* Double-precision comparisons. Table 3. */
984 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
985 set_optab_libfunc (ne_optab, DFmode, NULL);
986 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
987 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
988 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
989 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
990 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
992 /* Single-precision floating-point arithmetic. Table 4. */
993 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
994 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
995 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
996 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
997 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
999 /* Single-precision comparisons. Table 5. */
1000 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1001 set_optab_libfunc (ne_optab, SFmode, NULL);
1002 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1003 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1004 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1005 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1006 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1008 /* Floating-point to integer conversions. Table 6. */
1009 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1010 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1011 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1012 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1013 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1014 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1015 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1016 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1018 /* Conversions between floating types. Table 7. */
1019 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1020 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1022 /* Integer to floating-point conversions. Table 8. */
1023 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1024 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1025 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1026 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1027 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1028 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1029 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1030 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1032 /* Long long. Table 9. */
1033 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1034 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1035 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1036 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1037 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1038 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1039 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1040 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1042 /* Integer (32/32->32) division. \S 4.3.1. */
1043 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1044 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1046 /* The divmod functions are designed so that they can be used for
1047 plain division, even though they return both the quotient and the
1048 remainder. The quotient is returned in the usual location (i.e.,
1049 r0 for SImode, {r0, r1} for DImode), just as would be expected
1050 for an ordinary division routine. Because the AAPCS calling
1051 conventions specify that all of { r0, r1, r2, r3 } are
1052    call-clobbered registers, there is no need to tell the compiler
1053 explicitly that those registers are clobbered by these
1054 routines. */
1055 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1056 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1058 /* For SImode division the ABI provides div-without-mod routines,
1059 which are faster. */
1060 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1061 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1063 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1064 divmod libcalls instead. */
1065 set_optab_libfunc (smod_optab, DImode, NULL);
1066 set_optab_libfunc (umod_optab, DImode, NULL);
1067 set_optab_libfunc (smod_optab, SImode, NULL);
1068 set_optab_libfunc (umod_optab, SImode, NULL);
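  /* The net effect of the mappings above: a 32-bit "a / b" becomes a call to
     __aeabi_idiv (or __aeabi_uidiv for unsigned), a 64-bit division calls
     __aeabi_ldivmod and uses only the quotient half of the result, and
     "a % b" is synthesized from the corresponding divmod routine.  */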
1070 /* Half-precision float operations. The compiler handles all operations
1071    with NULL libfuncs by converting to SFmode.  */
1072 switch (arm_fp16_format)
1074 case ARM_FP16_FORMAT_IEEE:
1075 case ARM_FP16_FORMAT_ALTERNATIVE:
1077 /* Conversions. */
1078 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1079 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1080 ? "__gnu_f2h_ieee"
1081 : "__gnu_f2h_alternative"));
1082 set_conv_libfunc (sext_optab, SFmode, HFmode,
1083 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1084 ? "__gnu_h2f_ieee"
1085 : "__gnu_h2f_alternative"));
1087 /* Arithmetic. */
1088 set_optab_libfunc (add_optab, HFmode, NULL);
1089 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1090 set_optab_libfunc (smul_optab, HFmode, NULL);
1091 set_optab_libfunc (neg_optab, HFmode, NULL);
1092 set_optab_libfunc (sub_optab, HFmode, NULL);
1094 /* Comparisons. */
1095 set_optab_libfunc (eq_optab, HFmode, NULL);
1096 set_optab_libfunc (ne_optab, HFmode, NULL);
1097 set_optab_libfunc (lt_optab, HFmode, NULL);
1098 set_optab_libfunc (le_optab, HFmode, NULL);
1099 set_optab_libfunc (ge_optab, HFmode, NULL);
1100 set_optab_libfunc (gt_optab, HFmode, NULL);
1101 set_optab_libfunc (unord_optab, HFmode, NULL);
1102 break;
1104 default:
1105 break;
1108 if (TARGET_AAPCS_BASED)
1109 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1112 /* On AAPCS systems, this is the "struct __va_list". */
1113 static GTY(()) tree va_list_type;
1115 /* Return the type to use as __builtin_va_list. */
1116 static tree
1117 arm_build_builtin_va_list (void)
1119 tree va_list_name;
1120 tree ap_field;
1122 if (!TARGET_AAPCS_BASED)
1123 return std_build_builtin_va_list ();
1125 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1126 defined as:
1128 struct __va_list
1130 void *__ap;
1133 The C Library ABI further reinforces this definition in \S
1134 4.1.
1136 We must follow this definition exactly. The structure tag
1137 name is visible in C++ mangled names, and thus forms a part
1138 of the ABI. The field name may be used by people who
1139 #include <stdarg.h>. */
1140 /* Create the type. */
1141 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1142 /* Give it the required name. */
1143 va_list_name = build_decl (BUILTINS_LOCATION,
1144 TYPE_DECL,
1145 get_identifier ("__va_list"),
1146 va_list_type);
1147 DECL_ARTIFICIAL (va_list_name) = 1;
1148 TYPE_NAME (va_list_type) = va_list_name;
1149 /* Create the __ap field. */
1150 ap_field = build_decl (BUILTINS_LOCATION,
1151 FIELD_DECL,
1152 get_identifier ("__ap"),
1153 ptr_type_node);
1154 DECL_ARTIFICIAL (ap_field) = 1;
1155 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1156 TYPE_FIELDS (va_list_type) = ap_field;
1157 /* Compute its layout. */
1158 layout_type (va_list_type);
1160 return va_list_type;
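/* Written as C source, the type constructed above is:

     struct __va_list
     {
       void *__ap;
     };

   which is exactly the AAPCS definition quoted in the comment at the top of
   this function.  */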
1163 /* Return an expression of type "void *" pointing to the next
1164 available argument in a variable-argument list. VALIST is the
1165 user-level va_list object, of type __builtin_va_list. */
1166 static tree
1167 arm_extract_valist_ptr (tree valist)
1169 if (TREE_TYPE (valist) == error_mark_node)
1170 return error_mark_node;
1172 /* On an AAPCS target, the pointer is stored within "struct
1173 va_list". */
1174 if (TARGET_AAPCS_BASED)
1176 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1177 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1178 valist, ap_field, NULL_TREE);
1181 return valist;
1184 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1185 static void
1186 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1188 valist = arm_extract_valist_ptr (valist);
1189 std_expand_builtin_va_start (valist, nextarg);
1192 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1193 static tree
1194 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1195 gimple_seq *post_p)
1197 valist = arm_extract_valist_ptr (valist);
1198 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1201 /* Implement TARGET_HANDLE_OPTION. */
1203 static bool
1204 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1206 switch (code)
1208 case OPT_march_:
1209 arm_select[1].string = arg;
1210 return true;
1212 case OPT_mcpu_:
1213 arm_select[0].string = arg;
1214 return true;
1216 case OPT_mhard_float:
1217 target_float_abi_name = "hard";
1218 return true;
1220 case OPT_msoft_float:
1221 target_float_abi_name = "soft";
1222 return true;
1224 case OPT_mtune_:
1225 arm_select[2].string = arg;
1226 return true;
1228 default:
1229 return true;
1233 static void
1234 arm_target_help (void)
1236 int i;
1237 static int columns = 0;
1238 int remaining;
1240 /* If we have not done so already, obtain the desired maximum width of
1241 the output. Note - this is a duplication of the code at the start of
1242 gcc/opts.c:print_specific_help() - the two copies should probably be
1243 replaced by a single function. */
1244 if (columns == 0)
1246 const char *p;
1248 GET_ENVIRONMENT (p, "COLUMNS");
1249 if (p != NULL)
1251 int value = atoi (p);
1253 if (value > 0)
1254 columns = value;
1257 if (columns == 0)
1258 /* Use a reasonable default. */
1259 columns = 80;
1262 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1264 /* The - 2 is because we know that the last entry in the array is NULL. */
1265 i = ARRAY_SIZE (all_cores) - 2;
1266 gcc_assert (i > 0);
1267 printf (" %s", all_cores[i].name);
1268 remaining = columns - (strlen (all_cores[i].name) + 4);
1269 gcc_assert (remaining >= 0);
1271 while (i--)
1273 int len = strlen (all_cores[i].name);
1275 if (remaining > len + 2)
1277 printf (", %s", all_cores[i].name);
1278 remaining -= len + 2;
1280 else
1282 if (remaining > 0)
1283 printf (",");
1284 printf ("\n %s", all_cores[i].name);
1285 remaining = columns - (len + 4);
1289 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1291 i = ARRAY_SIZE (all_architectures) - 2;
1292 gcc_assert (i > 0);
1294 printf (" %s", all_architectures[i].name);
1295 remaining = columns - (strlen (all_architectures[i].name) + 4);
1296 gcc_assert (remaining >= 0);
1298 while (i--)
1300 int len = strlen (all_architectures[i].name);
1302 if (remaining > len + 2)
1304 printf (", %s", all_architectures[i].name);
1305 remaining -= len + 2;
1307 else
1309 if (remaining > 0)
1310 printf (",");
1311 printf ("\n %s", all_architectures[i].name);
1312 remaining = columns - (len + 4);
1315 printf ("\n");
1319 /* Fix up any incompatible options that the user has specified.
1320 This has now turned into a maze. */
1321 void
1322 arm_override_options (void)
1324 unsigned i;
1325 enum processor_type target_arch_cpu = arm_none;
1326 enum processor_type selected_cpu = arm_none;
1328 /* Set up the flags based on the cpu/architecture selected by the user. */
1329 for (i = ARRAY_SIZE (arm_select); i--;)
1331 struct arm_cpu_select * ptr = arm_select + i;
1333 if (ptr->string != NULL && ptr->string[0] != '\0')
1335 const struct processors * sel;
1337 for (sel = ptr->processors; sel->name != NULL; sel++)
1338 if (streq (ptr->string, sel->name))
1340 /* Set the architecture define. */
1341 if (i != ARM_OPT_SET_TUNE)
1342 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1344 /* Determine the processor core for which we should
1345 tune code-generation. */
1346 if (/* -mcpu= is a sensible default. */
1347 i == ARM_OPT_SET_CPU
1348 /* -mtune= overrides -mcpu= and -march=. */
1349 || i == ARM_OPT_SET_TUNE)
1350 arm_tune = (enum processor_type) (sel - ptr->processors);
1352 /* Remember the CPU associated with this architecture.
1353 If no other option is used to set the CPU type,
1354 we'll use this to guess the most suitable tuning
1355 options. */
1356 if (i == ARM_OPT_SET_ARCH)
1357 target_arch_cpu = sel->core;
1359 if (i == ARM_OPT_SET_CPU)
1360 selected_cpu = (enum processor_type) (sel - ptr->processors);
1362 if (i != ARM_OPT_SET_TUNE)
1364 /* If we have been given an architecture and a processor
1365 make sure that they are compatible. We only generate
1366 a warning though, and we prefer the CPU over the
1367 architecture. */
1368 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1369 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1370 ptr->string);
1372 insn_flags = sel->flags;
1375 break;
1378 if (sel->name == NULL)
1379 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1383 /* Guess the tuning options from the architecture if necessary. */
1384 if (arm_tune == arm_none)
1385 arm_tune = target_arch_cpu;
1387 /* If the user did not specify a processor, choose one for them. */
1388 if (insn_flags == 0)
1390 const struct processors * sel;
1391 unsigned int sought;
1393 selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
1394 if (selected_cpu == arm_none)
1396 #ifdef SUBTARGET_CPU_DEFAULT
1397 /* Use the subtarget default CPU if none was specified by
1398 configure. */
1399 selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
1400 #endif
1401 /* Default to ARM6. */
1402 if (selected_cpu == arm_none)
1403 selected_cpu = arm6;
1405 sel = &all_cores[selected_cpu];
1407 insn_flags = sel->flags;
1409 /* Now check to see if the user has specified some command line
1410     switches that require certain abilities from the cpu.  */
1411 sought = 0;
1413 if (TARGET_INTERWORK || TARGET_THUMB)
1415 sought |= (FL_THUMB | FL_MODE32);
1417 /* There are no ARM processors that support both APCS-26 and
1418 interworking. Therefore we force FL_MODE26 to be removed
1419 from insn_flags here (if it was set), so that the search
1420 below will always be able to find a compatible processor. */
1421 insn_flags &= ~FL_MODE26;
1424 if (sought != 0 && ((sought & insn_flags) != sought))
1426 /* Try to locate a CPU type that supports all of the abilities
1427 of the default CPU, plus the extra abilities requested by
1428 the user. */
1429 for (sel = all_cores; sel->name != NULL; sel++)
1430 if ((sel->flags & sought) == (sought | insn_flags))
1431 break;
1433 if (sel->name == NULL)
1435 unsigned current_bit_count = 0;
1436 const struct processors * best_fit = NULL;
1438 /* Ideally we would like to issue an error message here
1439 saying that it was not possible to find a CPU compatible
1440 with the default CPU, but which also supports the command
1441 line options specified by the programmer, and so they
1442 ought to use the -mcpu=<name> command line option to
1443 override the default CPU type.
1445 If we cannot find a cpu that has both the
1446 characteristics of the default cpu and the given
1447 command line options we scan the array again looking
1448 for a best match. */
1449 for (sel = all_cores; sel->name != NULL; sel++)
1450 if ((sel->flags & sought) == sought)
1452 unsigned count;
1454 count = bit_count (sel->flags & insn_flags);
1456 if (count >= current_bit_count)
1458 best_fit = sel;
1459 current_bit_count = count;
1463 gcc_assert (best_fit);
1464 sel = best_fit;
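	  /* That is: when the first scan finds no core matching the default
	     CPU's capabilities plus the sought ones, this fallback settles
	     for the core that supports every sought capability and shares
	     the largest number of feature bits with the default CPU, as
	     measured with bit_count.  */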
1467 insn_flags = sel->flags;
1469 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1470 arm_default_cpu = (enum processor_type) (sel - all_cores);
1471 if (arm_tune == arm_none)
1472 arm_tune = arm_default_cpu;
1475 /* The processor for which we should tune should now have been
1476 chosen. */
1477 gcc_assert (arm_tune != arm_none);
1479 tune_flags = all_cores[(int)arm_tune].flags;
1480 current_tune = all_cores[(int)arm_tune].tune;
1482 if (target_fp16_format_name)
1484 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1486 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1488 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1489 break;
1492 if (i == ARRAY_SIZE (all_fp16_formats))
1493 error ("invalid __fp16 format option: -mfp16-format=%s",
1494 target_fp16_format_name);
1496 else
1497 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1499 if (target_abi_name)
1501 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1503 if (streq (arm_all_abis[i].name, target_abi_name))
1505 arm_abi = arm_all_abis[i].abi_type;
1506 break;
1509 if (i == ARRAY_SIZE (arm_all_abis))
1510 error ("invalid ABI option: -mabi=%s", target_abi_name);
1512 else
1513 arm_abi = ARM_DEFAULT_ABI;
1515 /* Make sure that the processor choice does not conflict with any of the
1516 other command line choices. */
1517 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1518 error ("target CPU does not support ARM mode");
1520 /* BPABI targets use linker tricks to allow interworking on cores
1521 without thumb support. */
1522 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1524 warning (0, "target CPU does not support interworking" );
1525 target_flags &= ~MASK_INTERWORK;
1528 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1530 warning (0, "target CPU does not support THUMB instructions");
1531 target_flags &= ~MASK_THUMB;
1534 if (TARGET_APCS_FRAME && TARGET_THUMB)
1536 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1537 target_flags &= ~MASK_APCS_FRAME;
1540 /* Callee super interworking implies thumb interworking. Adding
1541 this to the flags here simplifies the logic elsewhere. */
1542 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1543 target_flags |= MASK_INTERWORK;
1545 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1546 from here where no function is being compiled currently. */
1547 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1548 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1550 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1551 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1553 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1554 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1556 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1558 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1559 target_flags |= MASK_APCS_FRAME;
1562 if (TARGET_POKE_FUNCTION_NAME)
1563 target_flags |= MASK_APCS_FRAME;
1565 if (TARGET_APCS_REENT && flag_pic)
1566 error ("-fpic and -mapcs-reent are incompatible");
1568 if (TARGET_APCS_REENT)
1569 warning (0, "APCS reentrant code not supported. Ignored");
1571 /* If this target is normally configured to use APCS frames, warn if they
1572 are turned off and debugging is turned on. */
1573 if (TARGET_ARM
1574 && write_symbols != NO_DEBUG
1575 && !TARGET_APCS_FRAME
1576 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1577 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1579 if (TARGET_APCS_FLOAT)
1580 warning (0, "passing floating point arguments in fp regs not yet supported");
1582 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1583 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1584 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1585 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1586 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1587 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1588 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1589 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1590 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1591 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1592 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1593 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1594 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1596 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1597 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1598 thumb_code = (TARGET_ARM == 0);
1599 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1600 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1601 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1602 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1603 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1605 /* If we are not using the default (ARM mode) section anchor offset
1606 ranges, then set the correct ranges now. */
1607 if (TARGET_THUMB1)
1609 /* Thumb-1 LDR instructions cannot have negative offsets.
1610 Permissible positive offset ranges are 5-bit (for byte loads),
1611 6-bit (for halfword loads), or 7-bit (for word loads).
1612 Empirical results suggest a 7-bit anchor range gives the best
1613 overall code size. */
1614 targetm.min_anchor_offset = 0;
1615 targetm.max_anchor_offset = 127;
1617 else if (TARGET_THUMB2)
1619 /* The minimum is set such that the total size of the block
1620 for a particular anchor is 248 + 1 + 4095 bytes, which is
1621 divisible by eight, ensuring natural spacing of anchors. */
1622 targetm.min_anchor_offset = -248;
1623 targetm.max_anchor_offset = 4095;
1626 /* V5 code we generate is completely interworking capable, so we turn off
1627 TARGET_INTERWORK here to avoid many tests later on. */
1629 /* XXX However, we must pass the right pre-processor defines to CPP
1630 or GLD can get confused. This is a hack. */
1631 if (TARGET_INTERWORK)
1632 arm_cpp_interwork = 1;
1634 if (arm_arch5)
1635 target_flags &= ~MASK_INTERWORK;
1637 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1638 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1640 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1641 error ("iwmmxt abi requires an iwmmxt capable cpu");
1643 if (target_fpu_name == NULL && target_fpe_name != NULL)
1645 if (streq (target_fpe_name, "2"))
1646 target_fpu_name = "fpe2";
1647 else if (streq (target_fpe_name, "3"))
1648 target_fpu_name = "fpe3";
1649 else
1650 error ("invalid floating point emulation option: -mfpe=%s",
1651 target_fpe_name);
1654 if (target_fpu_name == NULL)
1656 #ifdef FPUTYPE_DEFAULT
1657 target_fpu_name = FPUTYPE_DEFAULT;
1658 #else
1659 if (arm_arch_cirrus)
1660 target_fpu_name = "maverick";
1661 else
1662 target_fpu_name = "fpe2";
1663 #endif
1666 arm_fpu_desc = NULL;
1667 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1669 if (streq (all_fpus[i].name, target_fpu_name))
1671 arm_fpu_desc = &all_fpus[i];
1672 break;
1676 if (!arm_fpu_desc)
1678 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1679 return;
1682 switch (arm_fpu_desc->model)
1684 case ARM_FP_MODEL_FPA:
1685 if (arm_fpu_desc->rev == 2)
1686 arm_fpu_attr = FPU_FPE2;
1687 else if (arm_fpu_desc->rev == 3)
1688 arm_fpu_attr = FPU_FPE3;
1689 else
1690 arm_fpu_attr = FPU_FPA;
1691 break;
1693 case ARM_FP_MODEL_MAVERICK:
1694 arm_fpu_attr = FPU_MAVERICK;
1695 break;
1697 case ARM_FP_MODEL_VFP:
1698 arm_fpu_attr = FPU_VFP;
1699 break;
1701 default:
1702 gcc_unreachable ();
1705 if (target_float_abi_name != NULL)
1707 /* The user specified a FP ABI. */
1708 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1710 if (streq (all_float_abis[i].name, target_float_abi_name))
1712 arm_float_abi = all_float_abis[i].abi_type;
1713 break;
1716 if (i == ARRAY_SIZE (all_float_abis))
1717 error ("invalid floating point abi: -mfloat-abi=%s",
1718 target_float_abi_name);
1720 else
1721 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1723 if (TARGET_AAPCS_BASED
1724 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1725 error ("FPA is unsupported in the AAPCS");
1727 if (TARGET_AAPCS_BASED)
1729 if (TARGET_CALLER_INTERWORKING)
1730 error ("AAPCS does not support -mcaller-super-interworking");
1731 else
1732 if (TARGET_CALLEE_INTERWORKING)
1733 error ("AAPCS does not support -mcallee-super-interworking");
1736 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1737 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1738 will ever exist. GCC makes no attempt to support this combination. */
1739 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1740 sorry ("iWMMXt and hardware floating point");
1742 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1743 if (TARGET_THUMB2 && TARGET_IWMMXT)
1744 sorry ("Thumb-2 iWMMXt");
1746 /* __fp16 support currently assumes the core has ldrh. */
1747 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1748 sorry ("__fp16 and no ldrh");
1750 /* If soft-float is specified then don't use FPU. */
1751 if (TARGET_SOFT_FLOAT)
1752 arm_fpu_attr = FPU_NONE;
1754 if (TARGET_AAPCS_BASED)
1756 if (arm_abi == ARM_ABI_IWMMXT)
1757 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1758 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1759 && TARGET_HARD_FLOAT
1760 && TARGET_VFP)
1761 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1762 else
1763 arm_pcs_default = ARM_PCS_AAPCS;
1765 else
1767 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1768 sorry ("-mfloat-abi=hard and VFP");
1770 if (arm_abi == ARM_ABI_APCS)
1771 arm_pcs_default = ARM_PCS_APCS;
1772 else
1773 arm_pcs_default = ARM_PCS_ATPCS;
1776 /* For arm2/3 there is no need to do any scheduling if there is only
1777 a floating point emulator, or we are doing software floating-point. */
1778 if ((TARGET_SOFT_FLOAT
1779 || (TARGET_FPA && arm_fpu_desc->rev))
1780 && (tune_flags & FL_MODE32) == 0)
1781 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1783 if (target_thread_switch)
1785 if (strcmp (target_thread_switch, "soft") == 0)
1786 target_thread_pointer = TP_SOFT;
1787 else if (strcmp (target_thread_switch, "auto") == 0)
1788 target_thread_pointer = TP_AUTO;
1789 else if (strcmp (target_thread_switch, "cp15") == 0)
1790 target_thread_pointer = TP_CP15;
1791 else
1792 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1795 /* Use the cp15 method if it is available. */
1796 if (target_thread_pointer == TP_AUTO)
1798 if (arm_arch6k && !TARGET_THUMB1)
1799 target_thread_pointer = TP_CP15;
1800 else
1801 target_thread_pointer = TP_SOFT;
1804 if (TARGET_HARD_TP && TARGET_THUMB1)
1805 error ("cannot use -mtp=cp15 with 16-bit Thumb");
1807 /* Override the default structure alignment for AAPCS ABI. */
1808 if (TARGET_AAPCS_BASED)
1809 arm_structure_size_boundary = 8;
1811 if (structure_size_string != NULL)
1813 int size = strtol (structure_size_string, NULL, 0);
1815 if (size == 8 || size == 32
1816 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1817 arm_structure_size_boundary = size;
1818 else
1819 warning (0, "structure size boundary can only be set to %s",
1820 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64" : "8 or 32");
1823 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1825 error ("RTP PIC is incompatible with Thumb");
1826 flag_pic = 0;
1829 /* If stack checking is disabled, we can use r10 as the PIC register,
1830 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1831 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1833 if (TARGET_VXWORKS_RTP)
1834 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1835 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1838 if (flag_pic && TARGET_VXWORKS_RTP)
1839 arm_pic_register = 9;
1841 if (arm_pic_register_string != NULL)
1843 int pic_register = decode_reg_name (arm_pic_register_string);
1845 if (!flag_pic)
1846 warning (0, "-mpic-register= is useless without -fpic");
1848 /* Prevent the user from choosing an obviously stupid PIC register. */
1849 else if (pic_register < 0 || call_used_regs[pic_register]
1850 || pic_register == HARD_FRAME_POINTER_REGNUM
1851 || pic_register == STACK_POINTER_REGNUM
1852 || pic_register >= PC_REGNUM
1853 || (TARGET_VXWORKS_RTP
1854 && (unsigned int) pic_register != arm_pic_register))
1855 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1856 else
1857 arm_pic_register = pic_register;
1860 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1861 if (fix_cm3_ldrd == 2)
1863 if (selected_cpu == cortexm3)
1864 fix_cm3_ldrd = 1;
1865 else
1866 fix_cm3_ldrd = 0;
1869 if (TARGET_THUMB1 && flag_schedule_insns)
1871 /* Don't warn since it's on by default in -O2. */
1872 flag_schedule_insns = 0;
1875 if (optimize_size)
1877 /* If optimizing for size, bump the number of instructions that we
1878 are prepared to conditionally execute (even on a StrongARM). */
1879 max_insns_skipped = 6;
1881 else
1883 /* StrongARM has early execution of branches, so a sequence
1884 that is worth skipping is shorter. */
1885 if (arm_tune_strongarm)
1886 max_insns_skipped = 3;
1889 /* Hot/Cold partitioning is not currently supported, since we can't
1890 handle literal pool placement in that case. */
1891 if (flag_reorder_blocks_and_partition)
1893 inform (input_location,
1894 "-freorder-blocks-and-partition not supported on this architecture");
1895 flag_reorder_blocks_and_partition = 0;
1896 flag_reorder_blocks = 1;
1899 /* Register global variables with the garbage collector. */
1900 arm_add_gc_roots ();
1903 static void
1904 arm_add_gc_roots (void)
1906 gcc_obstack_init(&minipool_obstack);
1907 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1910 /* A table of known ARM exception types.
1911 For use with the interrupt function attribute. */
1913 typedef struct
1915 const char *const arg;
1916 const unsigned long return_value;
1918 isr_attribute_arg;
1920 static const isr_attribute_arg isr_attribute_args [] =
1922 { "IRQ", ARM_FT_ISR },
1923 { "irq", ARM_FT_ISR },
1924 { "FIQ", ARM_FT_FIQ },
1925 { "fiq", ARM_FT_FIQ },
1926 { "ABORT", ARM_FT_ISR },
1927 { "abort", ARM_FT_ISR },
1928 { "ABORT", ARM_FT_ISR },
1929 { "abort", ARM_FT_ISR },
1930 { "UNDEF", ARM_FT_EXCEPTION },
1931 { "undef", ARM_FT_EXCEPTION },
1932 { "SWI", ARM_FT_EXCEPTION },
1933 { "swi", ARM_FT_EXCEPTION },
1934 { NULL, ARM_FT_NORMAL }
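/* For illustration only (not part of the original table): a handler
   using one of the names above would typically be declared as

       void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   which makes arm_isr_value below return ARM_FT_ISR for that function.  */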
1937 /* Returns the (interrupt) function type of the current
1938 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1940 static unsigned long
1941 arm_isr_value (tree argument)
1943 const isr_attribute_arg * ptr;
1944 const char * arg;
1946 if (!arm_arch_notm)
1947 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1949 /* No argument - default to IRQ. */
1950 if (argument == NULL_TREE)
1951 return ARM_FT_ISR;
1953 /* Get the value of the argument. */
1954 if (TREE_VALUE (argument) == NULL_TREE
1955 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1956 return ARM_FT_UNKNOWN;
1958 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1960 /* Check it against the list of known arguments. */
1961 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1962 if (streq (arg, ptr->arg))
1963 return ptr->return_value;
1965 /* An unrecognized interrupt type. */
1966 return ARM_FT_UNKNOWN;
1969 /* Computes the type of the current function. */
1971 static unsigned long
1972 arm_compute_func_type (void)
1974 unsigned long type = ARM_FT_UNKNOWN;
1975 tree a;
1976 tree attr;
1978 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1980 /* Decide if the current function is volatile. Such functions
1981 never return, and many memory cycles can be saved by not storing
1982 register values that will never be needed again. This optimization
1983 was added to speed up context switching in a kernel application. */
1984 if (optimize > 0
1985 && (TREE_NOTHROW (current_function_decl)
1986 || !(flag_unwind_tables
1987 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1988 && TREE_THIS_VOLATILE (current_function_decl))
1989 type |= ARM_FT_VOLATILE;
1991 if (cfun->static_chain_decl != NULL)
1992 type |= ARM_FT_NESTED;
1994 attr = DECL_ATTRIBUTES (current_function_decl);
1996 a = lookup_attribute ("naked", attr);
1997 if (a != NULL_TREE)
1998 type |= ARM_FT_NAKED;
2000 a = lookup_attribute ("isr", attr);
2001 if (a == NULL_TREE)
2002 a = lookup_attribute ("interrupt", attr);
2004 if (a == NULL_TREE)
2005 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2006 else
2007 type |= arm_isr_value (TREE_VALUE (a));
2009 return type;
2012 /* Returns the type of the current function. */
2014 unsigned long
2015 arm_current_func_type (void)
2017 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2018 cfun->machine->func_type = arm_compute_func_type ();
2020 return cfun->machine->func_type;
2023 bool
2024 arm_allocate_stack_slots_for_args (void)
2026 /* Naked functions should not allocate stack slots for arguments. */
2027 return !IS_NAKED (arm_current_func_type ());
2031 /* Output assembler code for a block containing the constant parts
2032 of a trampoline, leaving space for the variable parts.
2034 On the ARM, (if r8 is the static chain regnum, and remembering that
2035 referencing pc adds an offset of 8) the trampoline looks like:
2036 ldr r8, [pc, #0]
2037 ldr pc, [pc]
2038 .word static chain value
2039 .word function's address
2040 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2042 static void
2043 arm_asm_trampoline_template (FILE *f)
2045 if (TARGET_ARM)
2047 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2048 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2050 else if (TARGET_THUMB2)
2052 /* The Thumb-2 trampoline is similar to the ARM implementation.
2053 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2054 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2055 STATIC_CHAIN_REGNUM, PC_REGNUM);
2056 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2058 else
2060 ASM_OUTPUT_ALIGN (f, 2);
2061 fprintf (f, "\t.code\t16\n");
2062 fprintf (f, ".Ltrampoline_start:\n");
2063 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2064 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2065 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2066 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2067 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2068 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2070 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2071 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
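/* Illustrative note (not in the original source): arm_trampoline_init
   below fills in the two word-sized slots emitted above.  In 32-bit mode
   the static chain value is stored at offset 8 and the target function's
   address at offset 12 of the trampoline block; for 16-bit Thumb the
   longer code stub pushes these slots out to offsets 12 and 16.  */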
2074 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2076 static void
2077 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2079 rtx fnaddr, mem, a_tramp;
2081 emit_block_move (m_tramp, assemble_trampoline_template (),
2082 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2084 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2085 emit_move_insn (mem, chain_value);
2087 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2088 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2089 emit_move_insn (mem, fnaddr);
2091 a_tramp = XEXP (m_tramp, 0);
2092 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2093 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2094 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2097 /* Thumb trampolines should be entered in thumb mode, so set
2098 the bottom bit of the address. */
2100 static rtx
2101 arm_trampoline_adjust_address (rtx addr)
2103 if (TARGET_THUMB)
2104 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2105 NULL, 0, OPTAB_LIB_WIDEN);
2106 return addr;
2109 /* Return 1 if it is possible to return using a single instruction.
2110 If SIBLING is non-null, this is a test for a return before a sibling
2111 call. SIBLING is the call insn, so we can examine its register usage. */
2114 use_return_insn (int iscond, rtx sibling)
2116 int regno;
2117 unsigned int func_type;
2118 unsigned long saved_int_regs;
2119 unsigned HOST_WIDE_INT stack_adjust;
2120 arm_stack_offsets *offsets;
2122 /* Never use a return instruction before reload has run. */
2123 if (!reload_completed)
2124 return 0;
2126 func_type = arm_current_func_type ();
2128 /* Naked, volatile and stack alignment functions need special
2129 consideration. */
2130 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2131 return 0;
2133 /* So do interrupt functions that use the frame pointer and Thumb
2134 interrupt functions. */
2135 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2136 return 0;
2138 offsets = arm_get_frame_offsets ();
2139 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2141 /* As do variadic functions. */
2142 if (crtl->args.pretend_args_size
2143 || cfun->machine->uses_anonymous_args
2144 /* Or if the function calls __builtin_eh_return () */
2145 || crtl->calls_eh_return
2146 /* Or if the function calls alloca */
2147 || cfun->calls_alloca
2148 /* Or if there is a stack adjustment. However, if the stack pointer
2149 is saved on the stack, we can use a pre-incrementing stack load. */
2150 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2151 && stack_adjust == 4)))
2152 return 0;
2154 saved_int_regs = offsets->saved_regs_mask;
2156 /* Unfortunately, the insn
2158 ldmib sp, {..., sp, ...}
2160 triggers a bug on most SA-110 based devices, such that the stack
2161 pointer won't be correctly restored if the instruction takes a
2162 page fault. We work around this problem by popping r3 along with
2163 the other registers, since that is never slower than executing
2164 another instruction.
2166 We test for !arm_arch5 here, because code for any architecture
2167 less than this could potentially be run on one of the buggy
2168 chips. */
2169 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2171 /* Validate that r3 is a call-clobbered register (always true in
2172 the default ABI) ... */
2173 if (!call_used_regs[3])
2174 return 0;
2176 /* ... that it isn't being used for a return value ... */
2177 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2178 return 0;
2180 /* ... or for a tail-call argument ... */
2181 if (sibling)
2183 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2185 if (find_regno_fusage (sibling, USE, 3))
2186 return 0;
2189 /* ... and that there are no call-saved registers in r0-r2
2190 (always true in the default ABI). */
2191 if (saved_int_regs & 0x7)
2192 return 0;
2195 /* Can't be done if interworking with Thumb, and any registers have been
2196 stacked. */
2197 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2198 return 0;
2200 /* On StrongARM, conditional returns are expensive if they aren't
2201 taken and multiple registers have been stacked. */
2202 if (iscond && arm_tune_strongarm)
2204 /* Conditional return when just the LR is stored is a simple
2205 conditional-load instruction, that's not expensive. */
2206 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2207 return 0;
2209 if (flag_pic
2210 && arm_pic_register != INVALID_REGNUM
2211 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2212 return 0;
2215 /* If there are saved registers but the LR isn't saved, then we need
2216 two instructions for the return. */
2217 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2218 return 0;
2220 /* Can't be done if any of the FPA regs are pushed,
2221 since this also requires an insn. */
2222 if (TARGET_HARD_FLOAT && TARGET_FPA)
2223 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2224 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2225 return 0;
2227 /* Likewise VFP regs. */
2228 if (TARGET_HARD_FLOAT && TARGET_VFP)
2229 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2230 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2231 return 0;
2233 if (TARGET_REALLY_IWMMXT)
2234 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2235 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2236 return 0;
2238 return 1;
2241 /* Return TRUE if int I is a valid immediate ARM constant. */
2244 const_ok_for_arm (HOST_WIDE_INT i)
2246 int lowbit;
2248 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2249 be all zero, or all one. */
2250 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2251 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2252 != ((~(unsigned HOST_WIDE_INT) 0)
2253 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2254 return FALSE;
2256 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2258 /* Fast return for 0 and small values. We must do this for zero, since
2259 the code below can't handle that one case. */
2260 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2261 return TRUE;
2263 /* Get the number of trailing zeros. */
2264 lowbit = ffs((int) i) - 1;
2266 /* Only even shifts are allowed in ARM mode so round down to the
2267 nearest even number. */
2268 if (TARGET_ARM)
2269 lowbit &= ~1;
2271 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2272 return TRUE;
2274 if (TARGET_ARM)
2276 /* Allow rotated constants in ARM mode. */
2277 if (lowbit <= 4
2278 && ((i & ~0xc000003f) == 0
2279 || (i & ~0xf000000f) == 0
2280 || (i & ~0xfc000003) == 0))
2281 return TRUE;
2283 else
2285 HOST_WIDE_INT v;
2287 /* Allow repeated pattern. */
2288 v = i & 0xff;
2289 v |= v << 16;
2290 if (i == v || i == (v | (v << 8)))
2291 return TRUE;
2294 return FALSE;
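/* Worked examples for const_ok_for_arm (illustrative only):

     0x000000ff   valid: fits in 8 bits.
     0x00000ff0   valid: 0xff rotated right by 28 (an even rotation).
     0xf000000f   valid in ARM mode: 0xff rotated right by 4.
     0x000001fe   invalid in ARM mode (it would need an odd rotation), but
                  accepted for Thumb-2, where any left shift is allowed.
     0x00ff00ff   invalid in ARM mode, but accepted for Thumb-2 as a
                  replicated byte pattern.  */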
2297 /* Return true if I is a valid constant for the operation CODE. */
2298 static int
2299 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2301 if (const_ok_for_arm (i))
2302 return 1;
2304 switch (code)
2306 case PLUS:
2307 case COMPARE:
2308 case EQ:
2309 case NE:
2310 case GT:
2311 case LE:
2312 case LT:
2313 case GE:
2314 case GEU:
2315 case LTU:
2316 case GTU:
2317 case LEU:
2318 case UNORDERED:
2319 case ORDERED:
2320 case UNEQ:
2321 case UNGE:
2322 case UNLT:
2323 case UNGT:
2324 case UNLE:
2325 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2327 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2328 case XOR:
2329 return 0;
2331 case IOR:
2332 if (TARGET_THUMB2)
2333 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2334 return 0;
2336 case AND:
2337 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2339 default:
2340 gcc_unreachable ();
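/* Example (illustrative only): for AND the constant 0xffffff00 is not a
   valid ARM immediate, but its complement 0xff is, so const_ok_for_op
   accepts it (the insn can use BIC); likewise PLUS accepts constants
   whose negation is valid, allowing an ADD to become a SUB.  */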
2344 /* Emit a sequence of insns to handle a large constant.
2345 CODE is the code of the operation required; it can be any of SET, PLUS,
2346 IOR, AND, XOR, MINUS;
2347 MODE is the mode in which the operation is being performed;
2348 VAL is the integer to operate on;
2349 SOURCE is the other operand (a register, or a null-pointer for SET);
2350 SUBTARGETS means it is safe to create scratch registers if that will
2351 either produce a simpler sequence, or we will want to cse the values.
2352 Return value is the number of insns emitted. */
2354 /* ??? Tweak this for thumb2. */
2356 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2357 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2359 rtx cond;
2361 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2362 cond = COND_EXEC_TEST (PATTERN (insn));
2363 else
2364 cond = NULL_RTX;
2366 if (subtargets || code == SET
2367 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2368 && REGNO (target) != REGNO (source)))
2370 /* After arm_reorg has been called, we can't fix up expensive
2371 constants by pushing them into memory so we must synthesize
2372 them in-line, regardless of the cost. This is only likely to
2373 be more costly on chips that have load delay slots and we are
2374 compiling without running the scheduler (so no splitting
2375 occurred before the final instruction emission).
2377 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2379 if (!after_arm_reorg
2380 && !cond
2381 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2382 1, 0)
2383 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2384 + (code != SET))))
2386 if (code == SET)
2388 /* Currently SET is the only monadic value for CODE; all
2389 the rest are dyadic. */
2390 if (TARGET_USE_MOVT)
2391 arm_emit_movpair (target, GEN_INT (val));
2392 else
2393 emit_set_insn (target, GEN_INT (val));
2395 return 1;
2397 else
2399 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2401 if (TARGET_USE_MOVT)
2402 arm_emit_movpair (temp, GEN_INT (val));
2403 else
2404 emit_set_insn (temp, GEN_INT (val));
2406 /* For MINUS, the constant is the value subtracted FROM (the result
2407 is VAL - SOURCE), since we never have subtraction of a constant. */
2408 if (code == MINUS)
2409 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2410 else
2411 emit_set_insn (target,
2412 gen_rtx_fmt_ee (code, mode, source, temp));
2413 return 2;
2418 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2422 /* Return the number of instructions required to synthesize the given
2423 constant, if we start emitting them from bit-position I. */
2424 static int
2425 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2427 HOST_WIDE_INT temp1;
2428 int step_size = TARGET_ARM ? 2 : 1;
2429 int num_insns = 0;
2431 gcc_assert (TARGET_ARM || i == 0);
2435 int end;
2437 if (i <= 0)
2438 i += 32;
2439 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2441 end = i - 8;
2442 if (end < 0)
2443 end += 32;
2444 temp1 = remainder & ((0x0ff << end)
2445 | ((i < end) ? (0xff >> (32 - end)) : 0));
2446 remainder &= ~temp1;
2447 num_insns++;
2448 i -= 8 - step_size;
2450 i -= step_size;
2451 } while (remainder);
2452 return num_insns;
2455 static int
2456 find_best_start (unsigned HOST_WIDE_INT remainder)
2458 int best_consecutive_zeros = 0;
2459 int i;
2460 int best_start = 0;
2462 /* If we aren't targeting ARM, the best place to start is always at
2463 the bottom. */
2464 if (! TARGET_ARM)
2465 return 0;
2467 for (i = 0; i < 32; i += 2)
2469 int consecutive_zeros = 0;
2471 if (!(remainder & (3 << i)))
2473 while ((i < 32) && !(remainder & (3 << i)))
2475 consecutive_zeros += 2;
2476 i += 2;
2478 if (consecutive_zeros > best_consecutive_zeros)
2480 best_consecutive_zeros = consecutive_zeros;
2481 best_start = i - consecutive_zeros;
2483 i -= 2;
2487 /* So long as it won't require any more insns to do so, it's
2488 desirable to emit a small constant (in bits 0...9) in the last
2489 insn. This way there is more chance that it can be combined with
2490 a later addressing insn to form a pre-indexed load or store
2491 operation. Consider:
2493 *((volatile int *)0xe0000100) = 1;
2494 *((volatile int *)0xe0000110) = 2;
2496 We want this to wind up as:
2498 mov rA, #0xe0000000
2499 mov rB, #1
2500 str rB, [rA, #0x100]
2501 mov rB, #2
2502 str rB, [rA, #0x110]
2504 rather than having to synthesize both large constants from scratch.
2506 Therefore, we calculate how many insns would be required to emit
2507 the constant starting from `best_start', and also starting from
2508 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2509 yield a shorter sequence, we may as well use zero. */
2510 if (best_start != 0
2511 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2512 && (count_insns_for_constant (remainder, 0) <=
2513 count_insns_for_constant (remainder, best_start)))
2514 best_start = 0;
2516 return best_start;
2519 /* Emit an instruction with the indicated PATTERN. If COND is
2520 non-NULL, conditionalize the execution of the instruction on COND
2521 being true. */
2523 static void
2524 emit_constant_insn (rtx cond, rtx pattern)
2526 if (cond)
2527 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2528 emit_insn (pattern);
2531 /* As above, but extra parameter GENERATE which, if clear, suppresses
2532 RTL generation. */
2533 /* ??? This needs more work for thumb2. */
2535 static int
2536 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2537 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2538 int generate)
2540 int can_invert = 0;
2541 int can_negate = 0;
2542 int final_invert = 0;
2543 int can_negate_initial = 0;
2544 int i;
2545 int num_bits_set = 0;
2546 int set_sign_bit_copies = 0;
2547 int clear_sign_bit_copies = 0;
2548 int clear_zero_bit_copies = 0;
2549 int set_zero_bit_copies = 0;
2550 int insns = 0;
2551 unsigned HOST_WIDE_INT temp1, temp2;
2552 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2553 int step_size = TARGET_ARM ? 2 : 1;
2555 /* Find out which operations are safe for a given CODE. Also do a quick
2556 check for degenerate cases; these can occur when DImode operations
2557 are split. */
2558 switch (code)
2560 case SET:
2561 can_invert = 1;
2562 can_negate = 1;
2563 break;
2565 case PLUS:
2566 can_negate = 1;
2567 can_negate_initial = 1;
2568 break;
2570 case IOR:
2571 if (remainder == 0xffffffff)
2573 if (generate)
2574 emit_constant_insn (cond,
2575 gen_rtx_SET (VOIDmode, target,
2576 GEN_INT (ARM_SIGN_EXTEND (val))));
2577 return 1;
2580 if (remainder == 0)
2582 if (reload_completed && rtx_equal_p (target, source))
2583 return 0;
2585 if (generate)
2586 emit_constant_insn (cond,
2587 gen_rtx_SET (VOIDmode, target, source));
2588 return 1;
2591 if (TARGET_THUMB2)
2592 can_invert = 1;
2593 break;
2595 case AND:
2596 if (remainder == 0)
2598 if (generate)
2599 emit_constant_insn (cond,
2600 gen_rtx_SET (VOIDmode, target, const0_rtx));
2601 return 1;
2603 if (remainder == 0xffffffff)
2605 if (reload_completed && rtx_equal_p (target, source))
2606 return 0;
2607 if (generate)
2608 emit_constant_insn (cond,
2609 gen_rtx_SET (VOIDmode, target, source));
2610 return 1;
2612 can_invert = 1;
2613 break;
2615 case XOR:
2616 if (remainder == 0)
2618 if (reload_completed && rtx_equal_p (target, source))
2619 return 0;
2620 if (generate)
2621 emit_constant_insn (cond,
2622 gen_rtx_SET (VOIDmode, target, source));
2623 return 1;
2626 if (remainder == 0xffffffff)
2628 if (generate)
2629 emit_constant_insn (cond,
2630 gen_rtx_SET (VOIDmode, target,
2631 gen_rtx_NOT (mode, source)));
2632 return 1;
2634 break;
2636 case MINUS:
2637 /* We treat MINUS as (val - source), since (source - val) is always
2638 passed as (source + (-val)). */
2639 if (remainder == 0)
2641 if (generate)
2642 emit_constant_insn (cond,
2643 gen_rtx_SET (VOIDmode, target,
2644 gen_rtx_NEG (mode, source)));
2645 return 1;
2647 if (const_ok_for_arm (val))
2649 if (generate)
2650 emit_constant_insn (cond,
2651 gen_rtx_SET (VOIDmode, target,
2652 gen_rtx_MINUS (mode, GEN_INT (val),
2653 source)));
2654 return 1;
2656 can_negate = 1;
2658 break;
2660 default:
2661 gcc_unreachable ();
2664 /* If we can do it in one insn get out quickly. */
2665 if (const_ok_for_arm (val)
2666 || (can_negate_initial && const_ok_for_arm (-val))
2667 || (can_invert && const_ok_for_arm (~val)))
2669 if (generate)
2670 emit_constant_insn (cond,
2671 gen_rtx_SET (VOIDmode, target,
2672 (source
2673 ? gen_rtx_fmt_ee (code, mode, source,
2674 GEN_INT (val))
2675 : GEN_INT (val))));
2676 return 1;
2679 /* Calculate a few attributes that may be useful for specific
2680 optimizations. */
2681 /* Count number of leading zeros. */
2682 for (i = 31; i >= 0; i--)
2684 if ((remainder & (1 << i)) == 0)
2685 clear_sign_bit_copies++;
2686 else
2687 break;
2690 /* Count number of leading 1s. */
2691 for (i = 31; i >= 0; i--)
2693 if ((remainder & (1 << i)) != 0)
2694 set_sign_bit_copies++;
2695 else
2696 break;
2699 /* Count number of trailing zeros. */
2700 for (i = 0; i <= 31; i++)
2702 if ((remainder & (1 << i)) == 0)
2703 clear_zero_bit_copies++;
2704 else
2705 break;
2708 /* Count number of trailing 1s. */
2709 for (i = 0; i <= 31; i++)
2711 if ((remainder & (1 << i)) != 0)
2712 set_zero_bit_copies++;
2713 else
2714 break;
2717 switch (code)
2719 case SET:
2720 /* See if we can use movw. */
2721 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2723 if (generate)
2724 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2725 GEN_INT (val)));
2726 return 1;
2729 /* See if we can do this by sign-extending a constant that is known
2730 to be negative. This is a good way of doing it, since the shift
2731 may well merge into a subsequent insn. */
2732 if (set_sign_bit_copies > 1)
2734 if (const_ok_for_arm
2735 (temp1 = ARM_SIGN_EXTEND (remainder
2736 << (set_sign_bit_copies - 1))))
2738 if (generate)
2740 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2741 emit_constant_insn (cond,
2742 gen_rtx_SET (VOIDmode, new_src,
2743 GEN_INT (temp1)));
2744 emit_constant_insn (cond,
2745 gen_ashrsi3 (target, new_src,
2746 GEN_INT (set_sign_bit_copies - 1)));
2748 return 2;
2750 /* For an inverted constant, we will need to set the low bits;
2751 these will be shifted out of harm's way. */
2752 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2753 if (const_ok_for_arm (~temp1))
2755 if (generate)
2757 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2758 emit_constant_insn (cond,
2759 gen_rtx_SET (VOIDmode, new_src,
2760 GEN_INT (temp1)));
2761 emit_constant_insn (cond,
2762 gen_ashrsi3 (target, new_src,
2763 GEN_INT (set_sign_bit_copies - 1)));
2765 return 2;
2769 /* See if we can calculate the value as the difference between two
2770 valid immediates. */
2771 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2773 int topshift = clear_sign_bit_copies & ~1;
2775 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2776 & (0xff000000 >> topshift));
2778 /* If temp1 is zero, then that means the 9 most significant
2779 bits of remainder were 1 and we've caused it to overflow.
2780 When topshift is 0 we don't need to do anything since we
2781 can borrow from 'bit 32'. */
2782 if (temp1 == 0 && topshift != 0)
2783 temp1 = 0x80000000 >> (topshift - 1);
2785 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2787 if (const_ok_for_arm (temp2))
2789 if (generate)
2791 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2792 emit_constant_insn (cond,
2793 gen_rtx_SET (VOIDmode, new_src,
2794 GEN_INT (temp1)));
2795 emit_constant_insn (cond,
2796 gen_addsi3 (target, new_src,
2797 GEN_INT (-temp2)));
2800 return 2;
2804 /* See if we can generate this by setting the bottom (or the top)
2805 16 bits, and then shifting these into the other half of the
2806 word. We only look for the simplest cases; to do more would cost
2807 too much. Be careful, however, not to generate this when the
2808 alternative would take fewer insns. */
2809 if (val & 0xffff0000)
2811 temp1 = remainder & 0xffff0000;
2812 temp2 = remainder & 0x0000ffff;
2814 /* Overlaps outside this range are best done using other methods. */
2815 for (i = 9; i < 24; i++)
2817 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2818 && !const_ok_for_arm (temp2))
2820 rtx new_src = (subtargets
2821 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2822 : target);
2823 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2824 source, subtargets, generate);
2825 source = new_src;
2826 if (generate)
2827 emit_constant_insn
2828 (cond,
2829 gen_rtx_SET
2830 (VOIDmode, target,
2831 gen_rtx_IOR (mode,
2832 gen_rtx_ASHIFT (mode, source,
2833 GEN_INT (i)),
2834 source)));
2835 return insns + 1;
2839 /* Don't duplicate cases already considered. */
2840 for (i = 17; i < 24; i++)
2842 if (((temp1 | (temp1 >> i)) == remainder)
2843 && !const_ok_for_arm (temp1))
2845 rtx new_src = (subtargets
2846 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2847 : target);
2848 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2849 source, subtargets, generate);
2850 source = new_src;
2851 if (generate)
2852 emit_constant_insn
2853 (cond,
2854 gen_rtx_SET (VOIDmode, target,
2855 gen_rtx_IOR
2856 (mode,
2857 gen_rtx_LSHIFTRT (mode, source,
2858 GEN_INT (i)),
2859 source)));
2860 return insns + 1;
2864 break;
2866 case IOR:
2867 case XOR:
2868 /* If we have IOR or XOR, and the constant can be loaded in a
2869 single instruction, and we can find a temporary to put it in,
2870 then this can be done in two instructions instead of 3-4. */
2871 if (subtargets
2872 /* TARGET can't be NULL if SUBTARGETS is 0 */
2873 || (reload_completed && !reg_mentioned_p (target, source)))
2875 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2877 if (generate)
2879 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2881 emit_constant_insn (cond,
2882 gen_rtx_SET (VOIDmode, sub,
2883 GEN_INT (val)));
2884 emit_constant_insn (cond,
2885 gen_rtx_SET (VOIDmode, target,
2886 gen_rtx_fmt_ee (code, mode,
2887 source, sub)));
2889 return 2;
2893 if (code == XOR)
2894 break;
2896 /* Convert.
2897 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
2898 and the remainder 0s for e.g. 0xfff00000)
2899 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2901 This can be done in 2 instructions by using shifts with mov or mvn.
2902 e.g. for
2903 x = x | 0xfff00000;
2904 we generate:
2905 mvn r0, r0, asl #12
2906 mvn r0, r0, lsr #12 */
2907 if (set_sign_bit_copies > 8
2908 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2910 if (generate)
2912 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2913 rtx shift = GEN_INT (set_sign_bit_copies);
2915 emit_constant_insn
2916 (cond,
2917 gen_rtx_SET (VOIDmode, sub,
2918 gen_rtx_NOT (mode,
2919 gen_rtx_ASHIFT (mode,
2920 source,
2921 shift))));
2922 emit_constant_insn
2923 (cond,
2924 gen_rtx_SET (VOIDmode, target,
2925 gen_rtx_NOT (mode,
2926 gen_rtx_LSHIFTRT (mode, sub,
2927 shift))));
2929 return 2;
2932 /* Convert
2933 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2935 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2937 E.g. for r0 = r0 | 0xfff we generate:
2938 mvn r0, r0, lsr #12
2939 mvn r0, r0, asl #12
2942 if (set_zero_bit_copies > 8
2943 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2945 if (generate)
2947 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2948 rtx shift = GEN_INT (set_zero_bit_copies);
2950 emit_constant_insn
2951 (cond,
2952 gen_rtx_SET (VOIDmode, sub,
2953 gen_rtx_NOT (mode,
2954 gen_rtx_LSHIFTRT (mode,
2955 source,
2956 shift))));
2957 emit_constant_insn
2958 (cond,
2959 gen_rtx_SET (VOIDmode, target,
2960 gen_rtx_NOT (mode,
2961 gen_rtx_ASHIFT (mode, sub,
2962 shift))));
2964 return 2;
2967 /* This will never be reached for Thumb-2 because orn is a valid
2968 instruction. This is for Thumb-1 and the 32-bit ARM cases.
2970 x = y | constant (such that ~constant is a valid constant)
2971 Transform this to
2972 x = ~(~y & ~constant).
2974 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2976 if (generate)
2978 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2979 emit_constant_insn (cond,
2980 gen_rtx_SET (VOIDmode, sub,
2981 gen_rtx_NOT (mode, source)));
2982 source = sub;
2983 if (subtargets)
2984 sub = gen_reg_rtx (mode);
2985 emit_constant_insn (cond,
2986 gen_rtx_SET (VOIDmode, sub,
2987 gen_rtx_AND (mode, source,
2988 GEN_INT (temp1))));
2989 emit_constant_insn (cond,
2990 gen_rtx_SET (VOIDmode, target,
2991 gen_rtx_NOT (mode, sub)));
2993 return 3;
2995 break;
2997 case AND:
2998 /* See if two shifts will do 2 or more insns' worth of work. */
2999 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3001 HOST_WIDE_INT shift_mask = ((0xffffffff
3002 << (32 - clear_sign_bit_copies))
3003 & 0xffffffff);
3005 if ((remainder | shift_mask) != 0xffffffff)
3007 if (generate)
3009 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3010 insns = arm_gen_constant (AND, mode, cond,
3011 remainder | shift_mask,
3012 new_src, source, subtargets, 1);
3013 source = new_src;
3015 else
3017 rtx targ = subtargets ? NULL_RTX : target;
3018 insns = arm_gen_constant (AND, mode, cond,
3019 remainder | shift_mask,
3020 targ, source, subtargets, 0);
3024 if (generate)
3026 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3027 rtx shift = GEN_INT (clear_sign_bit_copies);
3029 emit_insn (gen_ashlsi3 (new_src, source, shift));
3030 emit_insn (gen_lshrsi3 (target, new_src, shift));
3033 return insns + 2;
3036 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3038 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3040 if ((remainder | shift_mask) != 0xffffffff)
3042 if (generate)
3044 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3046 insns = arm_gen_constant (AND, mode, cond,
3047 remainder | shift_mask,
3048 new_src, source, subtargets, 1);
3049 source = new_src;
3051 else
3053 rtx targ = subtargets ? NULL_RTX : target;
3055 insns = arm_gen_constant (AND, mode, cond,
3056 remainder | shift_mask,
3057 targ, source, subtargets, 0);
3061 if (generate)
3063 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3064 rtx shift = GEN_INT (clear_zero_bit_copies);
3066 emit_insn (gen_lshrsi3 (new_src, source, shift));
3067 emit_insn (gen_ashlsi3 (target, new_src, shift));
3070 return insns + 2;
3073 break;
3075 default:
3076 break;
3079 for (i = 0; i < 32; i++)
3080 if (remainder & (1 << i))
3081 num_bits_set++;
3083 if ((code == AND)
3084 || (code != IOR && can_invert && num_bits_set > 16))
3085 remainder ^= 0xffffffff;
3086 else if (code == PLUS && num_bits_set > 16)
3087 remainder = (-remainder) & 0xffffffff;
3089 /* For XOR, if more than half the bits are set and there's a sequence
3090 of more than 8 consecutive ones in the pattern then we can XOR by the
3091 inverted constant and then invert the final result; this may save an
3092 instruction and might also lead to the final mvn being merged with
3093 some other operation. */
3094 else if (code == XOR && num_bits_set > 16
3095 && (count_insns_for_constant (remainder ^ 0xffffffff,
3096 find_best_start
3097 (remainder ^ 0xffffffff))
3098 < count_insns_for_constant (remainder,
3099 find_best_start (remainder))))
3101 remainder ^= 0xffffffff;
3102 final_invert = 1;
3104 else
3106 can_invert = 0;
3107 can_negate = 0;
3110 /* Now try to find a way of doing the job in either two or three
3111 instructions.
3112 We start by looking for the largest block of zeros that is aligned on
3113 a 2-bit boundary; we then fill up the temps, wrapping around to the
3114 top of the word when we drop off the bottom.
3115 In the worst case this code should produce no more than four insns.
3116 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3117 best place to start. */
3119 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3120 the same. */
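/* Rough example (illustrative only): a SET of 0x12345678 in ARM mode
   cannot be done in fewer than four insns; the loop below ends up
   emitting something like

       mov   rT, #0x12000000
       add   rT, rT, #0x344000
       add   rT, rT, #0x1640
       add   rT, rT, #0x38

   i.e. one 8-bit, evenly rotated chunk per instruction.  */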
3122 /* Now start emitting the insns. */
3123 i = find_best_start (remainder);
3126 int end;
3128 if (i <= 0)
3129 i += 32;
3130 if (remainder & (3 << (i - 2)))
3132 end = i - 8;
3133 if (end < 0)
3134 end += 32;
3135 temp1 = remainder & ((0x0ff << end)
3136 | ((i < end) ? (0xff >> (32 - end)) : 0));
3137 remainder &= ~temp1;
3139 if (generate)
3141 rtx new_src, temp1_rtx;
3143 if (code == SET || code == MINUS)
3145 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3146 if (can_invert && code != MINUS)
3147 temp1 = ~temp1;
3149 else
3151 if ((final_invert || remainder) && subtargets)
3152 new_src = gen_reg_rtx (mode);
3153 else
3154 new_src = target;
3155 if (can_invert)
3156 temp1 = ~temp1;
3157 else if (can_negate)
3158 temp1 = -temp1;
3161 temp1 = trunc_int_for_mode (temp1, mode);
3162 temp1_rtx = GEN_INT (temp1);
3164 if (code == SET)
3166 else if (code == MINUS)
3167 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3168 else
3169 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3171 emit_constant_insn (cond,
3172 gen_rtx_SET (VOIDmode, new_src,
3173 temp1_rtx));
3174 source = new_src;
3177 if (code == SET)
3179 can_invert = 0;
3180 code = PLUS;
3182 else if (code == MINUS)
3183 code = PLUS;
3185 insns++;
3186 i -= 8 - step_size;
3188 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary
3189 shifts. */
3190 i -= step_size;
3192 while (remainder);
3195 if (final_invert)
3197 if (generate)
3198 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3199 gen_rtx_NOT (mode, source)));
3200 insns++;
3203 return insns;
3206 /* Canonicalize a comparison so that we are more likely to recognize it.
3207 This can be done for a few constant compares, where we can make the
3208 immediate value easier to load. */
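/* Example (illustrative only): (x > 0x3ff) would need the awkward
   immediate 0x3ff, but rewriting it as (x >= 0x400) needs only the
   single-bit constant 0x400, which const_ok_for_arm accepts.  */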
3210 enum rtx_code
3211 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
3212 rtx * op1)
3214 unsigned HOST_WIDE_INT i = INTVAL (*op1);
3215 unsigned HOST_WIDE_INT maxval;
3216 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3218 switch (code)
3220 case EQ:
3221 case NE:
3222 return code;
3224 case GT:
3225 case LE:
3226 if (i != maxval
3227 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3229 *op1 = GEN_INT (i + 1);
3230 return code == GT ? GE : LT;
3232 break;
3234 case GE:
3235 case LT:
3236 if (i != ~maxval
3237 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3239 *op1 = GEN_INT (i - 1);
3240 return code == GE ? GT : LE;
3242 break;
3244 case GTU:
3245 case LEU:
3246 if (i != ~((unsigned HOST_WIDE_INT) 0)
3247 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3249 *op1 = GEN_INT (i + 1);
3250 return code == GTU ? GEU : LTU;
3252 break;
3254 case GEU:
3255 case LTU:
3256 if (i != 0
3257 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3259 *op1 = GEN_INT (i - 1);
3260 return code == GEU ? GTU : LEU;
3262 break;
3264 default:
3265 gcc_unreachable ();
3268 return code;
3272 /* Define how to find the value returned by a function. */
3274 static rtx
3275 arm_function_value(const_tree type, const_tree func,
3276 bool outgoing ATTRIBUTE_UNUSED)
3278 enum machine_mode mode;
3279 int unsignedp ATTRIBUTE_UNUSED;
3280 rtx r ATTRIBUTE_UNUSED;
3282 mode = TYPE_MODE (type);
3284 if (TARGET_AAPCS_BASED)
3285 return aapcs_allocate_return_reg (mode, type, func);
3287 /* Promote integer types. */
3288 if (INTEGRAL_TYPE_P (type))
3289 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3291 /* Promote small structs returned in a register to full-word size
3292 for big-endian AAPCS. */
3293 if (arm_return_in_msb (type))
3295 HOST_WIDE_INT size = int_size_in_bytes (type);
3296 if (size % UNITS_PER_WORD != 0)
3298 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3299 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3303 return LIBCALL_VALUE (mode);
3306 static int
3307 libcall_eq (const void *p1, const void *p2)
3309 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3312 static hashval_t
3313 libcall_hash (const void *p1)
3315 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3318 static void
3319 add_libcall (htab_t htab, rtx libcall)
3321 *htab_find_slot (htab, libcall, INSERT) = libcall;
3324 static bool
3325 arm_libcall_uses_aapcs_base (const_rtx libcall)
3327 static bool init_done = false;
3328 static htab_t libcall_htab;
3330 if (!init_done)
3332 init_done = true;
3334 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3335 NULL);
3336 add_libcall (libcall_htab,
3337 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3338 add_libcall (libcall_htab,
3339 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3340 add_libcall (libcall_htab,
3341 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3342 add_libcall (libcall_htab,
3343 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3345 add_libcall (libcall_htab,
3346 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3347 add_libcall (libcall_htab,
3348 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3349 add_libcall (libcall_htab,
3350 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3351 add_libcall (libcall_htab,
3352 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3354 add_libcall (libcall_htab,
3355 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3356 add_libcall (libcall_htab,
3357 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3358 add_libcall (libcall_htab,
3359 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3360 add_libcall (libcall_htab,
3361 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3362 add_libcall (libcall_htab,
3363 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3364 add_libcall (libcall_htab,
3365 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3368 return libcall && htab_find (libcall_htab, libcall) != NULL;
3372 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3374 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3375 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3377 /* The following libcalls return their result in integer registers,
3378 even though they return a floating point value. */
3379 if (arm_libcall_uses_aapcs_base (libcall))
3380 return gen_rtx_REG (mode, ARG_REGISTER(1));
3384 return LIBCALL_VALUE (mode);
3387 /* Determine the amount of memory needed to store the possible return
3388 registers of an untyped call. */
3390 arm_apply_result_size (void)
3392 int size = 16;
3394 if (TARGET_32BIT)
3396 if (TARGET_HARD_FLOAT_ABI)
3398 if (TARGET_VFP)
3399 size += 32;
3400 if (TARGET_FPA)
3401 size += 12;
3402 if (TARGET_MAVERICK)
3403 size += 8;
3405 if (TARGET_IWMMXT_ABI)
3406 size += 8;
3409 return size;
3412 /* Decide whether TYPE should be returned in memory (true)
3413 or in a register (false). FNTYPE is the type of the function making
3414 the call. */
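/* Example (illustrative only): under AAPCS a struct of two ints (8
   bytes) is larger than one word and is not a co-processor candidate,
   so it is returned in memory, while a 4-byte struct comes back in r0.  */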
3415 static bool
3416 arm_return_in_memory (const_tree type, const_tree fntype)
3418 HOST_WIDE_INT size;
3420 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3422 if (TARGET_AAPCS_BASED)
3424 /* Simple, non-aggregate types (i.e. not including vectors and
3425 complex) are always returned in a register (or registers).
3426 We don't care about which register here, so we can short-cut
3427 some of the detail. */
3428 if (!AGGREGATE_TYPE_P (type)
3429 && TREE_CODE (type) != VECTOR_TYPE
3430 && TREE_CODE (type) != COMPLEX_TYPE)
3431 return false;
3433 /* Any return value that is no larger than one word can be
3434 returned in r0. */
3435 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3436 return false;
3438 /* Check any available co-processors to see if they accept the
3439 type as a register candidate (VFP, for example, can return
3440 some aggregates in consecutive registers). These aren't
3441 available if the call is variadic. */
3442 if (aapcs_select_return_coproc (type, fntype) >= 0)
3443 return false;
3445 /* Vector values should be returned using ARM registers, not
3446 memory (unless they're over 16 bytes, which will break since
3447 we only have four call-clobbered registers to play with). */
3448 if (TREE_CODE (type) == VECTOR_TYPE)
3449 return (size < 0 || size > (4 * UNITS_PER_WORD));
3451 /* The rest go in memory. */
3452 return true;
3455 if (TREE_CODE (type) == VECTOR_TYPE)
3456 return (size < 0 || size > (4 * UNITS_PER_WORD));
3458 if (!AGGREGATE_TYPE_P (type)
3459 && (TREE_CODE (type) != VECTOR_TYPE))
3460 /* All simple types are returned in registers. */
3461 return false;
3463 if (arm_abi != ARM_ABI_APCS)
3465 /* ATPCS and later return aggregate types in memory only if they are
3466 larger than a word (or are variable size). */
3467 return (size < 0 || size > UNITS_PER_WORD);
3470 /* For the arm-wince targets we choose to be compatible with Microsoft's
3471 ARM and Thumb compilers, which always return aggregates in memory. */
3472 #ifndef ARM_WINCE
3473 /* All structures/unions bigger than one word are returned in memory.
3474 Also catch the case where int_size_in_bytes returns -1. In this case
3475 the aggregate is either huge or of variable size, and in either case
3476 we will want to return it via memory and not in a register. */
3477 if (size < 0 || size > UNITS_PER_WORD)
3478 return true;
3480 if (TREE_CODE (type) == RECORD_TYPE)
3482 tree field;
3484 /* For a struct the APCS says that we only return in a register
3485 if the type is 'integer like' and every addressable element
3486 has an offset of zero. For practical purposes this means
3487 that the structure can have at most one non bit-field element
3488 and that this element must be the first one in the structure. */
3490 /* Find the first field, ignoring non FIELD_DECL things which will
3491 have been created by C++. */
3492 for (field = TYPE_FIELDS (type);
3493 field && TREE_CODE (field) != FIELD_DECL;
3494 field = TREE_CHAIN (field))
3495 continue;
3497 if (field == NULL)
3498 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3500 /* Check that the first field is valid for returning in a register. */
3502 /* ... Floats are not allowed */
3503 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3504 return true;
3506 /* ... Aggregates that are not themselves valid for returning in
3507 a register are not allowed. */
3508 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3509 return true;
3511 /* Now check the remaining fields, if any. Only bitfields are allowed,
3512 since they are not addressable. */
3513 for (field = TREE_CHAIN (field);
3514 field;
3515 field = TREE_CHAIN (field))
3517 if (TREE_CODE (field) != FIELD_DECL)
3518 continue;
3520 if (!DECL_BIT_FIELD_TYPE (field))
3521 return true;
3524 return false;
3527 if (TREE_CODE (type) == UNION_TYPE)
3529 tree field;
3531 /* Unions can be returned in registers if every element is
3532 integral, or can be returned in an integer register. */
3533 for (field = TYPE_FIELDS (type);
3534 field;
3535 field = TREE_CHAIN (field))
3537 if (TREE_CODE (field) != FIELD_DECL)
3538 continue;
3540 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3541 return true;
3543 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3544 return true;
3547 return false;
3549 #endif /* not ARM_WINCE */
3551 /* Return all other types in memory. */
3552 return true;
3555 /* Indicate whether or not words of a double are in big-endian order. */
3558 arm_float_words_big_endian (void)
3560 if (TARGET_MAVERICK)
3561 return 0;
3563 /* For FPA, float words are always big-endian. For VFP, float words
3564 follow the memory system mode. */
3566 if (TARGET_FPA)
3568 return 1;
3571 if (TARGET_VFP)
3572 return (TARGET_BIG_END ? 1 : 0);
3574 return 1;
3577 const struct pcs_attribute_arg
3579 const char *arg;
3580 enum arm_pcs value;
3581 } pcs_attribute_args[] =
3583 {"aapcs", ARM_PCS_AAPCS},
3584 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3585 #if 0
3586 /* We could recognize these, but changes would be needed elsewhere
3587 * to implement them. */
3588 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3589 {"atpcs", ARM_PCS_ATPCS},
3590 {"apcs", ARM_PCS_APCS},
3591 #endif
3592 {NULL, ARM_PCS_UNKNOWN}
3595 static enum arm_pcs
3596 arm_pcs_from_attribute (tree attr)
3598 const struct pcs_attribute_arg *ptr;
3599 const char *arg;
3601 /* Get the value of the argument. */
3602 if (TREE_VALUE (attr) == NULL_TREE
3603 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3604 return ARM_PCS_UNKNOWN;
3606 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3608 /* Check it against the list of known arguments. */
3609 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3610 if (streq (arg, ptr->arg))
3611 return ptr->value;
3613 /* An unrecognized PCS name. */
3614 return ARM_PCS_UNKNOWN;
3617 /* Get the PCS variant to use for this call. TYPE is the function's type
3618 specification; DECL is the specific declaration. DECL may be null if
3619 the call could be indirect or if this is a library call. */
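/* For illustration only: the "pcs" attribute consulted below is written
   on the function type, e.g.

       double f (double) __attribute__ ((pcs ("aapcs")));

   selects the base (integer-register) variant for f regardless of the
   target's default PCS.  */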
3620 static enum arm_pcs
3621 arm_get_pcs_model (const_tree type, const_tree decl)
3623 bool user_convention = false;
3624 enum arm_pcs user_pcs = arm_pcs_default;
3625 tree attr;
3627 gcc_assert (type);
3629 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3630 if (attr)
3632 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3633 user_convention = true;
3636 if (TARGET_AAPCS_BASED)
3638 /* Detect varargs functions. These always use the base rules
3639 (no argument is ever a candidate for a co-processor
3640 register). */
3641 bool base_rules = (TYPE_ARG_TYPES (type) != 0
3642 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3643 != void_type_node));
3645 if (user_convention)
3647 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3648 sorry ("Non-AAPCS derived PCS variant");
3649 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3650 error ("Variadic functions must use the base AAPCS variant");
3653 if (base_rules)
3654 return ARM_PCS_AAPCS;
3655 else if (user_convention)
3656 return user_pcs;
3657 else if (decl && flag_unit_at_a_time)
3659 /* Local functions never leak outside this compilation unit,
3660 so we are free to use whatever conventions are
3661 appropriate. */
3662 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3663 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3664 if (i && i->local)
3665 return ARM_PCS_AAPCS_LOCAL;
3668 else if (user_convention && user_pcs != arm_pcs_default)
3669 sorry ("PCS variant");
3671 /* For everything else we use the target's default. */
3672 return arm_pcs_default;
3676 static void
3677 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3678 const_tree fntype ATTRIBUTE_UNUSED,
3679 rtx libcall ATTRIBUTE_UNUSED,
3680 const_tree fndecl ATTRIBUTE_UNUSED)
3682 /* Record the unallocated VFP registers. */
3683 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3684 pcum->aapcs_vfp_reg_alloc = 0;
3687 /* Walk down the type tree of TYPE counting consecutive base elements.
3688 If *MODEP is VOIDmode, then set it to the first valid floating point
3689 type. If a non-floating point type is found, or if a floating point
3690 type that doesn't match a non-VOIDmode *MODEP is found, then return -1;
3691 otherwise return the count in the sub-tree. */
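/* Example (illustrative only): for

       struct { double r; double i[3]; }

   this returns 4 with *MODEP set to DFmode (a homogeneous aggregate of
   four doubles), whereas mixing float and double members, or adding
   padding, makes it return -1.  */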
3692 static int
3693 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3695 enum machine_mode mode;
3696 HOST_WIDE_INT size;
3698 switch (TREE_CODE (type))
3700 case REAL_TYPE:
3701 mode = TYPE_MODE (type);
3702 if (mode != DFmode && mode != SFmode)
3703 return -1;
3705 if (*modep == VOIDmode)
3706 *modep = mode;
3708 if (*modep == mode)
3709 return 1;
3711 break;
3713 case COMPLEX_TYPE:
3714 mode = TYPE_MODE (TREE_TYPE (type));
3715 if (mode != DFmode && mode != SFmode)
3716 return -1;
3718 if (*modep == VOIDmode)
3719 *modep = mode;
3721 if (*modep == mode)
3722 return 2;
3724 break;
3726 case VECTOR_TYPE:
3727 /* Use V2SImode and V4SImode as representatives of all 64-bit
3728 and 128-bit vector types, whether or not those modes are
3729 supported with the present options. */
3730 size = int_size_in_bytes (type);
3731 switch (size)
3733 case 8:
3734 mode = V2SImode;
3735 break;
3736 case 16:
3737 mode = V4SImode;
3738 break;
3739 default:
3740 return -1;
3743 if (*modep == VOIDmode)
3744 *modep = mode;
3746 /* Vector modes are considered to be opaque: two vectors are
3747 equivalent for the purposes of being homogeneous aggregates
3748 if they are the same size. */
3749 if (*modep == mode)
3750 return 1;
3752 break;
3754 case ARRAY_TYPE:
3756 int count;
3757 tree index = TYPE_DOMAIN (type);
3759 /* Can't handle incomplete types. */
3760 if (!COMPLETE_TYPE_P(type))
3761 return -1;
3763 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3764 if (count == -1
3765 || !index
3766 || !TYPE_MAX_VALUE (index)
3767 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3768 || !TYPE_MIN_VALUE (index)
3769 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3770 || count < 0)
3771 return -1;
3773 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3774 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3776 /* There must be no padding. */
3777 if (!host_integerp (TYPE_SIZE (type), 1)
3778 || (tree_low_cst (TYPE_SIZE (type), 1)
3779 != count * GET_MODE_BITSIZE (*modep)))
3780 return -1;
3782 return count;
3785 case RECORD_TYPE:
3787 int count = 0;
3788 int sub_count;
3789 tree field;
3791 /* Can't handle incomplete types. */
3792 if (!COMPLETE_TYPE_P(type))
3793 return -1;
3795 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3797 if (TREE_CODE (field) != FIELD_DECL)
3798 continue;
3800 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3801 if (sub_count < 0)
3802 return -1;
3803 count += sub_count;
3806 /* There must be no padding. */
3807 if (!host_integerp (TYPE_SIZE (type), 1)
3808 || (tree_low_cst (TYPE_SIZE (type), 1)
3809 != count * GET_MODE_BITSIZE (*modep)))
3810 return -1;
3812 return count;
3815 case UNION_TYPE:
3816 case QUAL_UNION_TYPE:
3818 /* These aren't very interesting except in a degenerate case. */
3819 int count = 0;
3820 int sub_count;
3821 tree field;
3823 /* Can't handle incomplete types. */
3824 if (!COMPLETE_TYPE_P(type))
3825 return -1;
3827 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3829 if (TREE_CODE (field) != FIELD_DECL)
3830 continue;
3832 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3833 if (sub_count < 0)
3834 return -1;
3835 count = count > sub_count ? count : sub_count;
3838 /* There must be no padding. */
3839 if (!host_integerp (TYPE_SIZE (type), 1)
3840 || (tree_low_cst (TYPE_SIZE (type), 1)
3841 != count * GET_MODE_BITSIZE (*modep)))
3842 return -1;
3844 return count;
3847 default:
3848 break;
3851 return -1;
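/* Annotation with illustrative types, not part of the original file:
   how this walk classifies some candidate homogeneous aggregates.  */
#if 0
struct hfa3  { float x, y, z; };      /* count 3, *modep == SFmode.  */
struct hfa2d { double re, im; };      /* count 2, *modep == DFmode.  */
struct mixed { float f; double d; };  /* Mixed base types: returns -1.  */
struct big   { float v[5]; };         /* Returns 5; the caller rejects
                                         anything above 4 elements.  */
#endif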
3854 /* Return true if PCS_VARIANT should use VFP registers. */
3855 static bool
3856 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3858 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3859 return true;
3861 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3862 return false;
3864 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3865 (TARGET_VFP_DOUBLE || !is_double));
3868 static bool
3869 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3870 enum machine_mode mode, const_tree type,
3871 enum machine_mode *base_mode, int *count)
3873 enum machine_mode new_mode = VOIDmode;
3875 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3876 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3877 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3879 *count = 1;
3880 new_mode = mode;
3882 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3884 *count = 2;
3885 new_mode = (mode == DCmode ? DFmode : SFmode);
3887 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3889 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3891 if (ag_count > 0 && ag_count <= 4)
3892 *count = ag_count;
3893 else
3894 return false;
3896 else
3897 return false;
3900 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3901 return false;
3903 *base_mode = new_mode;
3904 return true;
3907 static bool
3908 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3909 enum machine_mode mode, const_tree type)
3911 int count ATTRIBUTE_UNUSED;
3912 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3914 if (!use_vfp_abi (pcs_variant, false))
3915 return false;
3916 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3917 &ag_mode, &count);
3920 static bool
3921 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3922 const_tree type)
3924 if (!use_vfp_abi (pcum->pcs_variant, false))
3925 return false;
3927 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3928 &pcum->aapcs_vfp_rmode,
3929 &pcum->aapcs_vfp_rcount);
3932 static bool
3933 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3934 const_tree type ATTRIBUTE_UNUSED)
3936 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3937 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3938 int regno;
3940 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3941 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3943 pcum->aapcs_vfp_reg_alloc = mask << regno;
3944 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3946 int i;
3947 int rcount = pcum->aapcs_vfp_rcount;
3948 int rshift = shift;
3949 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3950 rtx par;
3951 if (!TARGET_NEON)
3953 /* Avoid using unsupported vector modes. */
3954 if (rmode == V2SImode)
3955 rmode = DImode;
3956 else if (rmode == V4SImode)
3958 rmode = DImode;
3959 rcount *= 2;
3960 rshift /= 2;
3963 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3964 for (i = 0; i < rcount; i++)
3966 rtx tmp = gen_rtx_REG (rmode,
3967 FIRST_VFP_REGNUM + regno + i * rshift);
3968 tmp = gen_rtx_EXPR_LIST
3969 (VOIDmode, tmp,
3970 GEN_INT (i * GET_MODE_SIZE (rmode)));
3971 XVECEXP (par, 0, i) = tmp;
3974 pcum->aapcs_reg = par;
3976 else
3977 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3978 return true;
3980 return false;
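/* Worked example (annotation): for a candidate of two doubles we have
   aapcs_vfp_rmode == DFmode and aapcs_vfp_rcount == 2, so shift == 2 and
   mask == 0xf (four consecutive single-precision registers).  The loop
   scans s0, s2, s4, ... for the first spot where all four bits are still
   free; if s0-s3 are already taken but s4-s7 are free, the argument is
   allocated to d2/d3 and aapcs_vfp_reg_alloc records 0xf << 4.  */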
3983 static rtx
3984 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
3985 enum machine_mode mode,
3986 const_tree type ATTRIBUTE_UNUSED)
3988 if (!use_vfp_abi (pcs_variant, false))
3989 return false;
3991 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3993 int count;
3994 enum machine_mode ag_mode;
3995 int i;
3996 rtx par;
3997 int shift;
3999 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4000 &ag_mode, &count);
4002 if (!TARGET_NEON)
4004 if (ag_mode == V2SImode)
4005 ag_mode = DImode;
4006 else if (ag_mode == V4SImode)
4008 ag_mode = DImode;
4009 count *= 2;
4012 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4013 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4014 for (i = 0; i < count; i++)
4016 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4017 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4018 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4019 XVECEXP (par, 0, i) = tmp;
4022 return par;
4025 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4028 static void
4029 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4030 enum machine_mode mode ATTRIBUTE_UNUSED,
4031 const_tree type ATTRIBUTE_UNUSED)
4033 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4034 pcum->aapcs_vfp_reg_alloc = 0;
4035 return;
4038 #define AAPCS_CP(X) \
4040 aapcs_ ## X ## _cum_init, \
4041 aapcs_ ## X ## _is_call_candidate, \
4042 aapcs_ ## X ## _allocate, \
4043 aapcs_ ## X ## _is_return_candidate, \
4044 aapcs_ ## X ## _allocate_return_reg, \
4045 aapcs_ ## X ## _advance \
4048 /* Table of co-processors that can be used to pass arguments in
4049    registers.  Ideally no argument should be a candidate for more than
4050 one co-processor table entry, but the table is processed in order
4051 and stops after the first match. If that entry then fails to put
4052 the argument into a co-processor register, the argument will go on
4053 the stack. */
4054 static struct
4056 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4057 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4059 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4060 BLKmode) is a candidate for this co-processor's registers; this
4061 function should ignore any position-dependent state in
4062 CUMULATIVE_ARGS and only use call-type dependent information. */
4063 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4065 /* Return true if the argument does get a co-processor register; it
4066 should set aapcs_reg to an RTX of the register allocated as is
4067 required for a return from FUNCTION_ARG. */
4068 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4070 /* Return true if a result of mode MODE (or type TYPE if MODE is
4071      BLKmode) can be returned in this co-processor's registers.  */
4072 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4074 /* Allocate and return an RTX element to hold the return type of a
4075 call, this routine must not fail and will only be called if
4076 is_return_candidate returned true with the same parameters. */
4077 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4079 /* Finish processing this argument and prepare to start processing
4080 the next one. */
4081 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4082 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4084 AAPCS_CP(vfp)
4087 #undef AAPCS_CP
4089 static int
4090 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4091 tree type)
4093 int i;
4095 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4096 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4097 return i;
4099 return -1;
4102 static int
4103 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4105 /* We aren't passed a decl, so we can't check that a call is local.
4106 However, it isn't clear that that would be a win anyway, since it
4107 might limit some tail-calling opportunities. */
4108 enum arm_pcs pcs_variant;
4110 if (fntype)
4112 const_tree fndecl = NULL_TREE;
4114 if (TREE_CODE (fntype) == FUNCTION_DECL)
4116 fndecl = fntype;
4117 fntype = TREE_TYPE (fntype);
4120 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4122 else
4123 pcs_variant = arm_pcs_default;
4125 if (pcs_variant != ARM_PCS_AAPCS)
4127 int i;
4129 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4130 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4131 TYPE_MODE (type),
4132 type))
4133 return i;
4135 return -1;
4138 static rtx
4139 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4140 const_tree fntype)
4142 /* We aren't passed a decl, so we can't check that a call is local.
4143 However, it isn't clear that that would be a win anyway, since it
4144 might limit some tail-calling opportunities. */
4145 enum arm_pcs pcs_variant;
4146 int unsignedp ATTRIBUTE_UNUSED;
4148 if (fntype)
4150 const_tree fndecl = NULL_TREE;
4152 if (TREE_CODE (fntype) == FUNCTION_DECL)
4154 fndecl = fntype;
4155 fntype = TREE_TYPE (fntype);
4158 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4160 else
4161 pcs_variant = arm_pcs_default;
4163 /* Promote integer types. */
4164 if (type && INTEGRAL_TYPE_P (type))
4165 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4167 if (pcs_variant != ARM_PCS_AAPCS)
4169 int i;
4171 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4172 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4173 type))
4174 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4175 mode, type);
4178 /* Promotes small structs returned in a register to full-word size
4179 for big-endian AAPCS. */
4180 if (type && arm_return_in_msb (type))
4182 HOST_WIDE_INT size = int_size_in_bytes (type);
4183 if (size % UNITS_PER_WORD != 0)
4185 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4186 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4190 return gen_rtx_REG (mode, R0_REGNUM);
4194 aapcs_libcall_value (enum machine_mode mode)
4196 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4199 /* Lay out a function argument using the AAPCS rules. The rule
4200 numbers referred to here are those in the AAPCS. */
4201 static void
4202 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4203 tree type, int named)
4205 int nregs, nregs2;
4206 int ncrn;
4208 /* We only need to do this once per argument. */
4209 if (pcum->aapcs_arg_processed)
4210 return;
4212 pcum->aapcs_arg_processed = true;
4214 /* Special case: if named is false then we are handling an incoming
4215 anonymous argument which is on the stack. */
4216 if (!named)
4217 return;
4219 /* Is this a potential co-processor register candidate? */
4220 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4222 int slot = aapcs_select_call_coproc (pcum, mode, type);
4223 pcum->aapcs_cprc_slot = slot;
4225 /* We don't have to apply any of the rules from part B of the
4226 preparation phase, these are handled elsewhere in the
4227 compiler. */
4229 if (slot >= 0)
4231 /* A Co-processor register candidate goes either in its own
4232 class of registers or on the stack. */
4233 if (!pcum->aapcs_cprc_failed[slot])
4235 /* C1.cp - Try to allocate the argument to co-processor
4236 registers. */
4237 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4238 return;
4240 /* C2.cp - Put the argument on the stack and note that we
4241 can't assign any more candidates in this slot. We also
4242 need to note that we have allocated stack space, so that
4243 we won't later try to split a non-cprc candidate between
4244 core registers and the stack. */
4245 pcum->aapcs_cprc_failed[slot] = true;
4246 pcum->can_split = false;
4249 /* We didn't get a register, so this argument goes on the
4250 stack. */
4251 gcc_assert (pcum->can_split == false);
4252 return;
4256 /* C3 - For double-word aligned arguments, round the NCRN up to the
4257 next even number. */
4258 ncrn = pcum->aapcs_ncrn;
4259 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4260 ncrn++;
4262 nregs = ARM_NUM_REGS2(mode, type);
4264 /* Sigh, this test should really assert that nregs > 0, but a GCC
4265 extension allows empty structs and then gives them empty size; it
4266 then allows such a structure to be passed by value. For some of
4267 the code below we have to pretend that such an argument has
4268 non-zero size so that we 'locate' it correctly either in
4269 registers or on the stack. */
4270 gcc_assert (nregs >= 0);
4272 nregs2 = nregs ? nregs : 1;
4274 /* C4 - Argument fits entirely in core registers. */
4275 if (ncrn + nregs2 <= NUM_ARG_REGS)
4277 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4278 pcum->aapcs_next_ncrn = ncrn + nregs;
4279 return;
4282 /* C5 - Some core registers left and there are no arguments already
4283 on the stack: split this argument between the remaining core
4284 registers and the stack. */
4285 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4287 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4288 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4289 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4290 return;
4293 /* C6 - NCRN is set to 4. */
4294 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4296   /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
4297 return;
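/* Worked example (annotation), assuming the base (core-register) rules:
   for f (int a, double b, int c), 'a' takes r0 and NCRN becomes 1; 'b'
   needs doubleword alignment, so rule C3 rounds NCRN up to 2 and it
   occupies r2/r3; 'c' no longer fits, rule C6 sets NCRN to 4 and rules
   C7/C8 leave it for the stack.  */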
4300 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4301 for a call to a function whose data type is FNTYPE.
4302 For a library call, FNTYPE is NULL. */
4303 void
4304 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4305 rtx libname,
4306 tree fndecl ATTRIBUTE_UNUSED)
4308 /* Long call handling. */
4309 if (fntype)
4310 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4311 else
4312 pcum->pcs_variant = arm_pcs_default;
4314 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4316 if (arm_libcall_uses_aapcs_base (libname))
4317 pcum->pcs_variant = ARM_PCS_AAPCS;
4319 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4320 pcum->aapcs_reg = NULL_RTX;
4321 pcum->aapcs_partial = 0;
4322 pcum->aapcs_arg_processed = false;
4323 pcum->aapcs_cprc_slot = -1;
4324 pcum->can_split = true;
4326 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4328 int i;
4330 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4332 pcum->aapcs_cprc_failed[i] = false;
4333 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4336 return;
4339 /* Legacy ABIs */
4341 /* On the ARM, the offset starts at 0. */
4342 pcum->nregs = 0;
4343 pcum->iwmmxt_nregs = 0;
4344 pcum->can_split = true;
4346 /* Varargs vectors are treated the same as long long.
4347 named_count avoids having to change the way arm handles 'named' */
4348 pcum->named_count = 0;
4349 pcum->nargs = 0;
4351 if (TARGET_REALLY_IWMMXT && fntype)
4353 tree fn_arg;
4355 for (fn_arg = TYPE_ARG_TYPES (fntype);
4356 fn_arg;
4357 fn_arg = TREE_CHAIN (fn_arg))
4358 pcum->named_count += 1;
4360 if (! pcum->named_count)
4361 pcum->named_count = INT_MAX;
4366 /* Return true if mode/type need doubleword alignment. */
4367 bool
4368 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4370 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4371 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4375 /* Determine where to put an argument to a function.
4376 Value is zero to push the argument on the stack,
4377 or a hard register in which to store the argument.
4379 MODE is the argument's machine mode.
4380 TYPE is the data type of the argument (as a tree).
4381 This is null for libcalls where that information may
4382 not be available.
4383 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4384 the preceding args and about the function being called.
4385 NAMED is nonzero if this argument is a named parameter
4386 (otherwise it is an extra parameter matching an ellipsis). */
4389 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4390 tree type, int named)
4392 int nregs;
4394 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4395 a call insn (op3 of a call_value insn). */
4396 if (mode == VOIDmode)
4397 return const0_rtx;
4399 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4401 aapcs_layout_arg (pcum, mode, type, named);
4402 return pcum->aapcs_reg;
4405 /* Varargs vectors are treated the same as long long.
4406 named_count avoids having to change the way arm handles 'named' */
4407 if (TARGET_IWMMXT_ABI
4408 && arm_vector_mode_supported_p (mode)
4409 && pcum->named_count > pcum->nargs + 1)
4411 if (pcum->iwmmxt_nregs <= 9)
4412 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4413 else
4415 pcum->can_split = false;
4416 return NULL_RTX;
4420 /* Put doubleword aligned quantities in even register pairs. */
4421 if (pcum->nregs & 1
4422 && ARM_DOUBLEWORD_ALIGN
4423 && arm_needs_doubleword_align (mode, type))
4424 pcum->nregs++;
4426 if (mode == VOIDmode)
4427 /* Pick an arbitrary value for operand 2 of the call insn. */
4428 return const0_rtx;
4430 /* Only allow splitting an arg between regs and memory if all preceding
4431 args were allocated to regs. For args passed by reference we only count
4432 the reference pointer. */
4433 if (pcum->can_split)
4434 nregs = 1;
4435 else
4436 nregs = ARM_NUM_REGS2 (mode, type);
4438 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4439 return NULL_RTX;
4441 return gen_rtx_REG (mode, pcum->nregs);
4444 static int
4445 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4446 tree type, bool named)
4448 int nregs = pcum->nregs;
4450 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4452 aapcs_layout_arg (pcum, mode, type, named);
4453 return pcum->aapcs_partial;
4456 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4457 return 0;
4459 if (NUM_ARG_REGS > nregs
4460 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4461 && pcum->can_split)
4462 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4464 return 0;
4467 void
4468 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4469 tree type, bool named)
4471 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4473 aapcs_layout_arg (pcum, mode, type, named);
4475 if (pcum->aapcs_cprc_slot >= 0)
4477 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4478 type);
4479 pcum->aapcs_cprc_slot = -1;
4482 /* Generic stuff. */
4483 pcum->aapcs_arg_processed = false;
4484 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4485 pcum->aapcs_reg = NULL_RTX;
4486 pcum->aapcs_partial = 0;
4488 else
4490 pcum->nargs += 1;
4491 if (arm_vector_mode_supported_p (mode)
4492 && pcum->named_count > pcum->nargs
4493 && TARGET_IWMMXT_ABI)
4494 pcum->iwmmxt_nregs += 1;
4495 else
4496 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4500 /* Variable sized types are passed by reference. This is a GCC
4501 extension to the ARM ABI. */
4503 static bool
4504 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4505 enum machine_mode mode ATTRIBUTE_UNUSED,
4506 const_tree type, bool named ATTRIBUTE_UNUSED)
4508 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
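/* Annotation: this fires only for types whose size is not a compile-time
   constant, e.g. a structure containing a variable-length array member
   (itself a GNU extension); ordinary fixed-size aggregates are still
   passed by value.  */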
4511 /* Encode the current state of the #pragma [no_]long_calls. */
4512 typedef enum
4514 OFF, /* No #pragma [no_]long_calls is in effect. */
4515 LONG, /* #pragma long_calls is in effect. */
4516 SHORT /* #pragma no_long_calls is in effect. */
4517 } arm_pragma_enum;
4519 static arm_pragma_enum arm_pragma_long_calls = OFF;
4521 void
4522 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4524 arm_pragma_long_calls = LONG;
4527 void
4528 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4530 arm_pragma_long_calls = SHORT;
4533 void
4534 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4536 arm_pragma_long_calls = OFF;
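/* Illustrative usage, not part of this file: how the pragmas drive the
   state recorded above (the attribute itself is attached by
   arm_set_default_type_attributes further down).  */
#if 0
#pragma long_calls
void far_func (void);       /* Gets the long_call attribute.  */
#pragma no_long_calls
void near_func (void);      /* Gets the short_call attribute.  */
#pragma long_calls_off
void normal_func (void);    /* Follows -mlong-calls only.  */
#endif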
4539 /* Handle an attribute requiring a FUNCTION_DECL;
4540 arguments as in struct attribute_spec.handler. */
4541 static tree
4542 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4543 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4545 if (TREE_CODE (*node) != FUNCTION_DECL)
4547 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4548 name);
4549 *no_add_attrs = true;
4552 return NULL_TREE;
4555 /* Handle an "interrupt" or "isr" attribute;
4556 arguments as in struct attribute_spec.handler. */
4557 static tree
4558 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4559 bool *no_add_attrs)
4561 if (DECL_P (*node))
4563 if (TREE_CODE (*node) != FUNCTION_DECL)
4565 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4566 name);
4567 *no_add_attrs = true;
4569 /* FIXME: the argument if any is checked for type attributes;
4570 should it be checked for decl ones? */
4572 else
4574 if (TREE_CODE (*node) == FUNCTION_TYPE
4575 || TREE_CODE (*node) == METHOD_TYPE)
4577 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4579 warning (OPT_Wattributes, "%qE attribute ignored",
4580 name);
4581 *no_add_attrs = true;
4584 else if (TREE_CODE (*node) == POINTER_TYPE
4585 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4586 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4587 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4589 *node = build_variant_type_copy (*node);
4590 TREE_TYPE (*node) = build_type_attribute_variant
4591 (TREE_TYPE (*node),
4592 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4593 *no_add_attrs = true;
4595 else
4597 /* Possibly pass this attribute on from the type to a decl. */
4598 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4599 | (int) ATTR_FLAG_FUNCTION_NEXT
4600 | (int) ATTR_FLAG_ARRAY_NEXT))
4602 *no_add_attrs = true;
4603 return tree_cons (name, args, NULL_TREE);
4605 else
4607 warning (OPT_Wattributes, "%qE attribute ignored",
4608 name);
4613 return NULL_TREE;
4616 /* Handle a "pcs" attribute; arguments as in struct
4617 attribute_spec.handler. */
4618 static tree
4619 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4620 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4622 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4624 warning (OPT_Wattributes, "%qE attribute ignored", name);
4625 *no_add_attrs = true;
4627 return NULL_TREE;
4630 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4631 /* Handle the "notshared" attribute. This attribute is another way of
4632 requesting hidden visibility. ARM's compiler supports
4633 "__declspec(notshared)"; we support the same thing via an
4634 attribute. */
4636 static tree
4637 arm_handle_notshared_attribute (tree *node,
4638 tree name ATTRIBUTE_UNUSED,
4639 tree args ATTRIBUTE_UNUSED,
4640 int flags ATTRIBUTE_UNUSED,
4641 bool *no_add_attrs)
4643 tree decl = TYPE_NAME (*node);
4645 if (decl)
4647 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4648 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4649 *no_add_attrs = false;
4651 return NULL_TREE;
4653 #endif
4655 /* Return 0 if the attributes for two types are incompatible, 1 if they
4656 are compatible, and 2 if they are nearly compatible (which causes a
4657 warning to be generated). */
4658 static int
4659 arm_comp_type_attributes (const_tree type1, const_tree type2)
4661 int l1, l2, s1, s2;
4663 /* Check for mismatch of non-default calling convention. */
4664 if (TREE_CODE (type1) != FUNCTION_TYPE)
4665 return 1;
4667 /* Check for mismatched call attributes. */
4668 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4669 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4670 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4671 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4673 /* Only bother to check if an attribute is defined. */
4674 if (l1 | l2 | s1 | s2)
4676 /* If one type has an attribute, the other must have the same attribute. */
4677 if ((l1 != l2) || (s1 != s2))
4678 return 0;
4680 /* Disallow mixed attributes. */
4681 if ((l1 & s2) || (l2 & s1))
4682 return 0;
4685 /* Check for mismatched ISR attribute. */
4686 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4687 if (! l1)
4688 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4689 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4690 if (! l2)
4691     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4692 if (l1 != l2)
4693 return 0;
4695 return 1;
4698 /* Assigns default attributes to newly defined type. This is used to
4699 set short_call/long_call attributes for function types of
4700 functions defined inside corresponding #pragma scopes. */
4701 static void
4702 arm_set_default_type_attributes (tree type)
4704 /* Add __attribute__ ((long_call)) to all functions, when
4705 inside #pragma long_calls or __attribute__ ((short_call)),
4706 when inside #pragma no_long_calls. */
4707 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4709 tree type_attr_list, attr_name;
4710 type_attr_list = TYPE_ATTRIBUTES (type);
4712 if (arm_pragma_long_calls == LONG)
4713 attr_name = get_identifier ("long_call");
4714 else if (arm_pragma_long_calls == SHORT)
4715 attr_name = get_identifier ("short_call");
4716 else
4717 return;
4719 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4720 TYPE_ATTRIBUTES (type) = type_attr_list;
4724 /* Return true if DECL is known to be linked into section SECTION. */
4726 static bool
4727 arm_function_in_section_p (tree decl, section *section)
4729 /* We can only be certain about functions defined in the same
4730 compilation unit. */
4731 if (!TREE_STATIC (decl))
4732 return false;
4734 /* Make sure that SYMBOL always binds to the definition in this
4735 compilation unit. */
4736 if (!targetm.binds_local_p (decl))
4737 return false;
4739 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4740 if (!DECL_SECTION_NAME (decl))
4742 /* Make sure that we will not create a unique section for DECL. */
4743 if (flag_function_sections || DECL_ONE_ONLY (decl))
4744 return false;
4747 return function_section (decl) == section;
4750 /* Return nonzero if a 32-bit "long_call" should be generated for
4751 a call from the current function to DECL. We generate a long_call
4752 if the function:
4754    a.  has an __attribute__ ((long_call))
4755 or b. is within the scope of a #pragma long_calls
4756 or c. the -mlong-calls command line switch has been specified
4758 However we do not generate a long call if the function:
4760 d. has an __attribute__ ((short_call))
4761 or e. is inside the scope of a #pragma no_long_calls
4762 or f. is defined in the same section as the current function. */
4764 bool
4765 arm_is_long_call_p (tree decl)
4767 tree attrs;
4769 if (!decl)
4770 return TARGET_LONG_CALLS;
4772 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4773 if (lookup_attribute ("short_call", attrs))
4774 return false;
4776 /* For "f", be conservative, and only cater for cases in which the
4777 whole of the current function is placed in the same section. */
4778 if (!flag_reorder_blocks_and_partition
4779 && TREE_CODE (decl) == FUNCTION_DECL
4780 && arm_function_in_section_p (decl, current_function_section ()))
4781 return false;
4783 if (lookup_attribute ("long_call", attrs))
4784 return true;
4786 return TARGET_LONG_CALLS;
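/* Illustrative usage, not part of this file: the explicit attributes
   checked above override both the pragmas and -mlong-calls, with
   short_call taking precedence.  */
#if 0
extern void far_away (void) __attribute__ ((long_call));
extern void next_door (void) __attribute__ ((short_call));
#endif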
4789 /* Return nonzero if it is ok to make a tail-call to DECL. */
4790 static bool
4791 arm_function_ok_for_sibcall (tree decl, tree exp)
4793 unsigned long func_type;
4795 if (cfun->machine->sibcall_blocked)
4796 return false;
4798 /* Never tailcall something for which we have no decl, or if we
4799 are in Thumb mode. */
4800 if (decl == NULL || TARGET_THUMB)
4801 return false;
4803 /* The PIC register is live on entry to VxWorks PLT entries, so we
4804 must make the call before restoring the PIC register. */
4805 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4806 return false;
4808 /* Cannot tail-call to long calls, since these are out of range of
4809 a branch instruction. */
4810 if (arm_is_long_call_p (decl))
4811 return false;
4813 /* If we are interworking and the function is not declared static
4814 then we can't tail-call it unless we know that it exists in this
4815 compilation unit (since it might be a Thumb routine). */
4816 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4817 return false;
4819 func_type = arm_current_func_type ();
4820 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4821 if (IS_INTERRUPT (func_type))
4822 return false;
4824 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4826 /* Check that the return value locations are the same. For
4827 example that we aren't returning a value from the sibling in
4828 a VFP register but then need to transfer it to a core
4829 register. */
4830 rtx a, b;
4832 a = arm_function_value (TREE_TYPE (exp), decl, false);
4833 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4834 cfun->decl, false);
4835 if (!rtx_equal_p (a, b))
4836 return false;
4839 /* Never tailcall if function may be called with a misaligned SP. */
4840 if (IS_STACKALIGN (func_type))
4841 return false;
4843 /* Everything else is ok. */
4844 return true;
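/* Worked example (annotation): the return-location check above rejects,
   for instance, a tail call from a base-AAPCS function returning float
   in r0 to a callee declared with pcs("aapcs-vfp") returning float in
   s0; the result would need an extra move, so a normal call is emitted
   instead.  */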
4848 /* Addressing mode support functions. */
4850 /* Return nonzero if X is a legitimate immediate operand when compiling
4851 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4853 legitimate_pic_operand_p (rtx x)
4855 if (GET_CODE (x) == SYMBOL_REF
4856 || (GET_CODE (x) == CONST
4857 && GET_CODE (XEXP (x, 0)) == PLUS
4858 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4859 return 0;
4861 return 1;
4864 /* Record that the current function needs a PIC register. Initialize
4865 cfun->machine->pic_reg if we have not already done so. */
4867 static void
4868 require_pic_register (void)
4870 /* A lot of the logic here is made obscure by the fact that this
4871 routine gets called as part of the rtx cost estimation process.
4872 We don't want those calls to affect any assumptions about the real
4873 function; and further, we can't call entry_of_function() until we
4874 start the real expansion process. */
4875 if (!crtl->uses_pic_offset_table)
4877 gcc_assert (can_create_pseudo_p ());
4878 if (arm_pic_register != INVALID_REGNUM)
4880 if (!cfun->machine->pic_reg)
4881 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4883 /* Play games to avoid marking the function as needing pic
4884 if we are being called as part of the cost-estimation
4885 process. */
4886 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4887 crtl->uses_pic_offset_table = 1;
4889 else
4891 rtx seq;
4893 if (!cfun->machine->pic_reg)
4894 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4896 /* Play games to avoid marking the function as needing pic
4897 if we are being called as part of the cost-estimation
4898 process. */
4899 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4901 crtl->uses_pic_offset_table = 1;
4902 start_sequence ();
4904 arm_load_pic_register (0UL);
4906 seq = get_insns ();
4907 end_sequence ();
4908 /* We can be called during expansion of PHI nodes, where
4909 we can't yet emit instructions directly in the final
4910 insn stream. Queue the insns on the entry edge, they will
4911 be committed after everything else is expanded. */
4912 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4919 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4921 if (GET_CODE (orig) == SYMBOL_REF
4922 || GET_CODE (orig) == LABEL_REF)
4924 rtx pic_ref, address;
4925 rtx insn;
4927 if (reg == 0)
4929 gcc_assert (can_create_pseudo_p ());
4930 reg = gen_reg_rtx (Pmode);
4931 address = gen_reg_rtx (Pmode);
4933 else
4934 address = reg;
4936 /* VxWorks does not impose a fixed gap between segments; the run-time
4937 gap can be different from the object-file gap. We therefore can't
4938 use GOTOFF unless we are absolutely sure that the symbol is in the
4939 same segment as the GOT. Unfortunately, the flexibility of linker
4940 scripts means that we can't be sure of that in general, so assume
4941 that GOTOFF is never valid on VxWorks. */
4942 if ((GET_CODE (orig) == LABEL_REF
4943 || (GET_CODE (orig) == SYMBOL_REF &&
4944 SYMBOL_REF_LOCAL_P (orig)))
4945 && NEED_GOT_RELOC
4946 && !TARGET_VXWORKS_RTP)
4947 insn = arm_pic_static_addr (orig, reg);
4948 else
4950 /* If this function doesn't have a pic register, create one now. */
4951 require_pic_register ();
4953 if (TARGET_32BIT)
4954 emit_insn (gen_pic_load_addr_32bit (address, orig));
4955 else /* TARGET_THUMB1 */
4956 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
4958 pic_ref = gen_const_mem (Pmode,
4959 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
4960 address));
4961 insn = emit_move_insn (reg, pic_ref);
4964 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4965 by loop. */
4966 set_unique_reg_note (insn, REG_EQUAL, orig);
4968 return reg;
4970 else if (GET_CODE (orig) == CONST)
4972 rtx base, offset;
4974 if (GET_CODE (XEXP (orig, 0)) == PLUS
4975 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4976 return orig;
4978 /* Handle the case where we have: const (UNSPEC_TLS). */
4979 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4980 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4981 return orig;
4983 /* Handle the case where we have:
4984 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4985 CONST_INT. */
4986 if (GET_CODE (XEXP (orig, 0)) == PLUS
4987 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
4988 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
4990 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
4991 return orig;
4994 if (reg == 0)
4996 gcc_assert (can_create_pseudo_p ());
4997 reg = gen_reg_rtx (Pmode);
5000 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5002 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5003 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5004 base == reg ? 0 : reg);
5006 if (GET_CODE (offset) == CONST_INT)
5008 /* The base register doesn't really matter, we only want to
5009 test the index for the appropriate mode. */
5010 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5012 gcc_assert (can_create_pseudo_p ());
5013 offset = force_reg (Pmode, offset);
5016 if (GET_CODE (offset) == CONST_INT)
5017 return plus_constant (base, INTVAL (offset));
5020 if (GET_MODE_SIZE (mode) > 4
5021 && (GET_MODE_CLASS (mode) == MODE_INT
5022 || TARGET_SOFT_FLOAT))
5024 emit_insn (gen_addsi3 (reg, base, offset));
5025 return reg;
5028 return gen_rtx_PLUS (Pmode, base, offset);
5031 return orig;
5035 /* Find a spare register to use during the prolog of a function. */
5037 static int
5038 thumb_find_work_register (unsigned long pushed_regs_mask)
5040 int reg;
5042 /* Check the argument registers first as these are call-used. The
5043 register allocation order means that sometimes r3 might be used
5044 but earlier argument registers might not, so check them all. */
5045 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5046 if (!df_regs_ever_live_p (reg))
5047 return reg;
5049 /* Before going on to check the call-saved registers we can try a couple
5050 more ways of deducing that r3 is available. The first is when we are
5051 pushing anonymous arguments onto the stack and we have less than 4
5052 registers worth of fixed arguments(*). In this case r3 will be part of
5053 the variable argument list and so we can be sure that it will be
5054 pushed right at the start of the function. Hence it will be available
5055 for the rest of the prologue.
5056      (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
5057 if (cfun->machine->uses_anonymous_args
5058 && crtl->args.pretend_args_size > 0)
5059 return LAST_ARG_REGNUM;
5061 /* The other case is when we have fixed arguments but less than 4 registers
5062 worth. In this case r3 might be used in the body of the function, but
5063 it is not being used to convey an argument into the function. In theory
5064 we could just check crtl->args.size to see how many bytes are
5065 being passed in argument registers, but it seems that it is unreliable.
5066 Sometimes it will have the value 0 when in fact arguments are being
5067 passed. (See testcase execute/20021111-1.c for an example). So we also
5068 check the args_info.nregs field as well. The problem with this field is
5069 that it makes no allowances for arguments that are passed to the
5070 function but which are not used. Hence we could miss an opportunity
5071 when a function has an unused argument in r3. But it is better to be
5072 safe than to be sorry. */
5073 if (! cfun->machine->uses_anonymous_args
5074 && crtl->args.size >= 0
5075 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5076 && crtl->args.info.nregs < 4)
5077 return LAST_ARG_REGNUM;
5079 /* Otherwise look for a call-saved register that is going to be pushed. */
5080 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5081 if (pushed_regs_mask & (1 << reg))
5082 return reg;
5084 if (TARGET_THUMB2)
5086 /* Thumb-2 can use high regs. */
5087 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5088 if (pushed_regs_mask & (1 << reg))
5089 return reg;
5091 /* Something went wrong - thumb_compute_save_reg_mask()
5092 should have arranged for a suitable register to be pushed. */
5093 gcc_unreachable ();
5096 static GTY(()) int pic_labelno;
5098 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5099 low register. */
5101 void
5102 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5104 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5106 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5107 return;
5109 gcc_assert (flag_pic);
5111 pic_reg = cfun->machine->pic_reg;
5112 if (TARGET_VXWORKS_RTP)
5114 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5115 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5116 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5118 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5120 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5121 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5123 else
5125 /* We use an UNSPEC rather than a LABEL_REF because this label
5126 never appears in the code stream. */
5128 labelno = GEN_INT (pic_labelno++);
5129 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5130 l1 = gen_rtx_CONST (VOIDmode, l1);
5132 /* On the ARM the PC register contains 'dot + 8' at the time of the
5133 addition, on the Thumb it is 'dot + 4'. */
5134 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5135 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5136 UNSPEC_GOTSYM_OFF);
5137 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5139 if (TARGET_32BIT)
5141 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5142 if (TARGET_ARM)
5143 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5144 else
5145 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5147 else /* TARGET_THUMB1 */
5149 if (arm_pic_register != INVALID_REGNUM
5150 && REGNO (pic_reg) > LAST_LO_REGNUM)
5152 /* We will have pushed the pic register, so we should always be
5153 able to find a work register. */
5154 pic_tmp = gen_rtx_REG (SImode,
5155 thumb_find_work_register (saved_regs));
5156 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5157 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5159 else
5160 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5161 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5165 /* Need to emit this whether or not we obey regdecls,
5166 since setjmp/longjmp can cause life info to screw up. */
5167 emit_use (pic_reg);
5170 /* Generate code to load the address of a static var when flag_pic is set. */
5171 static rtx
5172 arm_pic_static_addr (rtx orig, rtx reg)
5174 rtx l1, labelno, offset_rtx, insn;
5176 gcc_assert (flag_pic);
5178 /* We use an UNSPEC rather than a LABEL_REF because this label
5179 never appears in the code stream. */
5180 labelno = GEN_INT (pic_labelno++);
5181 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5182 l1 = gen_rtx_CONST (VOIDmode, l1);
5184 /* On the ARM the PC register contains 'dot + 8' at the time of the
5185 addition, on the Thumb it is 'dot + 4'. */
5186 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5187 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5188 UNSPEC_SYMBOL_OFFSET);
5189 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5191 if (TARGET_32BIT)
5193 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5194 if (TARGET_ARM)
5195 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5196 else
5197 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5199 else /* TARGET_THUMB1 */
5201 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5202 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5205 return insn;
5208 /* Return nonzero if X is valid as an ARM state addressing register. */
5209 static int
5210 arm_address_register_rtx_p (rtx x, int strict_p)
5212 int regno;
5214 if (GET_CODE (x) != REG)
5215 return 0;
5217 regno = REGNO (x);
5219 if (strict_p)
5220 return ARM_REGNO_OK_FOR_BASE_P (regno);
5222 return (regno <= LAST_ARM_REGNUM
5223 || regno >= FIRST_PSEUDO_REGISTER
5224 || regno == FRAME_POINTER_REGNUM
5225 || regno == ARG_POINTER_REGNUM);
5228 /* Return TRUE if this rtx is the difference of a symbol and a label,
5229 and will reduce to a PC-relative relocation in the object file.
5230 Expressions like this can be left alone when generating PIC, rather
5231 than forced through the GOT. */
5232 static int
5233 pcrel_constant_p (rtx x)
5235 if (GET_CODE (x) == MINUS)
5236 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5238 return FALSE;
5241 /* Return nonzero if X is a valid ARM state address operand. */
5243 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5244 int strict_p)
5246 bool use_ldrd;
5247 enum rtx_code code = GET_CODE (x);
5249 if (arm_address_register_rtx_p (x, strict_p))
5250 return 1;
5252 use_ldrd = (TARGET_LDRD
5253 && (mode == DImode
5254 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5256 if (code == POST_INC || code == PRE_DEC
5257 || ((code == PRE_INC || code == POST_DEC)
5258 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5259 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5261 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5262 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5263 && GET_CODE (XEXP (x, 1)) == PLUS
5264 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5266 rtx addend = XEXP (XEXP (x, 1), 1);
5268 /* Don't allow ldrd post increment by register because it's hard
5269 to fixup invalid register choices. */
5270 if (use_ldrd
5271 && GET_CODE (x) == POST_MODIFY
5272 && GET_CODE (addend) == REG)
5273 return 0;
5275 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5276 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5279 /* After reload constants split into minipools will have addresses
5280 from a LABEL_REF. */
5281 else if (reload_completed
5282 && (code == LABEL_REF
5283 || (code == CONST
5284 && GET_CODE (XEXP (x, 0)) == PLUS
5285 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5286 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5287 return 1;
5289 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5290 return 0;
5292 else if (code == PLUS)
5294 rtx xop0 = XEXP (x, 0);
5295 rtx xop1 = XEXP (x, 1);
5297 return ((arm_address_register_rtx_p (xop0, strict_p)
5298 && GET_CODE(xop1) == CONST_INT
5299 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5300 || (arm_address_register_rtx_p (xop1, strict_p)
5301 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5304 #if 0
5305 /* Reload currently can't handle MINUS, so disable this for now */
5306 else if (GET_CODE (x) == MINUS)
5308 rtx xop0 = XEXP (x, 0);
5309 rtx xop1 = XEXP (x, 1);
5311 return (arm_address_register_rtx_p (xop0, strict_p)
5312 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5314 #endif
5316 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5317 && code == SYMBOL_REF
5318 && CONSTANT_POOL_ADDRESS_P (x)
5319 && ! (flag_pic
5320 && symbol_mentioned_p (get_pool_constant (x))
5321 && ! pcrel_constant_p (get_pool_constant (x))))
5322 return 1;
5324 return 0;
5327 /* Return nonzero if X is a valid Thumb-2 address operand. */
5328 static int
5329 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5331 bool use_ldrd;
5332 enum rtx_code code = GET_CODE (x);
5334 if (arm_address_register_rtx_p (x, strict_p))
5335 return 1;
5337 use_ldrd = (TARGET_LDRD
5338 && (mode == DImode
5339 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5341 if (code == POST_INC || code == PRE_DEC
5342 || ((code == PRE_INC || code == POST_DEC)
5343 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5344 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5346 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5347 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5348 && GET_CODE (XEXP (x, 1)) == PLUS
5349 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5351 /* Thumb-2 only has autoincrement by constant. */
5352 rtx addend = XEXP (XEXP (x, 1), 1);
5353 HOST_WIDE_INT offset;
5355 if (GET_CODE (addend) != CONST_INT)
5356 return 0;
5358 offset = INTVAL(addend);
5359 if (GET_MODE_SIZE (mode) <= 4)
5360 return (offset > -256 && offset < 256);
5362 return (use_ldrd && offset > -1024 && offset < 1024
5363 && (offset & 3) == 0);
5366 /* After reload constants split into minipools will have addresses
5367 from a LABEL_REF. */
5368 else if (reload_completed
5369 && (code == LABEL_REF
5370 || (code == CONST
5371 && GET_CODE (XEXP (x, 0)) == PLUS
5372 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5373 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5374 return 1;
5376 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5377 return 0;
5379 else if (code == PLUS)
5381 rtx xop0 = XEXP (x, 0);
5382 rtx xop1 = XEXP (x, 1);
5384 return ((arm_address_register_rtx_p (xop0, strict_p)
5385 && thumb2_legitimate_index_p (mode, xop1, strict_p))
5386 || (arm_address_register_rtx_p (xop1, strict_p)
5387 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5390 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5391 && code == SYMBOL_REF
5392 && CONSTANT_POOL_ADDRESS_P (x)
5393 && ! (flag_pic
5394 && symbol_mentioned_p (get_pool_constant (x))
5395 && ! pcrel_constant_p (get_pool_constant (x))))
5396 return 1;
5398 return 0;
5401 /* Return nonzero if INDEX is valid for an address index operand in
5402 ARM state. */
5403 static int
5404 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5405 int strict_p)
5407 HOST_WIDE_INT range;
5408 enum rtx_code code = GET_CODE (index);
5410 /* Standard coprocessor addressing modes. */
5411 if (TARGET_HARD_FLOAT
5412 && (TARGET_FPA || TARGET_MAVERICK)
5413 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5414 || (TARGET_MAVERICK && mode == DImode)))
5415 return (code == CONST_INT && INTVAL (index) < 1024
5416 && INTVAL (index) > -1024
5417 && (INTVAL (index) & 3) == 0);
5419 if (TARGET_NEON
5420 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5421 return (code == CONST_INT
5422 && INTVAL (index) < 1016
5423 && INTVAL (index) > -1024
5424 && (INTVAL (index) & 3) == 0);
5426 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5427 return (code == CONST_INT
5428 && INTVAL (index) < 1024
5429 && INTVAL (index) > -1024
5430 && (INTVAL (index) & 3) == 0);
5432 if (arm_address_register_rtx_p (index, strict_p)
5433 && (GET_MODE_SIZE (mode) <= 4))
5434 return 1;
5436 if (mode == DImode || mode == DFmode)
5438 if (code == CONST_INT)
5440 HOST_WIDE_INT val = INTVAL (index);
5442 if (TARGET_LDRD)
5443 return val > -256 && val < 256;
5444 else
5445 return val > -4096 && val < 4092;
5448 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5451 if (GET_MODE_SIZE (mode) <= 4
5452 && ! (arm_arch4
5453 && (mode == HImode
5454 || mode == HFmode
5455 || (mode == QImode && outer == SIGN_EXTEND))))
5457 if (code == MULT)
5459 rtx xiop0 = XEXP (index, 0);
5460 rtx xiop1 = XEXP (index, 1);
5462 return ((arm_address_register_rtx_p (xiop0, strict_p)
5463 && power_of_two_operand (xiop1, SImode))
5464 || (arm_address_register_rtx_p (xiop1, strict_p)
5465 && power_of_two_operand (xiop0, SImode)));
5467 else if (code == LSHIFTRT || code == ASHIFTRT
5468 || code == ASHIFT || code == ROTATERT)
5470 rtx op = XEXP (index, 1);
5472 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5473 && GET_CODE (op) == CONST_INT
5474 && INTVAL (op) > 0
5475 && INTVAL (op) <= 31);
5479 /* For ARM v4 we may be doing a sign-extend operation during the
5480 load. */
5481 if (arm_arch4)
5483 if (mode == HImode
5484 || mode == HFmode
5485 || (outer == SIGN_EXTEND && mode == QImode))
5486 range = 256;
5487 else
5488 range = 4096;
5490 else
5491 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5493 return (code == CONST_INT
5494 && INTVAL (index) < range
5495 && INTVAL (index) > -range);
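/* Worked examples (annotation): addresses accepted here in ARM state
   include [r0, r1, lsl #2] (scaled register index), [r1, #4092] for a
   word load, and with TARGET_LDRD [r2, #-252] for DImode; an HImode
   offset on ARMv4 and later is limited to +/-255 so that it fits the
   ldrh addressing mode.  */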
5498 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5499 index operand. i.e. 1, 2, 4 or 8. */
5500 static bool
5501 thumb2_index_mul_operand (rtx op)
5503 HOST_WIDE_INT val;
5505 if (GET_CODE(op) != CONST_INT)
5506 return false;
5508 val = INTVAL(op);
5509 return (val == 1 || val == 2 || val == 4 || val == 8);
5512 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5513 static int
5514 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5516 enum rtx_code code = GET_CODE (index);
5518 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5519 /* Standard coprocessor addressing modes. */
5520 if (TARGET_HARD_FLOAT
5521 && (TARGET_FPA || TARGET_MAVERICK)
5522 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5523 || (TARGET_MAVERICK && mode == DImode)))
5524 return (code == CONST_INT && INTVAL (index) < 1024
5525 && INTVAL (index) > -1024
5526 && (INTVAL (index) & 3) == 0);
5528 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5530 /* For DImode assume values will usually live in core regs
5531 and only allow LDRD addressing modes. */
5532 if (!TARGET_LDRD || mode != DImode)
5533 return (code == CONST_INT
5534 && INTVAL (index) < 1024
5535 && INTVAL (index) > -1024
5536 && (INTVAL (index) & 3) == 0);
5539 if (TARGET_NEON
5540 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5541 return (code == CONST_INT
5542 && INTVAL (index) < 1016
5543 && INTVAL (index) > -1024
5544 && (INTVAL (index) & 3) == 0);
5546 if (arm_address_register_rtx_p (index, strict_p)
5547 && (GET_MODE_SIZE (mode) <= 4))
5548 return 1;
5550 if (mode == DImode || mode == DFmode)
5552 if (code == CONST_INT)
5554 HOST_WIDE_INT val = INTVAL (index);
5555 /* ??? Can we assume ldrd for thumb2? */
5556 /* Thumb-2 ldrd only has reg+const addressing modes. */
5557 /* ldrd supports offsets of +-1020.
5558 However the ldr fallback does not. */
5559 return val > -256 && val < 256 && (val & 3) == 0;
5561 else
5562 return 0;
5565 if (code == MULT)
5567 rtx xiop0 = XEXP (index, 0);
5568 rtx xiop1 = XEXP (index, 1);
5570 return ((arm_address_register_rtx_p (xiop0, strict_p)
5571 && thumb2_index_mul_operand (xiop1))
5572 || (arm_address_register_rtx_p (xiop1, strict_p)
5573 && thumb2_index_mul_operand (xiop0)));
5575 else if (code == ASHIFT)
5577 rtx op = XEXP (index, 1);
5579 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5580 && GET_CODE (op) == CONST_INT
5581 && INTVAL (op) > 0
5582 && INTVAL (op) <= 3);
5585 return (code == CONST_INT
5586 && INTVAL (index) < 4096
5587 && INTVAL (index) > -256);
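/* Worked examples (annotation): Thumb-2 accepts [r0, r1, lsl #2] with
   shift amounts of 1 to 3 only, plain offsets from -255 to +4095 for
   word-sized accesses, and for DImode/DFmode an ldrd-style offset that
   is a multiple of 4 and at most 252 in magnitude.  */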
5590 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5591 static int
5592 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5594 int regno;
5596 if (GET_CODE (x) != REG)
5597 return 0;
5599 regno = REGNO (x);
5601 if (strict_p)
5602 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5604 return (regno <= LAST_LO_REGNUM
5605 || regno > LAST_VIRTUAL_REGISTER
5606 || regno == FRAME_POINTER_REGNUM
5607 || (GET_MODE_SIZE (mode) >= 4
5608 && (regno == STACK_POINTER_REGNUM
5609 || regno >= FIRST_PSEUDO_REGISTER
5610 || x == hard_frame_pointer_rtx
5611 || x == arg_pointer_rtx)));
5614 /* Return nonzero if x is a legitimate index register. This is the case
5615 for any base register that can access a QImode object. */
5616 inline static int
5617 thumb1_index_register_rtx_p (rtx x, int strict_p)
5619 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5622 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5624 The AP may be eliminated to either the SP or the FP, so we use the
5625 least common denominator, e.g. SImode, and offsets from 0 to 64.
5627 ??? Verify whether the above is the right approach.
5629 ??? Also, the FP may be eliminated to the SP, so perhaps that
5630 needs special handling also.
5632 ??? Look at how the mips16 port solves this problem. It probably uses
5633 better ways to solve some of these problems.
5635 Although it is not incorrect, we don't accept QImode and HImode
5636 addresses based on the frame pointer or arg pointer until the
5637 reload pass starts. This is so that eliminating such addresses
5638 into stack based ones won't produce impossible code. */
5639 static int
5640 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5642 /* ??? Not clear if this is right. Experiment. */
5643 if (GET_MODE_SIZE (mode) < 4
5644 && !(reload_in_progress || reload_completed)
5645 && (reg_mentioned_p (frame_pointer_rtx, x)
5646 || reg_mentioned_p (arg_pointer_rtx, x)
5647 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5648 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5649 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5650 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5651 return 0;
5653 /* Accept any base register. SP only in SImode or larger. */
5654 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5655 return 1;
5657 /* This is PC relative data before arm_reorg runs. */
5658 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5659 && GET_CODE (x) == SYMBOL_REF
5660 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5661 return 1;
5663 /* This is PC relative data after arm_reorg runs. */
5664 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5665 && reload_completed
5666 && (GET_CODE (x) == LABEL_REF
5667 || (GET_CODE (x) == CONST
5668 && GET_CODE (XEXP (x, 0)) == PLUS
5669 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5670 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5671 return 1;
5673 /* Post-inc indexing only supported for SImode and larger. */
5674 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5675 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5676 return 1;
5678 else if (GET_CODE (x) == PLUS)
5680 /* REG+REG address can be any two index registers. */
5681 /* We disallow FRAME+REG addressing since we know that FRAME
5682 will be replaced with STACK, and SP relative addressing only
5683 permits SP+OFFSET. */
5684 if (GET_MODE_SIZE (mode) <= 4
5685 && XEXP (x, 0) != frame_pointer_rtx
5686 && XEXP (x, 1) != frame_pointer_rtx
5687 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5688 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
5689 return 1;
5691 /* REG+const has 5-7 bit offset for non-SP registers. */
5692 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5693 || XEXP (x, 0) == arg_pointer_rtx)
5694 && GET_CODE (XEXP (x, 1)) == CONST_INT
5695 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5696 return 1;
5698 /* REG+const has a 10-bit offset for SP, but only SImode and
5699 larger are supported. */
5700 /* ??? Should probably check for DI/DFmode overflow here
5701 just like GO_IF_LEGITIMATE_OFFSET does. */
5702 else if (GET_CODE (XEXP (x, 0)) == REG
5703 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5704 && GET_MODE_SIZE (mode) >= 4
5705 && GET_CODE (XEXP (x, 1)) == CONST_INT
5706 && INTVAL (XEXP (x, 1)) >= 0
5707 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5708 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5709 return 1;
5711 else if (GET_CODE (XEXP (x, 0)) == REG
5712 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5713 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5714 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5715 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5716 && GET_MODE_SIZE (mode) >= 4
5717 && GET_CODE (XEXP (x, 1)) == CONST_INT
5718 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5719 return 1;
5722 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5723 && GET_MODE_SIZE (mode) == 4
5724 && GET_CODE (x) == SYMBOL_REF
5725 && CONSTANT_POOL_ADDRESS_P (x)
5726 && ! (flag_pic
5727 && symbol_mentioned_p (get_pool_constant (x))
5728 && ! pcrel_constant_p (get_pool_constant (x))))
5729 return 1;
5731 return 0;
5734 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5735 instruction of mode MODE. */
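/* For illustration: with the checks below, a QImode access accepts offsets
   0..31, an HImode access accepts the even offsets 0..62, and an SImode
   access accepts the word-aligned offsets 0..124 (124 + 4 == 128).  */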
5737 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5739 switch (GET_MODE_SIZE (mode))
5741 case 1:
5742 return val >= 0 && val < 32;
5744 case 2:
5745 return val >= 0 && val < 64 && (val & 1) == 0;
5747 default:
5748 return (val >= 0
5749 && (val + GET_MODE_SIZE (mode)) <= 128
5750 && (val & 3) == 0);
5754 bool
5755 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5757 if (TARGET_ARM)
5758 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5759 else if (TARGET_THUMB2)
5760 return thumb2_legitimate_address_p (mode, x, strict_p);
5761 else /* if (TARGET_THUMB1) */
5762 return thumb1_legitimate_address_p (mode, x, strict_p);
5765 /* Build the SYMBOL_REF for __tls_get_addr. */
5767 static GTY(()) rtx tls_get_addr_libfunc;
5769 static rtx
5770 get_tls_get_addr (void)
5772 if (!tls_get_addr_libfunc)
5773 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5774 return tls_get_addr_libfunc;
5777 static rtx
5778 arm_load_tp (rtx target)
5780 if (!target)
5781 target = gen_reg_rtx (SImode);
5783 if (TARGET_HARD_TP)
5785 /* Can return in any reg. */
5786 emit_insn (gen_load_tp_hard (target));
5788 else
5790 /* Always returned in r0. Immediately copy the result into a pseudo,
5791 otherwise other uses of r0 (e.g. setting up function arguments) may
5792 clobber the value. */
5794 rtx tmp;
5796 emit_insn (gen_load_tp_soft ());
5798 tmp = gen_rtx_REG (SImode, 0);
5799 emit_move_insn (target, tmp);
5801 return target;
5804 static rtx
5805 load_tls_operand (rtx x, rtx reg)
5807 rtx tmp;
5809 if (reg == NULL_RTX)
5810 reg = gen_reg_rtx (SImode);
5812 tmp = gen_rtx_CONST (SImode, x);
5814 emit_move_insn (reg, tmp);
5816 return reg;
5819 static rtx
5820 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5822 rtx insns, label, labelno, sum;
5824 start_sequence ();
5826 labelno = GEN_INT (pic_labelno++);
5827 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5828 label = gen_rtx_CONST (VOIDmode, label);
5830 sum = gen_rtx_UNSPEC (Pmode,
5831 gen_rtvec (4, x, GEN_INT (reloc), label,
5832 GEN_INT (TARGET_ARM ? 8 : 4)),
5833 UNSPEC_TLS);
5834 reg = load_tls_operand (sum, reg);
5836 if (TARGET_ARM)
5837 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5838 else if (TARGET_THUMB2)
5839 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5840 else /* TARGET_THUMB1 */
5841 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5843 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5844 Pmode, 1, reg, Pmode);
5846 insns = get_insns ();
5847 end_sequence ();
5849 return insns;
5853 legitimize_tls_address (rtx x, rtx reg)
5855 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5856 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5858 switch (model)
5860 case TLS_MODEL_GLOBAL_DYNAMIC:
5861 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5862 dest = gen_reg_rtx (Pmode);
5863 emit_libcall_block (insns, dest, ret, x);
5864 return dest;
5866 case TLS_MODEL_LOCAL_DYNAMIC:
5867 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5869 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5870 share the LDM result with other LD model accesses. */
5871 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5872 UNSPEC_TLS);
5873 dest = gen_reg_rtx (Pmode);
5874 emit_libcall_block (insns, dest, ret, eqv);
5876 /* Load the addend. */
5877 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5878 UNSPEC_TLS);
5879 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5880 return gen_rtx_PLUS (Pmode, dest, addend);
5882 case TLS_MODEL_INITIAL_EXEC:
5883 labelno = GEN_INT (pic_labelno++);
5884 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5885 label = gen_rtx_CONST (VOIDmode, label);
5886 sum = gen_rtx_UNSPEC (Pmode,
5887 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5888 GEN_INT (TARGET_ARM ? 8 : 4)),
5889 UNSPEC_TLS);
5890 reg = load_tls_operand (sum, reg);
5892 if (TARGET_ARM)
5893 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5894 else if (TARGET_THUMB2)
5895 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5896 else
5898 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5899 emit_move_insn (reg, gen_const_mem (SImode, reg));
5902 tp = arm_load_tp (NULL_RTX);
5904 return gen_rtx_PLUS (Pmode, tp, reg);
5906 case TLS_MODEL_LOCAL_EXEC:
5907 tp = arm_load_tp (NULL_RTX);
5909 reg = gen_rtx_UNSPEC (Pmode,
5910 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5911 UNSPEC_TLS);
5912 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5914 return gen_rtx_PLUS (Pmode, tp, reg);
5916 default:
5917 abort ();
5921 /* Try machine-dependent ways of modifying an illegitimate address
5922 to be legitimate. If we find one, return the new, valid address. */
5924 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5926 if (!TARGET_ARM)
5928 /* TODO: legitimize_address for Thumb2. */
5929 if (TARGET_THUMB2)
5930 return x;
5931 return thumb_legitimize_address (x, orig_x, mode);
5934 if (arm_tls_symbol_p (x))
5935 return legitimize_tls_address (x, NULL_RTX);
5937 if (GET_CODE (x) == PLUS)
5939 rtx xop0 = XEXP (x, 0);
5940 rtx xop1 = XEXP (x, 1);
5942 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5943 xop0 = force_reg (SImode, xop0);
5945 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5946 xop1 = force_reg (SImode, xop1);
5948 if (ARM_BASE_REGISTER_RTX_P (xop0)
5949 && GET_CODE (xop1) == CONST_INT)
5951 HOST_WIDE_INT n, low_n;
5952 rtx base_reg, val;
5953 n = INTVAL (xop1);
5955 /* VFP addressing modes actually allow greater offsets, but for
5956 now we just stick with the lowest common denominator. */
5957 if (mode == DImode
5958 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
5960 low_n = n & 0x0f;
5961 n &= ~0x0f;
5962 if (low_n > 4)
5964 n += 16;
5965 low_n -= 16;
5968 else
5970 low_n = ((mode) == TImode ? 0
5971 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
5972 n -= low_n;
5975 base_reg = gen_reg_rtx (SImode);
5976 val = force_operand (plus_constant (xop0, n), NULL_RTX);
5977 emit_move_insn (base_reg, val);
5978 x = plus_constant (base_reg, low_n);
5980 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5981 x = gen_rtx_PLUS (SImode, xop0, xop1);
5984 /* XXX We don't allow MINUS any more -- see comment in
5985 arm_legitimate_address_outer_p (). */
5986 else if (GET_CODE (x) == MINUS)
5988 rtx xop0 = XEXP (x, 0);
5989 rtx xop1 = XEXP (x, 1);
5991 if (CONSTANT_P (xop0))
5992 xop0 = force_reg (SImode, xop0);
5994 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
5995 xop1 = force_reg (SImode, xop1);
5997 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5998 x = gen_rtx_MINUS (SImode, xop0, xop1);
6001 /* Make sure to take full advantage of the pre-indexed addressing mode
6002 with absolute addresses, which often allows the base register to
6003 be factored out across multiple adjacent memory references, and might
6004 even allow the minipool to be avoided entirely. */
6005 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6007 unsigned int bits;
6008 HOST_WIDE_INT mask, base, index;
6009 rtx base_reg;
6011 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
6012 use an 8-bit index. So let's use a 12-bit index for SImode only and
6013 hope that arm_gen_constant will enable ldrb to use more bits. */
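/* A worked example (purely illustrative): for an SImode access to the
   absolute address 0x30456, bits is 12, so base becomes 0x30000 and
   index becomes 0x456; the base is loaded into a register once and the
   access is rewritten as base_reg + 0x456, letting neighbouring accesses
   share the same base register.  */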
6014 bits = (mode == SImode) ? 12 : 8;
6015 mask = (1 << bits) - 1;
6016 base = INTVAL (x) & ~mask;
6017 index = INTVAL (x) & mask;
6018 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6020 /* It'll most probably be more efficient to generate the base
6021 with more bits set and use a negative index instead. */
6022 base |= mask;
6023 index -= mask;
6025 base_reg = force_reg (SImode, GEN_INT (base));
6026 x = plus_constant (base_reg, index);
6029 if (flag_pic)
6031 /* We need to find and carefully transform any SYMBOL and LABEL
6032 references, so go back to the original address expression. */
6033 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6035 if (new_x != orig_x)
6036 x = new_x;
6039 return x;
6043 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6044 to be legitimate. If we find one, return the new, valid address. */
6046 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6048 if (arm_tls_symbol_p (x))
6049 return legitimize_tls_address (x, NULL_RTX);
6051 if (GET_CODE (x) == PLUS
6052 && GET_CODE (XEXP (x, 1)) == CONST_INT
6053 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6054 || INTVAL (XEXP (x, 1)) < 0))
6056 rtx xop0 = XEXP (x, 0);
6057 rtx xop1 = XEXP (x, 1);
6058 HOST_WIDE_INT offset = INTVAL (xop1);
6060 /* Try and fold the offset into a biasing of the base register and
6061 then offsetting that. Don't do this when optimizing for space
6062 since it can cause too many CSEs. */
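/* A worked example (purely illustrative): when this transformation applies
   to an SImode access with a constant offset of 300, delta is
   300 - (256 - 4) == 48, so the base is biased by 252 and the access uses
   the remaining in-range offset 48.  */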
6063 if (optimize_size && offset >= 0
6064 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6066 HOST_WIDE_INT delta;
6068 if (offset >= 256)
6069 delta = offset - (256 - GET_MODE_SIZE (mode));
6070 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6071 delta = 31 * GET_MODE_SIZE (mode);
6072 else
6073 delta = offset & (~31 * GET_MODE_SIZE (mode));
6075 xop0 = force_operand (plus_constant (xop0, offset - delta),
6076 NULL_RTX);
6077 x = plus_constant (xop0, delta);
6079 else if (offset < 0 && offset > -256)
6080 /* Small negative offsets are best done with a subtract before the
6081 dereference; forcing these into a register normally takes two
6082 instructions. */
6083 x = force_operand (x, NULL_RTX);
6084 else
6086 /* For the remaining cases, force the constant into a register. */
6087 xop1 = force_reg (SImode, xop1);
6088 x = gen_rtx_PLUS (SImode, xop0, xop1);
6091 else if (GET_CODE (x) == PLUS
6092 && s_register_operand (XEXP (x, 1), SImode)
6093 && !s_register_operand (XEXP (x, 0), SImode))
6095 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6097 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6100 if (flag_pic)
6102 /* We need to find and carefully transform any SYMBOL and LABEL
6103 references, so go back to the original address expression. */
6104 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6106 if (new_x != orig_x)
6107 x = new_x;
6110 return x;
6114 thumb_legitimize_reload_address (rtx *x_p,
6115 enum machine_mode mode,
6116 int opnum, int type,
6117 int ind_levels ATTRIBUTE_UNUSED)
6119 rtx x = *x_p;
6121 if (GET_CODE (x) == PLUS
6122 && GET_MODE_SIZE (mode) < 4
6123 && REG_P (XEXP (x, 0))
6124 && XEXP (x, 0) == stack_pointer_rtx
6125 && GET_CODE (XEXP (x, 1)) == CONST_INT
6126 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6128 rtx orig_x = x;
6130 x = copy_rtx (x);
6131 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6132 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6133 return x;
6136 /* If both registers are hi-regs, then it's better to reload the
6137 entire expression rather than each register individually. That
6138 only requires one reload register rather than two. */
6139 if (GET_CODE (x) == PLUS
6140 && REG_P (XEXP (x, 0))
6141 && REG_P (XEXP (x, 1))
6142 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6143 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6145 rtx orig_x = x;
6147 x = copy_rtx (x);
6148 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6149 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6150 return x;
6153 return NULL;
6156 /* Test for various thread-local symbols. */
6158 /* Return TRUE if X is a thread-local symbol. */
6160 static bool
6161 arm_tls_symbol_p (rtx x)
6163 if (! TARGET_HAVE_TLS)
6164 return false;
6166 if (GET_CODE (x) != SYMBOL_REF)
6167 return false;
6169 return SYMBOL_REF_TLS_MODEL (x) != 0;
6172 /* Helper for arm_tls_referenced_p. */
6174 static int
6175 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6177 if (GET_CODE (*x) == SYMBOL_REF)
6178 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6180 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6181 TLS offsets, not real symbol references. */
6182 if (GET_CODE (*x) == UNSPEC
6183 && XINT (*x, 1) == UNSPEC_TLS)
6184 return -1;
6186 return 0;
6189 /* Return TRUE if X contains any TLS symbol references. */
6191 bool
6192 arm_tls_referenced_p (rtx x)
6194 if (! TARGET_HAVE_TLS)
6195 return false;
6197 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6200 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6202 bool
6203 arm_cannot_force_const_mem (rtx x)
6205 rtx base, offset;
6207 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6209 split_const (x, &base, &offset);
6210 if (GET_CODE (base) == SYMBOL_REF
6211 && !offset_within_block_p (base, INTVAL (offset)))
6212 return true;
6214 return arm_tls_referenced_p (x);
6217 #define REG_OR_SUBREG_REG(X) \
6218 (GET_CODE (X) == REG \
6219 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6221 #define REG_OR_SUBREG_RTX(X) \
6222 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6224 #ifndef COSTS_N_INSNS
6225 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
6226 #endif
6227 static inline int
6228 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6230 enum machine_mode mode = GET_MODE (x);
6232 switch (code)
6234 case ASHIFT:
6235 case ASHIFTRT:
6236 case LSHIFTRT:
6237 case ROTATERT:
6238 case PLUS:
6239 case MINUS:
6240 case COMPARE:
6241 case NEG:
6242 case NOT:
6243 return COSTS_N_INSNS (1);
6245 case MULT:
6246 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6248 int cycles = 0;
6249 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
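/* Illustrative example: a multiplier of 20 (binary 10100) is consumed in
   three iterations of the loop below (20 -> 5 -> 1 -> 0), giving an
   estimate of COSTS_N_INSNS (2) + 3.  */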
6251 while (i)
6253 i >>= 2;
6254 cycles++;
6256 return COSTS_N_INSNS (2) + cycles;
6258 return COSTS_N_INSNS (1) + 16;
6260 case SET:
6261 return (COSTS_N_INSNS (1)
6262 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6263 + (GET_CODE (SET_DEST (x)) == MEM)));
6265 case CONST_INT:
6266 if (outer == SET)
6268 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6269 return 0;
6270 if (thumb_shiftable_const (INTVAL (x)))
6271 return COSTS_N_INSNS (2);
6272 return COSTS_N_INSNS (3);
6274 else if ((outer == PLUS || outer == COMPARE)
6275 && INTVAL (x) < 256 && INTVAL (x) > -256)
6276 return 0;
6277 else if ((outer == IOR || outer == XOR || outer == AND)
6278 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6279 return COSTS_N_INSNS (1);
6280 else if (outer == AND)
6282 int i;
6283 /* This duplicates the tests in the andsi3 expander. */
6284 for (i = 9; i <= 31; i++)
6285 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6286 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6287 return COSTS_N_INSNS (2);
6289 else if (outer == ASHIFT || outer == ASHIFTRT
6290 || outer == LSHIFTRT)
6291 return 0;
6292 return COSTS_N_INSNS (2);
6294 case CONST:
6295 case CONST_DOUBLE:
6296 case LABEL_REF:
6297 case SYMBOL_REF:
6298 return COSTS_N_INSNS (3);
6300 case UDIV:
6301 case UMOD:
6302 case DIV:
6303 case MOD:
6304 return 100;
6306 case TRUNCATE:
6307 return 99;
6309 case AND:
6310 case XOR:
6311 case IOR:
6312 /* XXX guess. */
6313 return 8;
6315 case MEM:
6316 /* XXX another guess. */
6317 /* Memory costs quite a lot for the first word, but subsequent words
6318 load at the equivalent of a single insn each. */
6319 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6320 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6321 ? 4 : 0));
6323 case IF_THEN_ELSE:
6324 /* XXX a guess. */
6325 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6326 return 14;
6327 return 2;
6329 case ZERO_EXTEND:
6330 /* XXX still guessing. */
6331 switch (GET_MODE (XEXP (x, 0)))
6333 case QImode:
6334 return (1 + (mode == DImode ? 4 : 0)
6335 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6337 case HImode:
6338 return (4 + (mode == DImode ? 4 : 0)
6339 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6341 case SImode:
6342 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6344 default:
6345 return 99;
6348 default:
6349 return 99;
6353 static inline bool
6354 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6356 enum machine_mode mode = GET_MODE (x);
6357 enum rtx_code subcode;
6358 rtx operand;
6359 enum rtx_code code = GET_CODE (x);
6360 *total = 0;
6362 switch (code)
6364 case MEM:
6365 /* Memory costs quite a lot for the first word, but subsequent words
6366 load at the equivalent of a single insn each. */
6367 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6368 return true;
6370 case DIV:
6371 case MOD:
6372 case UDIV:
6373 case UMOD:
6374 if (TARGET_HARD_FLOAT && mode == SFmode)
6375 *total = COSTS_N_INSNS (2);
6376 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6377 *total = COSTS_N_INSNS (4);
6378 else
6379 *total = COSTS_N_INSNS (20);
6380 return false;
6382 case ROTATE:
6383 if (GET_CODE (XEXP (x, 1)) == REG)
6384 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6385 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6386 *total = rtx_cost (XEXP (x, 1), code, speed);
6388 /* Fall through */
6389 case ROTATERT:
6390 if (mode != SImode)
6392 *total += COSTS_N_INSNS (4);
6393 return true;
6396 /* Fall through */
6397 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6398 *total += rtx_cost (XEXP (x, 0), code, speed);
6399 if (mode == DImode)
6401 *total += COSTS_N_INSNS (3);
6402 return true;
6405 *total += COSTS_N_INSNS (1);
6406 /* Increase the cost of complex shifts because they aren't any faster
6407 and they reduce dual-issue opportunities. */
6408 if (arm_tune_cortex_a9
6409 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6410 ++*total;
6412 return true;
6414 case MINUS:
6415 if (TARGET_THUMB2)
6417 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6419 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6420 *total = COSTS_N_INSNS (1);
6421 else
6422 *total = COSTS_N_INSNS (20);
6424 else
6425 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6426 /* Thumb2 does not have RSB, so all arguments must be
6427 registers (subtracting a constant is canonicalized as
6428 addition of the negated constant). */
6429 return false;
6432 if (mode == DImode)
6434 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6435 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6436 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6438 *total += rtx_cost (XEXP (x, 1), code, speed);
6439 return true;
6442 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6443 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6445 *total += rtx_cost (XEXP (x, 0), code, speed);
6446 return true;
6449 return false;
6452 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6454 if (TARGET_HARD_FLOAT
6455 && (mode == SFmode
6456 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6458 *total = COSTS_N_INSNS (1);
6459 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6460 && arm_const_double_rtx (XEXP (x, 0)))
6462 *total += rtx_cost (XEXP (x, 1), code, speed);
6463 return true;
6466 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6467 && arm_const_double_rtx (XEXP (x, 1)))
6469 *total += rtx_cost (XEXP (x, 0), code, speed);
6470 return true;
6473 return false;
6475 *total = COSTS_N_INSNS (20);
6476 return false;
6479 *total = COSTS_N_INSNS (1);
6480 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6481 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6483 *total += rtx_cost (XEXP (x, 1), code, speed);
6484 return true;
6487 subcode = GET_CODE (XEXP (x, 1));
6488 if (subcode == ASHIFT || subcode == ASHIFTRT
6489 || subcode == LSHIFTRT
6490 || subcode == ROTATE || subcode == ROTATERT)
6492 *total += rtx_cost (XEXP (x, 0), code, speed);
6493 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6494 return true;
6497 /* A shift as a part of RSB costs no more than RSB itself. */
6498 if (GET_CODE (XEXP (x, 0)) == MULT
6499 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6501 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6502 *total += rtx_cost (XEXP (x, 1), code, speed);
6503 return true;
6506 if (subcode == MULT
6507 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6509 *total += rtx_cost (XEXP (x, 0), code, speed);
6510 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6511 return true;
6514 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6515 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6517 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6518 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6519 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6520 *total += COSTS_N_INSNS (1);
6522 return true;
6525 /* Fall through */
6527 case PLUS:
6528 if (code == PLUS && arm_arch6 && mode == SImode
6529 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6530 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6532 *total = COSTS_N_INSNS (1);
6533 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6534 speed);
6535 *total += rtx_cost (XEXP (x, 1), code, speed);
6536 return true;
6539 /* MLA: All arguments must be registers. We filter out
6540 multiplication by a power of two, so that we fall through to
6541 the code below. */
6542 if (GET_CODE (XEXP (x, 0)) == MULT
6543 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6545 /* The cost comes from the cost of the multiply. */
6546 return false;
6549 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6551 if (TARGET_HARD_FLOAT
6552 && (mode == SFmode
6553 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6555 *total = COSTS_N_INSNS (1);
6556 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6557 && arm_const_double_rtx (XEXP (x, 1)))
6559 *total += rtx_cost (XEXP (x, 0), code, speed);
6560 return true;
6563 return false;
6566 *total = COSTS_N_INSNS (20);
6567 return false;
6570 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6571 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6573 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6574 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6575 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6576 *total += COSTS_N_INSNS (1);
6577 return true;
6580 /* Fall through */
6582 case AND: case XOR: case IOR:
6584 /* Normally the frame registers will be split into reg+const during
6585 reload, so it is a bad idea to combine them with other instructions,
6586 since then they might not be moved outside of loops. As a compromise
6587 we allow integration with ops that have a constant as their second
6588 operand. */
6589 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6590 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6591 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6592 || (REG_OR_SUBREG_REG (XEXP (x, 0))
6593 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
6594 *total = 4;
6596 if (mode == DImode)
6598 *total += COSTS_N_INSNS (2);
6599 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6600 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6602 *total += rtx_cost (XEXP (x, 0), code, speed);
6603 return true;
6606 return false;
6609 *total += COSTS_N_INSNS (1);
6610 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6611 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6613 *total += rtx_cost (XEXP (x, 0), code, speed);
6614 return true;
6616 subcode = GET_CODE (XEXP (x, 0));
6617 if (subcode == ASHIFT || subcode == ASHIFTRT
6618 || subcode == LSHIFTRT
6619 || subcode == ROTATE || subcode == ROTATERT)
6621 *total += rtx_cost (XEXP (x, 1), code, speed);
6622 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6623 return true;
6626 if (subcode == MULT
6627 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6629 *total += rtx_cost (XEXP (x, 1), code, speed);
6630 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6631 return true;
6634 if (subcode == UMIN || subcode == UMAX
6635 || subcode == SMIN || subcode == SMAX)
6637 *total = COSTS_N_INSNS (3);
6638 return true;
6641 return false;
6643 case MULT:
6644 /* This should have been handled by the CPU specific routines. */
6645 gcc_unreachable ();
6647 case TRUNCATE:
6648 if (arm_arch3m && mode == SImode
6649 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6650 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6651 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6652 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6653 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6654 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6656 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6657 return true;
6659 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6660 return false;
6662 case NEG:
6663 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6665 if (TARGET_HARD_FLOAT
6666 && (mode == SFmode
6667 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6669 *total = COSTS_N_INSNS (1);
6670 return false;
6672 *total = COSTS_N_INSNS (2);
6673 return false;
6676 /* Fall through */
6677 case NOT:
6678 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6679 if (mode == SImode && code == NOT)
6681 subcode = GET_CODE (XEXP (x, 0));
6682 if (subcode == ASHIFT || subcode == ASHIFTRT
6683 || subcode == LSHIFTRT
6684 || subcode == ROTATE || subcode == ROTATERT
6685 || (subcode == MULT
6686 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6688 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6689 /* Register shifts cost an extra cycle. */
6690 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6691 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6692 subcode, speed);
6693 return true;
6697 return false;
6699 case IF_THEN_ELSE:
6700 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6702 *total = COSTS_N_INSNS (4);
6703 return true;
6706 operand = XEXP (x, 0);
6708 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6709 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6710 && GET_CODE (XEXP (operand, 0)) == REG
6711 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6712 *total += COSTS_N_INSNS (1);
6713 *total += (rtx_cost (XEXP (x, 1), code, speed)
6714 + rtx_cost (XEXP (x, 2), code, speed));
6715 return true;
6717 case NE:
6718 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6720 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6721 return true;
6723 goto scc_insn;
6725 case GE:
6726 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6727 && mode == SImode && XEXP (x, 1) == const0_rtx)
6729 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6730 return true;
6732 goto scc_insn;
6734 case LT:
6735 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6736 && mode == SImode && XEXP (x, 1) == const0_rtx)
6738 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6739 return true;
6741 goto scc_insn;
6743 case EQ:
6744 case GT:
6745 case LE:
6746 case GEU:
6747 case LTU:
6748 case GTU:
6749 case LEU:
6750 case UNORDERED:
6751 case ORDERED:
6752 case UNEQ:
6753 case UNGE:
6754 case UNLT:
6755 case UNGT:
6756 case UNLE:
6757 scc_insn:
6758 /* SCC insns. If the comparison has already been performed, they
6759 cost 2 instructions. Otherwise they need an additional comparison
6760 before them. */
6761 *total = COSTS_N_INSNS (2);
6762 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6764 return true;
6767 /* Fall through */
6768 case COMPARE:
6769 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6771 *total = 0;
6772 return true;
6775 *total += COSTS_N_INSNS (1);
6776 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6777 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6779 *total += rtx_cost (XEXP (x, 0), code, speed);
6780 return true;
6783 subcode = GET_CODE (XEXP (x, 0));
6784 if (subcode == ASHIFT || subcode == ASHIFTRT
6785 || subcode == LSHIFTRT
6786 || subcode == ROTATE || subcode == ROTATERT)
6788 *total += rtx_cost (XEXP (x, 1), code, speed);
6789 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6790 return true;
6793 if (subcode == MULT
6794 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6796 *total += rtx_cost (XEXP (x, 1), code, speed);
6797 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6798 return true;
6801 return false;
6803 case UMIN:
6804 case UMAX:
6805 case SMIN:
6806 case SMAX:
6807 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6808 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6809 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6810 *total += rtx_cost (XEXP (x, 1), code, speed);
6811 return true;
6813 case ABS:
6814 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6816 if (TARGET_HARD_FLOAT
6817 && (mode == SFmode
6818 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6820 *total = COSTS_N_INSNS (1);
6821 return false;
6823 *total = COSTS_N_INSNS (20);
6824 return false;
6826 *total = COSTS_N_INSNS (1);
6827 if (mode == DImode)
6828 *total += COSTS_N_INSNS (3);
6829 return false;
6831 case SIGN_EXTEND:
6832 if (GET_MODE_CLASS (mode) == MODE_INT)
6834 *total = 0;
6835 if (mode == DImode)
6836 *total += COSTS_N_INSNS (1);
6838 if (GET_MODE (XEXP (x, 0)) != SImode)
6840 if (arm_arch6)
6842 if (GET_CODE (XEXP (x, 0)) != MEM)
6843 *total += COSTS_N_INSNS (1);
6845 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6846 *total += COSTS_N_INSNS (2);
6849 return false;
6852 /* Fall through */
6853 case ZERO_EXTEND:
6854 *total = 0;
6855 if (GET_MODE_CLASS (mode) == MODE_INT)
6857 if (mode == DImode)
6858 *total += COSTS_N_INSNS (1);
6860 if (GET_MODE (XEXP (x, 0)) != SImode)
6862 if (arm_arch6)
6864 if (GET_CODE (XEXP (x, 0)) != MEM)
6865 *total += COSTS_N_INSNS (1);
6867 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6868 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
6869 1 : 2);
6872 return false;
6875 switch (GET_MODE (XEXP (x, 0)))
6877 case V8QImode:
6878 case V4HImode:
6879 case V2SImode:
6880 case V4QImode:
6881 case V2HImode:
6882 *total = COSTS_N_INSNS (1);
6883 return false;
6885 default:
6886 gcc_unreachable ();
6888 gcc_unreachable ();
6890 case ZERO_EXTRACT:
6891 case SIGN_EXTRACT:
6892 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6893 return true;
6895 case CONST_INT:
6896 if (const_ok_for_arm (INTVAL (x))
6897 || const_ok_for_arm (~INTVAL (x)))
6898 *total = COSTS_N_INSNS (1);
6899 else
6900 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6901 INTVAL (x), NULL_RTX,
6902 NULL_RTX, 0, 0));
6903 return true;
6905 case CONST:
6906 case LABEL_REF:
6907 case SYMBOL_REF:
6908 *total = COSTS_N_INSNS (3);
6909 return true;
6911 case HIGH:
6912 *total = COSTS_N_INSNS (1);
6913 return true;
6915 case LO_SUM:
6916 *total = COSTS_N_INSNS (1);
6917 *total += rtx_cost (XEXP (x, 0), code, speed);
6918 return true;
6920 case CONST_DOUBLE:
6921 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
6922 && (mode == SFmode || !TARGET_VFP_SINGLE))
6923 *total = COSTS_N_INSNS (1);
6924 else
6925 *total = COSTS_N_INSNS (4);
6926 return true;
6928 default:
6929 *total = COSTS_N_INSNS (4);
6930 return false;
6934 /* Estimates the size cost of thumb1 instructions.
6935 For now most of the code is copied from thumb1_rtx_costs. We need more
6936 fine-grained tuning when we have more related test cases. */
6937 static inline int
6938 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6940 enum machine_mode mode = GET_MODE (x);
6942 switch (code)
6944 case ASHIFT:
6945 case ASHIFTRT:
6946 case LSHIFTRT:
6947 case ROTATERT:
6948 case PLUS:
6949 case MINUS:
6950 case COMPARE:
6951 case NEG:
6952 case NOT:
6953 return COSTS_N_INSNS (1);
6955 case MULT:
6956 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6958 /* The Thumb1 mul instruction can't operate on a constant. We must load it
6959 into a register first. */
6960 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
6961 return COSTS_N_INSNS (1) + const_size;
6963 return COSTS_N_INSNS (1);
6965 case SET:
6966 return (COSTS_N_INSNS (1)
6967 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6968 + (GET_CODE (SET_DEST (x)) == MEM)));
6970 case CONST_INT:
6971 if (outer == SET)
6973 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6974 return 0;
6975 if (thumb_shiftable_const (INTVAL (x)))
6976 return COSTS_N_INSNS (2);
6977 return COSTS_N_INSNS (3);
6979 else if ((outer == PLUS || outer == COMPARE)
6980 && INTVAL (x) < 256 && INTVAL (x) > -256)
6981 return 0;
6982 else if ((outer == IOR || outer == XOR || outer == AND)
6983 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6984 return COSTS_N_INSNS (1);
6985 else if (outer == AND)
6987 int i;
6988 /* This duplicates the tests in the andsi3 expander. */
6989 for (i = 9; i <= 31; i++)
6990 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6991 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6992 return COSTS_N_INSNS (2);
6994 else if (outer == ASHIFT || outer == ASHIFTRT
6995 || outer == LSHIFTRT)
6996 return 0;
6997 return COSTS_N_INSNS (2);
6999 case CONST:
7000 case CONST_DOUBLE:
7001 case LABEL_REF:
7002 case SYMBOL_REF:
7003 return COSTS_N_INSNS (3);
7005 case UDIV:
7006 case UMOD:
7007 case DIV:
7008 case MOD:
7009 return 100;
7011 case TRUNCATE:
7012 return 99;
7014 case AND:
7015 case XOR:
7016 case IOR:
7017 /* XXX guess. */
7018 return 8;
7020 case MEM:
7021 /* XXX another guess. */
7022 /* Memory costs quite a lot for the first word, but subsequent words
7023 load at the equivalent of a single insn each. */
7024 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7025 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7026 ? 4 : 0));
7028 case IF_THEN_ELSE:
7029 /* XXX a guess. */
7030 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7031 return 14;
7032 return 2;
7034 case ZERO_EXTEND:
7035 /* XXX still guessing. */
7036 switch (GET_MODE (XEXP (x, 0)))
7038 case QImode:
7039 return (1 + (mode == DImode ? 4 : 0)
7040 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7042 case HImode:
7043 return (4 + (mode == DImode ? 4 : 0)
7044 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7046 case SImode:
7047 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7049 default:
7050 return 99;
7053 default:
7054 return 99;
7058 /* RTX costs when optimizing for size. */
7059 static bool
7060 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7061 int *total)
7063 enum machine_mode mode = GET_MODE (x);
7064 if (TARGET_THUMB1)
7066 *total = thumb1_size_rtx_costs (x, code, outer_code);
7067 return true;
7070 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7071 switch (code)
7073 case MEM:
7074 /* A memory access costs 1 insn if the mode is small or the address is
7075 a single register; otherwise it costs one insn per word. */
7076 if (REG_P (XEXP (x, 0)))
7077 *total = COSTS_N_INSNS (1);
7078 else
7079 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7080 return true;
7082 case DIV:
7083 case MOD:
7084 case UDIV:
7085 case UMOD:
7086 /* Needs a libcall, so it costs about this. */
7087 *total = COSTS_N_INSNS (2);
7088 return false;
7090 case ROTATE:
7091 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7093 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7094 return true;
7096 /* Fall through */
7097 case ROTATERT:
7098 case ASHIFT:
7099 case LSHIFTRT:
7100 case ASHIFTRT:
7101 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7103 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7104 return true;
7106 else if (mode == SImode)
7108 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7109 /* Slightly disparage register shifts, but not by much. */
7110 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7111 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7112 return true;
7115 /* Needs a libcall. */
7116 *total = COSTS_N_INSNS (2);
7117 return false;
7119 case MINUS:
7120 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7121 && (mode == SFmode || !TARGET_VFP_SINGLE))
7123 *total = COSTS_N_INSNS (1);
7124 return false;
7127 if (mode == SImode)
7129 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7130 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7132 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7133 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7134 || subcode1 == ROTATE || subcode1 == ROTATERT
7135 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7136 || subcode1 == ASHIFTRT)
7138 /* It's just the cost of the two operands. */
7139 *total = 0;
7140 return false;
7143 *total = COSTS_N_INSNS (1);
7144 return false;
7147 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7148 return false;
7150 case PLUS:
7151 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7152 && (mode == SFmode || !TARGET_VFP_SINGLE))
7154 *total = COSTS_N_INSNS (1);
7155 return false;
7158 /* A shift as a part of ADD costs nothing. */
7159 if (GET_CODE (XEXP (x, 0)) == MULT
7160 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7162 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7163 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7164 *total += rtx_cost (XEXP (x, 1), code, false);
7165 return true;
7168 /* Fall through */
7169 case AND: case XOR: case IOR:
7170 if (mode == SImode)
7172 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7174 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7175 || subcode == LSHIFTRT || subcode == ASHIFTRT
7176 || (code == AND && subcode == NOT))
7178 /* It's just the cost of the two operands. */
7179 *total = 0;
7180 return false;
7184 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7185 return false;
7187 case MULT:
7188 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7189 return false;
7191 case NEG:
7192 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7193 && (mode == SFmode || !TARGET_VFP_SINGLE))
7195 *total = COSTS_N_INSNS (1);
7196 return false;
7199 /* Fall through */
7200 case NOT:
7201 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7203 return false;
7205 case IF_THEN_ELSE:
7206 *total = 0;
7207 return false;
7209 case COMPARE:
7210 if (cc_register (XEXP (x, 0), VOIDmode))
7211 *total = 0;
7212 else
7213 *total = COSTS_N_INSNS (1);
7214 return false;
7216 case ABS:
7217 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7218 && (mode == SFmode || !TARGET_VFP_SINGLE))
7219 *total = COSTS_N_INSNS (1);
7220 else
7221 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7222 return false;
7224 case SIGN_EXTEND:
7225 *total = 0;
7226 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
7228 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7229 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7231 if (mode == DImode)
7232 *total += COSTS_N_INSNS (1);
7233 return false;
7235 case ZERO_EXTEND:
7236 *total = 0;
7237 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7239 switch (GET_MODE (XEXP (x, 0)))
7241 case QImode:
7242 *total += COSTS_N_INSNS (1);
7243 break;
7245 case HImode:
7246 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7248 case SImode:
7249 break;
7251 default:
7252 *total += COSTS_N_INSNS (2);
7256 if (mode == DImode)
7257 *total += COSTS_N_INSNS (1);
7259 return false;
7261 case CONST_INT:
7262 if (const_ok_for_arm (INTVAL (x)))
7263 /* A multiplication by a constant requires another instruction
7264 to load the constant to a register. */
7265 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7266 ? 1 : 0);
7267 else if (const_ok_for_arm (~INTVAL (x)))
7268 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7269 else if (const_ok_for_arm (-INTVAL (x)))
7271 if (outer_code == COMPARE || outer_code == PLUS
7272 || outer_code == MINUS)
7273 *total = 0;
7274 else
7275 *total = COSTS_N_INSNS (1);
7277 else
7278 *total = COSTS_N_INSNS (2);
7279 return true;
7281 case CONST:
7282 case LABEL_REF:
7283 case SYMBOL_REF:
7284 *total = COSTS_N_INSNS (2);
7285 return true;
7287 case CONST_DOUBLE:
7288 *total = COSTS_N_INSNS (4);
7289 return true;
7291 case HIGH:
7292 case LO_SUM:
7293 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7294 cost of these slightly. */
7295 *total = COSTS_N_INSNS (1) + 1;
7296 return true;
7298 default:
7299 if (mode != VOIDmode)
7300 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7301 else
7302 *total = COSTS_N_INSNS (4); /* Who knows? */
7303 return false;
7307 /* Top-level RTX cost function: dispatches to the size costs or to the
current tuning's speed costs. */
7308 static bool
7309 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7310 bool speed)
7312 if (!speed)
7313 return arm_size_rtx_costs (x, (enum rtx_code) code,
7314 (enum rtx_code) outer_code, total);
7315 else
7316 return current_tune->rtx_costs (x, (enum rtx_code) code,
7317 (enum rtx_code) outer_code,
7318 total, speed);
7321 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7322 supported on any "slowmul" cores, so it can be ignored. */
7324 static bool
7325 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7326 int *total, bool speed)
7328 enum machine_mode mode = GET_MODE (x);
7330 if (TARGET_THUMB)
7332 *total = thumb1_rtx_costs (x, code, outer_code);
7333 return true;
7336 switch (code)
7338 case MULT:
7339 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7340 || mode == DImode)
7342 *total = COSTS_N_INSNS (20);
7343 return false;
7346 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7348 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7349 & (unsigned HOST_WIDE_INT) 0xffffffff);
7350 int cost, const_ok = const_ok_for_arm (i);
7351 int j, booth_unit_size;
7353 /* Tune as appropriate. */
7354 cost = const_ok ? 4 : 8;
7355 booth_unit_size = 2;
7356 for (j = 0; i && j < 32; j += booth_unit_size)
7358 i >>= booth_unit_size;
7359 cost++;
7362 *total = COSTS_N_INSNS (cost);
7363 *total += rtx_cost (XEXP (x, 0), code, speed);
7364 return true;
7367 *total = COSTS_N_INSNS (20);
7368 return false;
7370 default:
7371 return arm_rtx_costs_1 (x, outer_code, total, speed);
7376 /* RTX cost for cores with a fast multiply unit (M variants). */
7378 static bool
7379 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7380 int *total, bool speed)
7382 enum machine_mode mode = GET_MODE (x);
7384 if (TARGET_THUMB1)
7386 *total = thumb1_rtx_costs (x, code, outer_code);
7387 return true;
7390 /* ??? should thumb2 use different costs? */
7391 switch (code)
7393 case MULT:
7394 /* There is no point basing this on the tuning, since it is always the
7395 fast variant if it exists at all. */
7396 if (mode == DImode
7397 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7398 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7399 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7401 *total = COSTS_N_INSNS (2);
7402 return false;
7406 if (mode == DImode)
7408 *total = COSTS_N_INSNS (5);
7409 return false;
7412 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7414 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7415 & (unsigned HOST_WIDE_INT) 0xffffffff);
7416 int cost, const_ok = const_ok_for_arm (i);
7417 int j, booth_unit_size;
7419 /* Tune as appropriate. */
7420 cost = const_ok ? 4 : 8;
7421 booth_unit_size = 8;
7422 for (j = 0; i && j < 32; j += booth_unit_size)
7424 i >>= booth_unit_size;
7425 cost++;
7428 *total = COSTS_N_INSNS (cost);
7429 return false;
7432 if (mode == SImode)
7434 *total = COSTS_N_INSNS (4);
7435 return false;
7438 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7440 if (TARGET_HARD_FLOAT
7441 && (mode == SFmode
7442 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7444 *total = COSTS_N_INSNS (1);
7445 return false;
7449 /* Requires a lib call */
7450 *total = COSTS_N_INSNS (20);
7451 return false;
7453 default:
7454 return arm_rtx_costs_1 (x, outer_code, total, speed);
7459 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7460 so it can be ignored. */
7462 static bool
7463 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7464 int *total, bool speed)
7466 enum machine_mode mode = GET_MODE (x);
7468 if (TARGET_THUMB)
7470 *total = thumb1_rtx_costs (x, code, outer_code);
7471 return true;
7474 switch (code)
7476 case COMPARE:
7477 if (GET_CODE (XEXP (x, 0)) != MULT)
7478 return arm_rtx_costs_1 (x, outer_code, total, speed);
7480 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7481 will stall until the multiplication is complete. */
7482 *total = COSTS_N_INSNS (3);
7483 return false;
7485 case MULT:
7486 /* There is no point basing this on the tuning, since it is always the
7487 fast variant if it exists at all. */
7488 if (mode == DImode
7489 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7490 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7491 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7493 *total = COSTS_N_INSNS (2);
7494 return false;
7498 if (mode == DImode)
7500 *total = COSTS_N_INSNS (5);
7501 return false;
7504 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7506 /* If operand 1 is a constant we can more accurately
7507 calculate the cost of the multiply. The multiplier can
7508 retire 15 bits on the first cycle and a further 12 on the
7509 second. We do, of course, have to load the constant into
7510 a register first. */
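/* Worked examples (purely illustrative): a multiplier of 100 fits in the
   low 15 bits and is costed at one cycle; 0x12345 sets bits above bit 14
   but none above bit 26, so it is costed at two cycles.  */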
7511 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7512 /* There's a general overhead of one cycle. */
7513 int cost = 1;
7514 unsigned HOST_WIDE_INT masked_const;
7516 if (i & 0x80000000)
7517 i = ~i;
7519 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7521 masked_const = i & 0xffff8000;
7522 if (masked_const != 0)
7524 cost++;
7525 masked_const = i & 0xf8000000;
7526 if (masked_const != 0)
7527 cost++;
7529 *total = COSTS_N_INSNS (cost);
7530 return false;
7533 if (mode == SImode)
7535 *total = COSTS_N_INSNS (3);
7536 return false;
7539 /* Requires a lib call */
7540 *total = COSTS_N_INSNS (20);
7541 return false;
7543 default:
7544 return arm_rtx_costs_1 (x, outer_code, total, speed);
7549 /* RTX costs for 9e (and later) cores. */
7551 static bool
7552 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7553 int *total, bool speed)
7555 enum machine_mode mode = GET_MODE (x);
7557 if (TARGET_THUMB1)
7559 switch (code)
7561 case MULT:
7562 *total = COSTS_N_INSNS (3);
7563 return true;
7565 default:
7566 *total = thumb1_rtx_costs (x, code, outer_code);
7567 return true;
7571 switch (code)
7573 case MULT:
7574 /* There is no point basing this on the tuning, since it is always the
7575 fast variant if it exists at all. */
7576 if (mode == DImode
7577 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7578 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7579 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7581 *total = COSTS_N_INSNS (2);
7582 return false;
7586 if (mode == DImode)
7588 *total = COSTS_N_INSNS (5);
7589 return false;
7592 if (mode == SImode)
7594 *total = COSTS_N_INSNS (2);
7595 return false;
7598 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7600 if (TARGET_HARD_FLOAT
7601 && (mode == SFmode
7602 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7604 *total = COSTS_N_INSNS (1);
7605 return false;
7609 *total = COSTS_N_INSNS (20);
7610 return false;
7612 default:
7613 return arm_rtx_costs_1 (x, outer_code, total, speed);
7616 /* All address computations that can be done are free, but rtx cost returns
7617 the same for practically all of them. So we weight the different types
7618 of address here in the order (most pref first):
7619 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
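/* For illustration: with the weights below, a pre/post-increment address
   costs 0, reg+constant costs 2, reg plus a shifted register costs 3, a
   plain register costs 6, and a bare LABEL_REF or SYMBOL_REF costs 10.  */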
7620 static inline int
7621 arm_arm_address_cost (rtx x)
7623 enum rtx_code c = GET_CODE (x);
7625 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7626 return 0;
7627 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7628 return 10;
7630 if (c == PLUS)
7632 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7633 return 2;
7635 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7636 return 3;
7638 return 4;
7641 return 6;
7644 static inline int
7645 arm_thumb_address_cost (rtx x)
7647 enum rtx_code c = GET_CODE (x);
7649 if (c == REG)
7650 return 1;
7651 if (c == PLUS
7652 && GET_CODE (XEXP (x, 0)) == REG
7653 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7654 return 1;
7656 return 2;
7659 static int
7660 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7662 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7665 static int
7666 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7668 rtx i_pat, d_pat;
7670 /* Some true dependencies can have a higher cost depending
7671 on precisely how certain input operands are used. */
7672 if (arm_tune_xscale
7673 && REG_NOTE_KIND (link) == 0
7674 && recog_memoized (insn) >= 0
7675 && recog_memoized (dep) >= 0)
7677 int shift_opnum = get_attr_shift (insn);
7678 enum attr_type attr_type = get_attr_type (dep);
7680 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7681 operand for INSN. If we have a shifted input operand and the
7682 instruction we depend on is another ALU instruction, then we may
7683 have to account for an additional stall. */
7684 if (shift_opnum != 0
7685 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7687 rtx shifted_operand;
7688 int opno;
7690 /* Get the shifted operand. */
7691 extract_insn (insn);
7692 shifted_operand = recog_data.operand[shift_opnum];
7694 /* Iterate over all the operands in DEP. If we write an operand
7695 that overlaps with SHIFTED_OPERAND, then we have to increase the
7696 cost of this dependency. */
7697 extract_insn (dep);
7698 preprocess_constraints ();
7699 for (opno = 0; opno < recog_data.n_operands; opno++)
7701 /* We can ignore strict inputs. */
7702 if (recog_data.operand_type[opno] == OP_IN)
7703 continue;
7705 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7706 shifted_operand))
7707 return 2;
7712 /* XXX This is not strictly true for the FPA. */
7713 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7714 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7715 return 0;
7717 /* Call insns don't incur a stall, even if they follow a load. */
7718 if (REG_NOTE_KIND (link) == 0
7719 && GET_CODE (insn) == CALL_INSN)
7720 return 1;
7722 if ((i_pat = single_set (insn)) != NULL
7723 && GET_CODE (SET_SRC (i_pat)) == MEM
7724 && (d_pat = single_set (dep)) != NULL
7725 && GET_CODE (SET_DEST (d_pat)) == MEM)
7727 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7728 /* This is a load after a store; there is no conflict if the load reads
7729 from a cached area. Assume that loads from the stack and from the
7730 constant pool are cached, and that others will miss. This is a
7731 hack. */
7733 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7734 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7735 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7736 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7737 return 1;
7740 return cost;
7743 static int fp_consts_inited = 0;
7745 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7746 static const char * const strings_fp[8] =
7748 "0", "1", "2", "3",
7749 "4", "5", "0.5", "10"
7752 static REAL_VALUE_TYPE values_fp[8];
7754 static void
7755 init_fp_table (void)
7757 int i;
7758 REAL_VALUE_TYPE r;
7760 if (TARGET_VFP)
7761 fp_consts_inited = 1;
7762 else
7763 fp_consts_inited = 8;
7765 for (i = 0; i < fp_consts_inited; i++)
7767 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7768 values_fp[i] = r;
7772 /* Return TRUE if rtx X is a valid immediate FP constant. */
7774 arm_const_double_rtx (rtx x)
7776 REAL_VALUE_TYPE r;
7777 int i;
7779 if (!fp_consts_inited)
7780 init_fp_table ();
7782 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7783 if (REAL_VALUE_MINUS_ZERO (r))
7784 return 0;
7786 for (i = 0; i < fp_consts_inited; i++)
7787 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7788 return 1;
7790 return 0;
7793 /* Return TRUE if the negation of rtx X is a valid immediate FPA constant. */
7795 neg_const_double_rtx_ok_for_fpa (rtx x)
7797 REAL_VALUE_TYPE r;
7798 int i;
7800 if (!fp_consts_inited)
7801 init_fp_table ();
7803 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7804 r = REAL_VALUE_NEGATE (r);
7805 if (REAL_VALUE_MINUS_ZERO (r))
7806 return 0;
7808 for (i = 0; i < 8; i++)
7809 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7810 return 1;
7812 return 0;
7816 /* VFPv3 has a fairly wide range of representable immediates, formed from
7817 "quarter-precision" floating-point values. These can be evaluated using this
7818 formula (with ^ for exponentiation):
7820 -1^s * n * 2^-r
7822 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7823 16 <= n <= 31 and 0 <= r <= 7.
7825 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7827 - A (most-significant) is the sign bit.
7828 - BCD are the exponent (encoded as r XOR 3).
7829 - EFGH are the mantissa (encoded as n - 16).
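/* For example (following the encoding above): 1.0 == 16 * 2^-4, so s == 0,
   n == 16 and r == 4, giving ABCDEFGH == 0 111 0000 == 0x70; similarly
   0.5 == 16 * 2^-5 is encoded as 0x60.  */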
7832 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7833 fconst[sd] instruction, or -1 if X isn't suitable. */
7834 static int
7835 vfp3_const_double_index (rtx x)
7837 REAL_VALUE_TYPE r, m;
7838 int sign, exponent;
7839 unsigned HOST_WIDE_INT mantissa, mant_hi;
7840 unsigned HOST_WIDE_INT mask;
7841 HOST_WIDE_INT m1, m2;
7842 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7844 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7845 return -1;
7847 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7849 /* We can't represent these things, so detect them first. */
7850 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7851 return -1;
7853 /* Extract sign, exponent and mantissa. */
7854 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7855 r = REAL_VALUE_ABS (r);
7856 exponent = REAL_EXP (&r);
7857 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7858 highest (sign) bit, with a fixed binary point at bit point_pos.
7859 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7860 bits for the mantissa, this may fail (low bits would be lost). */
7861 real_ldexp (&m, &r, point_pos - exponent);
7862 REAL_VALUE_TO_INT (&m1, &m2, m);
7863 mantissa = m1;
7864 mant_hi = m2;
7866 /* If there are bits set in the low part of the mantissa, we can't
7867 represent this value. */
7868 if (mantissa != 0)
7869 return -1;
7871 /* Now make it so that mantissa contains the most-significant bits, and move
7872 the point_pos to indicate that the least-significant bits have been
7873 discarded. */
7874 point_pos -= HOST_BITS_PER_WIDE_INT;
7875 mantissa = mant_hi;
7877 /* We can permit four significant bits of mantissa only, plus a high bit
7878 which is always 1. */
7879 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7880 if ((mantissa & mask) != 0)
7881 return -1;
7883 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7884 mantissa >>= point_pos - 5;
7886 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7887 floating-point immediate zero with Neon using an integer-zero load, but
7888 that case is handled elsewhere.) */
7889 if (mantissa == 0)
7890 return -1;
7892 gcc_assert (mantissa >= 16 && mantissa <= 31);
7894 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7895 normalized significands are in the range [1, 2). (Our mantissa is shifted
7896 left 4 places at this point relative to normalized IEEE754 values). GCC
7897 internally uses [0.5, 1) (see real.c), so the exponent returned from
7898 REAL_EXP must be altered. */
7899 exponent = 5 - exponent;
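/* Sanity check: for 1.0, REAL_EXP gives 1 (1.0 = 0.5 * 2^1), so exponent
   becomes 5 - 1 = 4, matching r = 4 in 1.0 = 16 * 2^-4.  */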
7901 if (exponent < 0 || exponent > 7)
7902 return -1;
7904 /* Sign, mantissa and exponent are now in the correct form to plug into the
7905 formula described in the comment above. */
7906 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
7909 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
7911 vfp3_const_double_rtx (rtx x)
7913 if (!TARGET_VFP3)
7914 return 0;
7916 return vfp3_const_double_index (x) != -1;
7919 /* Recognize immediates which can be used in various Neon instructions. Legal
7920 immediates are described by the following table (for VMVN variants, the
7921 bitwise inverse of the constant shown is recognized. In either case, VMOV
7922 is output and the correct instruction to use for a given constant is chosen
7923 by the assembler). The constant shown is replicated across all elements of
7924 the destination vector.
7926 insn elems variant constant (binary)
7927 ---- ----- ------- -----------------
7928 vmov i32 0 00000000 00000000 00000000 abcdefgh
7929 vmov i32 1 00000000 00000000 abcdefgh 00000000
7930 vmov i32 2 00000000 abcdefgh 00000000 00000000
7931 vmov i32 3 abcdefgh 00000000 00000000 00000000
7932 vmov i16 4 00000000 abcdefgh
7933 vmov i16 5 abcdefgh 00000000
7934 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7935 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7936 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7937 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7938 vmvn i16 10 00000000 abcdefgh
7939 vmvn i16 11 abcdefgh 00000000
7940 vmov i32 12 00000000 00000000 abcdefgh 11111111
7941 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7942 vmov i32 14 00000000 abcdefgh 11111111 11111111
7943 vmvn i32 15 00000000 abcdefgh 11111111 11111111
7944 vmov i8 16 abcdefgh
7945 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7946 eeeeeeee ffffffff gggggggg hhhhhhhh
7947 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
7949 For case 18, B = !b. Representable values are exactly those accepted by
7950 vfp3_const_double_index, but are output as floating-point numbers rather
7951 than indices.
7953 Variants 0-5 (inclusive) may also be used as immediates for the second
7954 operand of VORR/VBIC instructions.
7956 The INVERSE argument causes the bitwise inverse of the given operand to be
7957 recognized instead (used for recognizing legal immediates for the VAND/VORN
7958 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7959 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7960 output, rather than the real insns vbic/vorr).
7962 INVERSE makes no difference to the recognition of float vectors.
7964 The return value is the variant of immediate as shown in the above table, or
7965 -1 if the given value doesn't match any of the listed patterns.
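/* For illustration: a V4SImode constant with every element 0x0000ab00
   splats to the little-endian byte pattern 00 ab 00 00 and matches
   variant 1 above, so the function returns 1 with *ELEMENTWIDTH = 32
   and *MODCONST = (const_int 0xab00).  */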
7967 static int
7968 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7969 rtx *modconst, int *elementwidth)
7971 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
7972 matches = 1; \
7973 for (i = 0; i < idx; i += (STRIDE)) \
7974 if (!(TEST)) \
7975 matches = 0; \
7976 if (matches) \
7978 immtype = (CLASS); \
7979 elsize = (ELSIZE); \
7980 break; \
7983 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7984 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7985 unsigned char bytes[16];
7986 int immtype = -1, matches;
7987 unsigned int invmask = inverse ? 0xff : 0;
7989 /* Vectors of float constants. */
7990 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7992 rtx el0 = CONST_VECTOR_ELT (op, 0);
7993 REAL_VALUE_TYPE r0;
7995 if (!vfp3_const_double_rtx (el0))
7996 return -1;
7998 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8000 for (i = 1; i < n_elts; i++)
8002 rtx elt = CONST_VECTOR_ELT (op, i);
8003 REAL_VALUE_TYPE re;
8005 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8007 if (!REAL_VALUES_EQUAL (r0, re))
8008 return -1;
8011 if (modconst)
8012 *modconst = CONST_VECTOR_ELT (op, 0);
8014 if (elementwidth)
8015 *elementwidth = 0;
8017 return 18;
8020 /* Splat vector constant out into a byte vector. */
8021 for (i = 0; i < n_elts; i++)
8023 rtx el = CONST_VECTOR_ELT (op, i);
8024 unsigned HOST_WIDE_INT elpart;
8025 unsigned int part, parts;
8027 if (GET_CODE (el) == CONST_INT)
8029 elpart = INTVAL (el);
8030 parts = 1;
8032 else if (GET_CODE (el) == CONST_DOUBLE)
8034 elpart = CONST_DOUBLE_LOW (el);
8035 parts = 2;
8037 else
8038 gcc_unreachable ();
8040 for (part = 0; part < parts; part++)
8042 unsigned int byte;
8043 for (byte = 0; byte < innersize; byte++)
8045 bytes[idx++] = (elpart & 0xff) ^ invmask;
8046 elpart >>= BITS_PER_UNIT;
8048 if (GET_CODE (el) == CONST_DOUBLE)
8049 elpart = CONST_DOUBLE_HIGH (el);
8053 /* Sanity check. */
8054 gcc_assert (idx == GET_MODE_SIZE (mode));
8058 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8059 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8061 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8062 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8064 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8065 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8067 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8068 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8070 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8072 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8074 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8075 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8077 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8078 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8080 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8081 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8083 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8084 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8086 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8088 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8090 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8091 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8093 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8094 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8096 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8097 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8099 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8100 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8102 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8104 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8105 && bytes[i] == bytes[(i + 8) % idx]);
8107 while (0);
8109 if (immtype == -1)
8110 return -1;
8112 if (elementwidth)
8113 *elementwidth = elsize;
8115 if (modconst)
8117 unsigned HOST_WIDE_INT imm = 0;
8119 /* Un-invert bytes of recognized vector, if necessary. */
8120 if (invmask != 0)
8121 for (i = 0; i < idx; i++)
8122 bytes[i] ^= invmask;
8124 if (immtype == 17)
8126 /* FIXME: Broken on 32-bit H_W_I hosts. */
8127 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8129 for (i = 0; i < 8; i++)
8130 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8131 << (i * BITS_PER_UNIT);
8133 *modconst = GEN_INT (imm);
8135 else
8137 unsigned HOST_WIDE_INT imm = 0;
8139 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8140 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8142 *modconst = GEN_INT (imm);
8146 return immtype;
8147 #undef CHECK
8150 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8151 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8152 float elements), and a modified constant (whatever should be output for a
8153 VMOV) in *MODCONST. */
8156 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8157 rtx *modconst, int *elementwidth)
8159 rtx tmpconst;
8160 int tmpwidth;
8161 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8163 if (retval == -1)
8164 return 0;
8166 if (modconst)
8167 *modconst = tmpconst;
8169 if (elementwidth)
8170 *elementwidth = tmpwidth;
8172 return 1;
8175 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8176 the immediate is valid, write a constant suitable for using as an operand
8177 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8178 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8181 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8182 rtx *modconst, int *elementwidth)
8184 rtx tmpconst;
8185 int tmpwidth;
8186 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8188 if (retval < 0 || retval > 5)
8189 return 0;
8191 if (modconst)
8192 *modconst = tmpconst;
8194 if (elementwidth)
8195 *elementwidth = tmpwidth;
8197 return 1;
8200 /* Return a string suitable for output of Neon immediate logic operation
8201 MNEM. */
8203 char *
8204 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8205 int inverse, int quad)
8207 int width, is_valid;
8208 static char templ[40];
8210 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8212 gcc_assert (is_valid != 0);
8214 if (quad)
8215 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8216 else
8217 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8219 return templ;
8222 /* Output a sequence of pairwise operations to implement a reduction.
8223 NOTE: We do "too much work" here, because pairwise operations work on two
8224 registers-worth of operands in one go. Unfortunately we can't exploit those
8225 extra calculations to do the full operation in fewer steps, I don't think.
8226 Although all vector elements of the result but the first are ignored, we
8227 actually calculate the same result in each of the elements. An alternative
8228 such as initially loading a vector with zero to use as each of the second
8229 operands would use up an additional register and take an extra instruction,
8230 for no particular gain. */
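/* For example, reducing a four-element vector takes two pairwise steps:
   {a, b, c, d} -> {a+b, c+d, ...} -> {a+b+c+d, ...}; the loop below runs
   log2(#elements) times and the caller only uses element 0 of OP0.  */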
8232 void
8233 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8234 rtx (*reduc) (rtx, rtx, rtx))
8236 enum machine_mode inner = GET_MODE_INNER (mode);
8237 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8238 rtx tmpsum = op1;
8240 for (i = parts / 2; i >= 1; i /= 2)
8242 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8243 emit_insn (reduc (dest, tmpsum, tmpsum));
8244 tmpsum = dest;
8248 /* If VALS is a vector constant that can be loaded into a register
8249 using VDUP, generate instructions to do so and return an RTX to
8250 assign to the register. Otherwise return NULL_RTX. */
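/* For example, (const_vector:V8QI [(const_int 42) x8]) is loaded by
   moving 42 into a core register and duplicating it with vdup.8, which
   is cheaper than a literal-pool load.  */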
8252 static rtx
8253 neon_vdup_constant (rtx vals)
8255 enum machine_mode mode = GET_MODE (vals);
8256 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8257 int n_elts = GET_MODE_NUNITS (mode);
8258 bool all_same = true;
8259 rtx x;
8260 int i;
8262 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8263 return NULL_RTX;
8265 for (i = 0; i < n_elts; ++i)
8267 x = XVECEXP (vals, 0, i);
8268 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8269 all_same = false;
8272 if (!all_same)
8273 /* The elements are not all the same. We could handle repeating
8274 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8275 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8276 vdup.i16). */
8277 return NULL_RTX;
8279 /* We can load this constant by using VDUP and a constant in a
8280 single ARM register. This will be cheaper than a vector
8281 load. */
8283 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8284 return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8285 UNSPEC_VDUP_N);
8288 /* Generate code to load VALS, which is a PARALLEL containing only
8289 constants (for vec_init) or CONST_VECTOR, efficiently into a
8290 register. Returns an RTX to copy into the register, or NULL_RTX
8291 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8294 neon_make_constant (rtx vals)
8296 enum machine_mode mode = GET_MODE (vals);
8297 rtx target;
8298 rtx const_vec = NULL_RTX;
8299 int n_elts = GET_MODE_NUNITS (mode);
8300 int n_const = 0;
8301 int i;
8303 if (GET_CODE (vals) == CONST_VECTOR)
8304 const_vec = vals;
8305 else if (GET_CODE (vals) == PARALLEL)
8307 /* A CONST_VECTOR must contain only CONST_INTs and
8308 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8309 Only store valid constants in a CONST_VECTOR. */
8310 for (i = 0; i < n_elts; ++i)
8312 rtx x = XVECEXP (vals, 0, i);
8313 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8314 n_const++;
8316 if (n_const == n_elts)
8317 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8319 else
8320 gcc_unreachable ();
8322 if (const_vec != NULL
8323 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8324 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8325 return const_vec;
8326 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8327 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8328 pipeline cycle; creating the constant takes one or two ARM
8329 pipeline cycles. */
8330 return target;
8331 else if (const_vec != NULL_RTX)
8332 /* Load from constant pool. On Cortex-A8 this takes two cycles
8333 (for either double or quad vectors). We can not take advantage
8334 of single-cycle VLD1 because we need a PC-relative addressing
8335 mode. */
8336 return const_vec;
8337 else
8338 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8339 We can not construct an initializer. */
8340 return NULL_RTX;
8343 /* Initialize vector TARGET to VALS. */
8345 void
8346 neon_expand_vector_init (rtx target, rtx vals)
8348 enum machine_mode mode = GET_MODE (target);
8349 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8350 int n_elts = GET_MODE_NUNITS (mode);
8351 int n_var = 0, one_var = -1;
8352 bool all_same = true;
8353 rtx x, mem;
8354 int i;
8356 for (i = 0; i < n_elts; ++i)
8358 x = XVECEXP (vals, 0, i);
8359 if (!CONSTANT_P (x))
8360 ++n_var, one_var = i;
8362 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8363 all_same = false;
8366 if (n_var == 0)
8368 rtx constant = neon_make_constant (vals);
8369 if (constant != NULL_RTX)
8371 emit_move_insn (target, constant);
8372 return;
8376 /* Splat a single non-constant element if we can. */
8377 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8379 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8380 emit_insn (gen_rtx_SET (VOIDmode, target,
8381 gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8382 UNSPEC_VDUP_N)));
8383 return;
8386 /* One field is non-constant. Load constant then overwrite varying
8387 field. This is more efficient than using the stack. */
8388 if (n_var == 1)
8390 rtx copy = copy_rtx (vals);
8391 rtvec ops;
8393 /* Load constant part of vector, substitute neighboring value for
8394 varying element. */
8395 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8396 neon_expand_vector_init (target, copy);
8398 /* Insert variable. */
8399 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8400 ops = gen_rtvec (3, x, target, GEN_INT (one_var));
8401 emit_insn (gen_rtx_SET (VOIDmode, target,
8402 gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
8403 return;
8406 /* Construct the vector in memory one field at a time
8407 and load the whole vector. */
8408 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8409 for (i = 0; i < n_elts; i++)
8410 emit_move_insn (adjust_address_nv (mem, inner_mode,
8411 i * GET_MODE_SIZE (inner_mode)),
8412 XVECEXP (vals, 0, i));
8413 emit_move_insn (target, mem);
8416 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8417 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8418 reported source locations are bogus. */
8420 static void
8421 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8422 const char *err)
8424 HOST_WIDE_INT lane;
8426 gcc_assert (GET_CODE (operand) == CONST_INT);
8428 lane = INTVAL (operand);
8430 if (lane < low || lane >= high)
8431 error (err);
8434 /* Bounds-check lanes. */
8436 void
8437 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8439 bounds_check (operand, low, high, "lane out of range");
8442 /* Bounds-check constants. */
8444 void
8445 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8447 bounds_check (operand, low, high, "constant out of range");
8450 HOST_WIDE_INT
8451 neon_element_bits (enum machine_mode mode)
8453 if (mode == DImode)
8454 return GET_MODE_BITSIZE (mode);
8455 else
8456 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8460 /* Predicates for `match_operand' and `match_operator'. */
8462 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8464 cirrus_memory_offset (rtx op)
8466 /* Reject eliminable registers. */
8467 if (! (reload_in_progress || reload_completed)
8468 && ( reg_mentioned_p (frame_pointer_rtx, op)
8469 || reg_mentioned_p (arg_pointer_rtx, op)
8470 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8471 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8472 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8473 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8474 return 0;
8476 if (GET_CODE (op) == MEM)
8478 rtx ind;
8480 ind = XEXP (op, 0);
8482 /* Match: (mem (reg)). */
8483 if (GET_CODE (ind) == REG)
8484 return 1;
8486 /* Match:
8487 (mem (plus (reg)
8488 (const))). */
8489 if (GET_CODE (ind) == PLUS
8490 && GET_CODE (XEXP (ind, 0)) == REG
8491 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8492 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8493 return 1;
8496 return 0;
8499 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8500 WB is true if full writeback address modes are allowed and is false
8501 if limited writeback address modes (POST_INC and PRE_DEC) are
8502 allowed. */
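/* For example, (mem (reg)), (mem (post_inc (reg))) and a register plus a
   word-aligned offset such as [rN, #1020] are accepted; offsets of 1024
   (out of range) or 2 (not a multiple of 4) are rejected below.  */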
8505 arm_coproc_mem_operand (rtx op, bool wb)
8507 rtx ind;
8509 /* Reject eliminable registers. */
8510 if (! (reload_in_progress || reload_completed)
8511 && ( reg_mentioned_p (frame_pointer_rtx, op)
8512 || reg_mentioned_p (arg_pointer_rtx, op)
8513 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8514 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8515 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8516 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8517 return FALSE;
8519 /* Constants are converted into offsets from labels. */
8520 if (GET_CODE (op) != MEM)
8521 return FALSE;
8523 ind = XEXP (op, 0);
8525 if (reload_completed
8526 && (GET_CODE (ind) == LABEL_REF
8527 || (GET_CODE (ind) == CONST
8528 && GET_CODE (XEXP (ind, 0)) == PLUS
8529 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8530 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8531 return TRUE;
8533 /* Match: (mem (reg)). */
8534 if (GET_CODE (ind) == REG)
8535 return arm_address_register_rtx_p (ind, 0);
8537 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8538 acceptable in any case (subject to verification by
8539 arm_address_register_rtx_p). We need WB to be true to accept
8540 PRE_INC and POST_DEC. */
8541 if (GET_CODE (ind) == POST_INC
8542 || GET_CODE (ind) == PRE_DEC
8543 || (wb
8544 && (GET_CODE (ind) == PRE_INC
8545 || GET_CODE (ind) == POST_DEC)))
8546 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8548 if (wb
8549 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8550 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8551 && GET_CODE (XEXP (ind, 1)) == PLUS
8552 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8553 ind = XEXP (ind, 1);
8555 /* Match:
8556 (plus (reg)
8557 (const)). */
8558 if (GET_CODE (ind) == PLUS
8559 && GET_CODE (XEXP (ind, 0)) == REG
8560 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8561 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8562 && INTVAL (XEXP (ind, 1)) > -1024
8563 && INTVAL (XEXP (ind, 1)) < 1024
8564 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8565 return TRUE;
8567 return FALSE;
8570 /* Return TRUE if OP is a memory operand which we can load or store a vector
8571 to/from. TYPE is one of the following values:
8572 0 - Vector load/store (vldr)
8573 1 - Core registers (ldm)
8574 2 - Element/structure loads (vld1)
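/* For example, (mem (plus (reg) (const_int 8))) is accepted only for
   TYPE 0 (vldr addressing), while (mem (post_inc (reg))) is accepted
   for TYPEs 0 and 2 but not for TYPE 1 (ldm).  */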
8577 neon_vector_mem_operand (rtx op, int type)
8579 rtx ind;
8581 /* Reject eliminable registers. */
8582 if (! (reload_in_progress || reload_completed)
8583 && ( reg_mentioned_p (frame_pointer_rtx, op)
8584 || reg_mentioned_p (arg_pointer_rtx, op)
8585 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8586 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8587 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8588 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8589 return FALSE;
8591 /* Constants are converted into offsets from labels. */
8592 if (GET_CODE (op) != MEM)
8593 return FALSE;
8595 ind = XEXP (op, 0);
8597 if (reload_completed
8598 && (GET_CODE (ind) == LABEL_REF
8599 || (GET_CODE (ind) == CONST
8600 && GET_CODE (XEXP (ind, 0)) == PLUS
8601 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8602 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8603 return TRUE;
8605 /* Match: (mem (reg)). */
8606 if (GET_CODE (ind) == REG)
8607 return arm_address_register_rtx_p (ind, 0);
8609 /* Allow post-increment with Neon registers. */
8610 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8611 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8613 /* FIXME: vld1 allows register post-modify. */
8615 /* Match:
8616 (plus (reg)
8617 (const)). */
8618 if (type == 0
8619 && GET_CODE (ind) == PLUS
8620 && GET_CODE (XEXP (ind, 0)) == REG
8621 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8622 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8623 && INTVAL (XEXP (ind, 1)) > -1024
8624 && INTVAL (XEXP (ind, 1)) < 1016
8625 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8626 return TRUE;
8628 return FALSE;
8631 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8632 type. */
8634 neon_struct_mem_operand (rtx op)
8636 rtx ind;
8638 /* Reject eliminable registers. */
8639 if (! (reload_in_progress || reload_completed)
8640 && ( reg_mentioned_p (frame_pointer_rtx, op)
8641 || reg_mentioned_p (arg_pointer_rtx, op)
8642 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8643 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8644 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8645 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8646 return FALSE;
8648 /* Constants are converted into offsets from labels. */
8649 if (GET_CODE (op) != MEM)
8650 return FALSE;
8652 ind = XEXP (op, 0);
8654 if (reload_completed
8655 && (GET_CODE (ind) == LABEL_REF
8656 || (GET_CODE (ind) == CONST
8657 && GET_CODE (XEXP (ind, 0)) == PLUS
8658 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8659 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8660 return TRUE;
8662 /* Match: (mem (reg)). */
8663 if (GET_CODE (ind) == REG)
8664 return arm_address_register_rtx_p (ind, 0);
8666 return FALSE;
8669 /* Return true if X is a register that will be eliminated later on. */
8671 arm_eliminable_register (rtx x)
8673 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8674 || REGNO (x) == ARG_POINTER_REGNUM
8675 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8676 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8679 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
8680 coprocessor registers. Otherwise return NO_REGS. */
8682 enum reg_class
8683 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8685 if (mode == HFmode)
8687 if (!TARGET_NEON_FP16)
8688 return GENERAL_REGS;
8689 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8690 return NO_REGS;
8691 return GENERAL_REGS;
8694 if (TARGET_NEON
8695 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8696 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8697 && neon_vector_mem_operand (x, 0))
8698 return NO_REGS;
8700 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8701 return NO_REGS;
8703 return GENERAL_REGS;
8706 /* Values which must be returned in the most-significant end of the return
8707 register. */
8709 static bool
8710 arm_return_in_msb (const_tree valtype)
8712 return (TARGET_AAPCS_BASED
8713 && BYTES_BIG_ENDIAN
8714 && (AGGREGATE_TYPE_P (valtype)
8715 || TREE_CODE (valtype) == COMPLEX_TYPE));
8718 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8719 Used by the Cirrus Maverick code, which has to work around
8720 a hardware bug triggered by such instructions. */
8721 static bool
8722 arm_memory_load_p (rtx insn)
8724 rtx body, lhs, rhs;
8726 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8727 return false;
8729 body = PATTERN (insn);
8731 if (GET_CODE (body) != SET)
8732 return false;
8734 lhs = XEXP (body, 0);
8735 rhs = XEXP (body, 1);
8737 lhs = REG_OR_SUBREG_RTX (lhs);
8739 /* If the destination is not a general purpose
8740 register we do not have to worry. */
8741 if (GET_CODE (lhs) != REG
8742 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8743 return false;
8745 /* As well as loads from memory we also have to react
8746 to loads of invalid constants which will be turned
8747 into loads from the minipool. */
8748 return (GET_CODE (rhs) == MEM
8749 || GET_CODE (rhs) == SYMBOL_REF
8750 || note_invalid_constants (insn, -1, false));
8753 /* Return TRUE if INSN is a Cirrus instruction. */
8754 static bool
8755 arm_cirrus_insn_p (rtx insn)
8757 enum attr_cirrus attr;
8759 /* get_attr cannot accept USE or CLOBBER. */
8760 if (!insn
8761 || GET_CODE (insn) != INSN
8762 || GET_CODE (PATTERN (insn)) == USE
8763 || GET_CODE (PATTERN (insn)) == CLOBBER)
8764 return 0;
8766 attr = get_attr_cirrus (insn);
8768 return attr != CIRRUS_NOT;
8771 /* Cirrus reorg for invalid instruction combinations. */
8772 static void
8773 cirrus_reorg (rtx first)
8775 enum attr_cirrus attr;
8776 rtx body = PATTERN (first);
8777 rtx t;
8778 int nops;
8780 /* Any branch must be followed by 2 non Cirrus instructions. */
8781 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8783 nops = 0;
8784 t = next_nonnote_insn (first);
8786 if (arm_cirrus_insn_p (t))
8787 ++ nops;
8789 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8790 ++ nops;
8792 while (nops --)
8793 emit_insn_after (gen_nop (), first);
8795 return;
8798 /* (float (blah)) is in parallel with a clobber. */
8799 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8800 body = XVECEXP (body, 0, 0);
8802 if (GET_CODE (body) == SET)
8804 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8806 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8807 be followed by a non Cirrus insn. */
8808 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8810 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8811 emit_insn_after (gen_nop (), first);
8813 return;
8815 else if (arm_memory_load_p (first))
8817 unsigned int arm_regno;
8819 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8820 ldr/cfmv64hr combination where the Rd field is the same
8821 in both instructions must be split with a non Cirrus
8822 insn. Example:
8824 ldr r0, blah
8826 cfmvsr mvf0, r0. */
8828 /* Get Arm register number for ldr insn. */
8829 if (GET_CODE (lhs) == REG)
8830 arm_regno = REGNO (lhs);
8831 else
8833 gcc_assert (GET_CODE (rhs) == REG);
8834 arm_regno = REGNO (rhs);
8837 /* Next insn. */
8838 first = next_nonnote_insn (first);
8840 if (! arm_cirrus_insn_p (first))
8841 return;
8843 body = PATTERN (first);
8845 /* (float (blah)) is in parallel with a clobber. */
8846 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8847 body = XVECEXP (body, 0, 0);
8849 if (GET_CODE (body) == FLOAT)
8850 body = XEXP (body, 0);
8852 if (get_attr_cirrus (first) == CIRRUS_MOVE
8853 && GET_CODE (XEXP (body, 1)) == REG
8854 && arm_regno == REGNO (XEXP (body, 1)))
8855 emit_insn_after (gen_nop (), first);
8857 return;
8861 /* get_attr cannot accept USE or CLOBBER. */
8862 if (!first
8863 || GET_CODE (first) != INSN
8864 || GET_CODE (PATTERN (first)) == USE
8865 || GET_CODE (PATTERN (first)) == CLOBBER)
8866 return;
8868 attr = get_attr_cirrus (first);
8870 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8871 must be followed by a non-coprocessor instruction. */
8872 if (attr == CIRRUS_COMPARE)
8874 nops = 0;
8876 t = next_nonnote_insn (first);
8878 if (arm_cirrus_insn_p (t))
8879 ++ nops;
8881 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8882 ++ nops;
8884 while (nops --)
8885 emit_insn_after (gen_nop (), first);
8887 return;
8891 /* Return TRUE if X references a SYMBOL_REF. */
8893 symbol_mentioned_p (rtx x)
8895 const char * fmt;
8896 int i;
8898 if (GET_CODE (x) == SYMBOL_REF)
8899 return 1;
8901 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8902 are constant offsets, not symbols. */
8903 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8904 return 0;
8906 fmt = GET_RTX_FORMAT (GET_CODE (x));
8908 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8910 if (fmt[i] == 'E')
8912 int j;
8914 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8915 if (symbol_mentioned_p (XVECEXP (x, i, j)))
8916 return 1;
8918 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8919 return 1;
8922 return 0;
8925 /* Return TRUE if X references a LABEL_REF. */
8927 label_mentioned_p (rtx x)
8929 const char * fmt;
8930 int i;
8932 if (GET_CODE (x) == LABEL_REF)
8933 return 1;
8935 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8936 instruction, but they are constant offsets, not symbols. */
8937 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8938 return 0;
8940 fmt = GET_RTX_FORMAT (GET_CODE (x));
8941 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8943 if (fmt[i] == 'E')
8945 int j;
8947 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8948 if (label_mentioned_p (XVECEXP (x, i, j)))
8949 return 1;
8951 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
8952 return 1;
8955 return 0;
8959 tls_mentioned_p (rtx x)
8961 switch (GET_CODE (x))
8963 case CONST:
8964 return tls_mentioned_p (XEXP (x, 0));
8966 case UNSPEC:
8967 if (XINT (x, 1) == UNSPEC_TLS)
8968 return 1;
8970 default:
8971 return 0;
8975 /* Must not copy any rtx that uses a pc-relative address. */
8977 static int
8978 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
8980 if (GET_CODE (*x) == UNSPEC
8981 && XINT (*x, 1) == UNSPEC_PIC_BASE)
8982 return 1;
8983 return 0;
8986 static bool
8987 arm_cannot_copy_insn_p (rtx insn)
8989 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
8992 enum rtx_code
8993 minmax_code (rtx x)
8995 enum rtx_code code = GET_CODE (x);
8997 switch (code)
8999 case SMAX:
9000 return GE;
9001 case SMIN:
9002 return LE;
9003 case UMIN:
9004 return LEU;
9005 case UMAX:
9006 return GEU;
9007 default:
9008 gcc_unreachable ();
9012 /* Return 1 if memory locations are adjacent. */
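/* For example, SImode references [r4, #8] and [r4, #12] share a base and
   differ by exactly 4, so (absent volatility and eliminable registers)
   they qualify for combination into a single ldm/stm on cores without
   load delay slots.  */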
9014 adjacent_mem_locations (rtx a, rtx b)
9016 /* We don't guarantee to preserve the order of these memory refs. */
9017 if (volatile_refs_p (a) || volatile_refs_p (b))
9018 return 0;
9020 if ((GET_CODE (XEXP (a, 0)) == REG
9021 || (GET_CODE (XEXP (a, 0)) == PLUS
9022 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9023 && (GET_CODE (XEXP (b, 0)) == REG
9024 || (GET_CODE (XEXP (b, 0)) == PLUS
9025 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9027 HOST_WIDE_INT val0 = 0, val1 = 0;
9028 rtx reg0, reg1;
9029 int val_diff;
9031 if (GET_CODE (XEXP (a, 0)) == PLUS)
9033 reg0 = XEXP (XEXP (a, 0), 0);
9034 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9036 else
9037 reg0 = XEXP (a, 0);
9039 if (GET_CODE (XEXP (b, 0)) == PLUS)
9041 reg1 = XEXP (XEXP (b, 0), 0);
9042 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9044 else
9045 reg1 = XEXP (b, 0);
9047 /* Don't accept any offset that will require multiple
9048 instructions to handle, since this would cause the
9049 arith_adjacentmem pattern to output an overlong sequence. */
9050 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9051 return 0;
9053 /* Don't allow an eliminable register: register elimination can make
9054 the offset too large. */
9055 if (arm_eliminable_register (reg0))
9056 return 0;
9058 val_diff = val1 - val0;
9060 if (arm_ld_sched)
9062 /* If the target has load delay slots, then there's no benefit
9063 to using an ldm instruction unless the offset is zero and
9064 we are optimizing for size. */
9065 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9066 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9067 && (val_diff == 4 || val_diff == -4));
9070 return ((REGNO (reg0) == REGNO (reg1))
9071 && (val_diff == 4 || val_diff == -4));
9074 return 0;
9078 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9079 HOST_WIDE_INT *load_offset)
9081 int unsorted_regs[4];
9082 HOST_WIDE_INT unsorted_offsets[4];
9083 int order[4];
9084 int base_reg = -1;
9085 int i;
9087 /* Can only handle 2, 3, or 4 insns at present,
9088 though could be easily extended if required. */
9089 gcc_assert (nops >= 2 && nops <= 4);
9091 memset (order, 0, 4 * sizeof (int));
9093 /* Loop over the operands and check that the memory references are
9094 suitable (i.e. immediate offsets from the same base register). At
9095 the same time, extract the target register, and the memory
9096 offsets. */
9097 for (i = 0; i < nops; i++)
9099 rtx reg;
9100 rtx offset;
9102 /* Convert a subreg of a mem into the mem itself. */
9103 if (GET_CODE (operands[nops + i]) == SUBREG)
9104 operands[nops + i] = alter_subreg (operands + (nops + i));
9106 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9108 /* Don't reorder volatile memory references; it doesn't seem worth
9109 looking for the case where the order is ok anyway. */
9110 if (MEM_VOLATILE_P (operands[nops + i]))
9111 return 0;
9113 offset = const0_rtx;
9115 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9116 || (GET_CODE (reg) == SUBREG
9117 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9118 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9119 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9120 == REG)
9121 || (GET_CODE (reg) == SUBREG
9122 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9123 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9124 == CONST_INT)))
9126 if (i == 0)
9128 base_reg = REGNO (reg);
9129 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
9130 ? REGNO (operands[i])
9131 : REGNO (SUBREG_REG (operands[i])));
9132 order[0] = 0;
9134 else
9136 if (base_reg != (int) REGNO (reg))
9137 /* Not addressed from the same base register. */
9138 return 0;
9140 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9141 ? REGNO (operands[i])
9142 : REGNO (SUBREG_REG (operands[i])));
9143 if (unsorted_regs[i] < unsorted_regs[order[0]])
9144 order[0] = i;
9147 /* If it isn't an integer register, or if it overwrites the
9148 base register but isn't the last insn in the list, then
9149 we can't do this. */
9150 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
9151 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9152 return 0;
9154 unsorted_offsets[i] = INTVAL (offset);
9156 else
9157 /* Not a suitable memory address. */
9158 return 0;
9161 /* All the useful information has now been extracted from the
9162 operands into unsorted_regs and unsorted_offsets; additionally,
9163 order[0] has been set to the lowest numbered register in the
9164 list. Sort the registers into order, and check that the memory
9165 offsets are ascending and adjacent. */
9167 for (i = 1; i < nops; i++)
9169 int j;
9171 order[i] = order[i - 1];
9172 for (j = 0; j < nops; j++)
9173 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9174 && (order[i] == order[i - 1]
9175 || unsorted_regs[j] < unsorted_regs[order[i]]))
9176 order[i] = j;
9178 /* Have we found a suitable register? If not, one must be used more
9179 than once. */
9180 if (order[i] == order[i - 1])
9181 return 0;
9183 /* Is the memory address adjacent and ascending? */
9184 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9185 return 0;
9188 if (base)
9190 *base = base_reg;
9192 for (i = 0; i < nops; i++)
9193 regs[i] = unsorted_regs[order[i]];
9195 *load_offset = unsorted_offsets[order[0]];
9198 if (unsorted_offsets[order[0]] == 0)
9199 return 1; /* ldmia */
9201 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9202 return 2; /* ldmib */
9204 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9205 return 3; /* ldmda */
9207 if (unsorted_offsets[order[nops - 1]] == -4)
9208 return 4; /* ldmdb */
9210 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9211 if the offset isn't small enough. The reason 2 ldrs are faster
9212 is because these ARMs are able to do more than one cache access
9213 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9214 whilst the ARM8 has a double bandwidth cache. This means that
9215 these cores can do both an instruction fetch and a data fetch in
9216 a single cycle, so the trick of calculating the address into a
9217 scratch register (one of the result regs) and then doing a load
9218 multiple actually becomes slower (and no smaller in code size).
9219 That is the transformation
9221 ldr rd1, [rbase + offset]
9222 ldr rd2, [rbase + offset + 4]
   to
9226 add rd1, rbase, offset
9227 ldmia rd1, {rd1, rd2}
9229 produces worse code -- '3 cycles + any stalls on rd2' instead of
9230 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9231 access per cycle, the first sequence could never complete in less
9232 than 6 cycles, whereas the ldm sequence would only take 5 and
9233 would make better use of sequential accesses if not hitting the
9234 cache.
9236 We cheat here and test 'arm_ld_sched' which we currently know to
9237 only be true for the ARM8, ARM9 and StrongARM. If this ever
9238 changes, then the test below needs to be reworked. */
9239 if (nops == 2 && arm_ld_sched)
9240 return 0;
9242 /* Can't do it without setting up the offset, only do this if it takes
9243 no more than one insn. */
9244 return (const_ok_for_arm (unsorted_offsets[order[0]])
9245 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
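/* As a concrete example: loads of r0 from [r4, #4] and r1 from [r4, #8]
   give base register r4, REGS = {r0, r1} and LOAD_OFFSET = 4, so in ARM
   state the function returns 2 (ldmib).  */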
9248 const char *
9249 emit_ldm_seq (rtx *operands, int nops)
9251 int regs[4];
9252 int base_reg;
9253 HOST_WIDE_INT offset;
9254 char buf[100];
9255 int i;
9257 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9259 case 1:
9260 strcpy (buf, "ldm%(ia%)\t");
9261 break;
9263 case 2:
9264 strcpy (buf, "ldm%(ib%)\t");
9265 break;
9267 case 3:
9268 strcpy (buf, "ldm%(da%)\t");
9269 break;
9271 case 4:
9272 strcpy (buf, "ldm%(db%)\t");
9273 break;
9275 case 5:
9276 if (offset >= 0)
9277 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9278 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9279 (long) offset);
9280 else
9281 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9282 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9283 (long) -offset);
9284 output_asm_insn (buf, operands);
9285 base_reg = regs[0];
9286 strcpy (buf, "ldm%(ia%)\t");
9287 break;
9289 default:
9290 gcc_unreachable ();
9293 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9294 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9296 for (i = 1; i < nops; i++)
9297 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9298 reg_names[regs[i]]);
9300 strcat (buf, "}\t%@ phole ldm");
9302 output_asm_insn (buf, operands);
9303 return "";
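/* For the fall-back case 5 above, loads of r0, r1, r2 from [r4, #16],
   [r4, #20] and [r4, #24] are emitted roughly as
        add     r0, r4, #16
        ldmia   r0, {r0, r1, r2}
   reusing the first destination register as the new base.  */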
9307 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9308 HOST_WIDE_INT * load_offset)
9310 int unsorted_regs[4];
9311 HOST_WIDE_INT unsorted_offsets[4];
9312 int order[4];
9313 int base_reg = -1;
9314 int i;
9316 /* Can only handle 2, 3, or 4 insns at present, though could be easily
9317 extended if required. */
9318 gcc_assert (nops >= 2 && nops <= 4);
9320 memset (order, 0, 4 * sizeof (int));
9322 /* Loop over the operands and check that the memory references are
9323 suitable (i.e. immediate offsets from the same base register). At
9324 the same time, extract the target register, and the memory
9325 offsets. */
9326 for (i = 0; i < nops; i++)
9328 rtx reg;
9329 rtx offset;
9331 /* Convert a subreg of a mem into the mem itself. */
9332 if (GET_CODE (operands[nops + i]) == SUBREG)
9333 operands[nops + i] = alter_subreg (operands + (nops + i));
9335 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9337 /* Don't reorder volatile memory references; it doesn't seem worth
9338 looking for the case where the order is ok anyway. */
9339 if (MEM_VOLATILE_P (operands[nops + i]))
9340 return 0;
9342 offset = const0_rtx;
9344 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9345 || (GET_CODE (reg) == SUBREG
9346 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9347 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9348 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9349 == REG)
9350 || (GET_CODE (reg) == SUBREG
9351 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9352 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9353 == CONST_INT)))
9355 if (i == 0)
9357 base_reg = REGNO (reg);
9358 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
9359 ? REGNO (operands[i])
9360 : REGNO (SUBREG_REG (operands[i])));
9361 order[0] = 0;
9363 else
9365 if (base_reg != (int) REGNO (reg))
9366 /* Not addressed from the same base register. */
9367 return 0;
9369 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9370 ? REGNO (operands[i])
9371 : REGNO (SUBREG_REG (operands[i])));
9372 if (unsorted_regs[i] < unsorted_regs[order[0]])
9373 order[0] = i;
9376 /* If it isn't an integer register, then we can't do this. */
9377 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9378 return 0;
9380 unsorted_offsets[i] = INTVAL (offset);
9382 else
9383 /* Not a suitable memory address. */
9384 return 0;
9387 /* All the useful information has now been extracted from the
9388 operands into unsorted_regs and unsorted_offsets; additionally,
9389 order[0] has been set to the lowest numbered register in the
9390 list. Sort the registers into order, and check that the memory
9391 offsets are ascending and adjacent. */
9393 for (i = 1; i < nops; i++)
9395 int j;
9397 order[i] = order[i - 1];
9398 for (j = 0; j < nops; j++)
9399 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9400 && (order[i] == order[i - 1]
9401 || unsorted_regs[j] < unsorted_regs[order[i]]))
9402 order[i] = j;
9404 /* Have we found a suitable register? If not, one must be used more
9405 than once. */
9406 if (order[i] == order[i - 1])
9407 return 0;
9409 /* Is the memory address adjacent and ascending? */
9410 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9411 return 0;
9414 if (base)
9416 *base = base_reg;
9418 for (i = 0; i < nops; i++)
9419 regs[i] = unsorted_regs[order[i]];
9421 *load_offset = unsorted_offsets[order[0]];
9424 if (unsorted_offsets[order[0]] == 0)
9425 return 1; /* stmia */
9427 if (unsorted_offsets[order[0]] == 4)
9428 return 2; /* stmib */
9430 if (unsorted_offsets[order[nops - 1]] == 0)
9431 return 3; /* stmda */
9433 if (unsorted_offsets[order[nops - 1]] == -4)
9434 return 4; /* stmdb */
9436 return 0;
9439 const char *
9440 emit_stm_seq (rtx *operands, int nops)
9442 int regs[4];
9443 int base_reg;
9444 HOST_WIDE_INT offset;
9445 char buf[100];
9446 int i;
9448 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9450 case 1:
9451 strcpy (buf, "stm%(ia%)\t");
9452 break;
9454 case 2:
9455 strcpy (buf, "stm%(ib%)\t");
9456 break;
9458 case 3:
9459 strcpy (buf, "stm%(da%)\t");
9460 break;
9462 case 4:
9463 strcpy (buf, "stm%(db%)\t");
9464 break;
9466 default:
9467 gcc_unreachable ();
9470 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9471 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9473 for (i = 1; i < nops; i++)
9474 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9475 reg_names[regs[i]]);
9477 strcat (buf, "}\t%@ phole stm");
9479 output_asm_insn (buf, operands);
9480 return "";
9483 /* Routines for use in generating RTL. */
9486 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9487 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9489 HOST_WIDE_INT offset = *offsetp;
9490 int i = 0, j;
9491 rtx result;
9492 int sign = up ? 1 : -1;
9493 rtx mem, addr;
9495 /* XScale has load-store double instructions, but they have stricter
9496 alignment requirements than load-store multiple, so we cannot
9497 use them.
9499 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9500 the pipeline until completion.
9502 NREGS CYCLES
  1     3
  2     4
  3     5
  4     6
9508 An ldr instruction takes 1-3 cycles, but does not block the
9509 pipeline.
9511 NREGS CYCLES
9512 1 1-3
9513 2 2-6
9514 3 3-9
9515 4 4-12
9517 Best case ldr will always win. However, the more ldr instructions
9518 we issue, the less likely we are to be able to schedule them well.
9519 Using ldr instructions also increases code size.
9521 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9522 for counts of 3 or 4 regs. */
9523 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9525 rtx seq;
9527 start_sequence ();
9529 for (i = 0; i < count; i++)
9531 addr = plus_constant (from, i * 4 * sign);
9532 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9533 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9534 offset += 4 * sign;
9537 if (write_back)
9539 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9540 *offsetp = offset;
9543 seq = get_insns ();
9544 end_sequence ();
9546 return seq;
9549 result = gen_rtx_PARALLEL (VOIDmode,
9550 rtvec_alloc (count + (write_back ? 1 : 0)));
9551 if (write_back)
9553 XVECEXP (result, 0, 0)
9554 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9555 i = 1;
9556 count++;
9559 for (j = 0; i < count; i++, j++)
9561 addr = plus_constant (from, j * 4 * sign);
9562 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9563 XVECEXP (result, 0, i)
9564 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
9565 offset += 4 * sign;
9568 if (write_back)
9569 *offsetp = offset;
9571 return result;
9575 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9576 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9578 HOST_WIDE_INT offset = *offsetp;
9579 int i = 0, j;
9580 rtx result;
9581 int sign = up ? 1 : -1;
9582 rtx mem, addr;
9584 /* See arm_gen_load_multiple for discussion of
9585 the pros/cons of ldm/stm usage for XScale. */
9586 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9588 rtx seq;
9590 start_sequence ();
9592 for (i = 0; i < count; i++)
9594 addr = plus_constant (to, i * 4 * sign);
9595 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9596 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9597 offset += 4 * sign;
9600 if (write_back)
9602 emit_move_insn (to, plus_constant (to, count * 4 * sign));
9603 *offsetp = offset;
9606 seq = get_insns ();
9607 end_sequence ();
9609 return seq;
9612 result = gen_rtx_PARALLEL (VOIDmode,
9613 rtvec_alloc (count + (write_back ? 1 : 0)));
9614 if (write_back)
9616 XVECEXP (result, 0, 0)
9617 = gen_rtx_SET (VOIDmode, to,
9618 plus_constant (to, count * 4 * sign));
9619 i = 1;
9620 count++;
9623 for (j = 0; i < count; i++, j++)
9625 addr = plus_constant (to, j * 4 * sign);
9626 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9627 XVECEXP (result, 0, i)
9628 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9629 offset += 4 * sign;
9632 if (write_back)
9633 *offsetp = offset;
9635 return result;
9639 arm_gen_movmemqi (rtx *operands)
9641 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9642 HOST_WIDE_INT srcoffset, dstoffset;
9643 int i;
9644 rtx src, dst, srcbase, dstbase;
9645 rtx part_bytes_reg = NULL;
9646 rtx mem;
9648 if (GET_CODE (operands[2]) != CONST_INT
9649 || GET_CODE (operands[3]) != CONST_INT
9650 || INTVAL (operands[2]) > 64
9651 || INTVAL (operands[3]) & 3)
9652 return 0;
9654 dstbase = operands[0];
9655 srcbase = operands[1];
9657 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9658 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9660 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9661 out_words_to_go = INTVAL (operands[2]) / 4;
9662 last_bytes = INTVAL (operands[2]) & 3;
9663 dstoffset = srcoffset = 0;
9665 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9666 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9668 for (i = 0; in_words_to_go >= 2; i+=4)
9670 if (in_words_to_go > 4)
9671 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9672 srcbase, &srcoffset));
9673 else
9674 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9675 FALSE, srcbase, &srcoffset));
9677 if (out_words_to_go)
9679 if (out_words_to_go > 4)
9680 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9681 dstbase, &dstoffset));
9682 else if (out_words_to_go != 1)
9683 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9684 dst, TRUE,
9685 (last_bytes == 0
9686 ? FALSE : TRUE),
9687 dstbase, &dstoffset));
9688 else
9690 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9691 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9692 if (last_bytes != 0)
9694 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9695 dstoffset += 4;
9700 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9701 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9704 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9705 if (out_words_to_go)
9707 rtx sreg;
9709 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9710 sreg = copy_to_reg (mem);
9712 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9713 emit_move_insn (mem, sreg);
9714 in_words_to_go--;
9716 gcc_assert (!in_words_to_go); /* Sanity check */
9719 if (in_words_to_go)
9721 gcc_assert (in_words_to_go > 0);
9723 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9724 part_bytes_reg = copy_to_mode_reg (SImode, mem);
9727 gcc_assert (!last_bytes || part_bytes_reg);
9729 if (BYTES_BIG_ENDIAN && last_bytes)
9731 rtx tmp = gen_reg_rtx (SImode);
9733 /* The bytes we want are in the top end of the word. */
9734 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9735 GEN_INT (8 * (4 - last_bytes))));
9736 part_bytes_reg = tmp;
9738 while (last_bytes)
9740 mem = adjust_automodify_address (dstbase, QImode,
9741 plus_constant (dst, last_bytes - 1),
9742 dstoffset + last_bytes - 1);
9743 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9745 if (--last_bytes)
9747 tmp = gen_reg_rtx (SImode);
9748 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9749 part_bytes_reg = tmp;
9754 else
9756 if (last_bytes > 1)
9758 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9759 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9760 last_bytes -= 2;
9761 if (last_bytes)
9763 rtx tmp = gen_reg_rtx (SImode);
9764 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9765 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9766 part_bytes_reg = tmp;
9767 dstoffset += 2;
9771 if (last_bytes)
9773 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9774 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9778 return 1;
9781 /* Select a dominance comparison mode if possible for a test of the general
9782 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9783 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9784 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9785 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9786 In all cases OP will be either EQ or NE, but we don't need to know which
9787 here. If we are unable to support a dominance comparison we return
9788 CC mode. This will then fail to match for the RTL expressions that
9789 generate this call. */
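/* For example, for "a == 0 && b == 0" we are handed two EQ comparisons
   with COND_OR == DOM_CC_X_AND_Y and return CC_DEQmode, allowing the
   pair to be emitted as a cmp followed by a conditional cmpeq.  */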
9790 enum machine_mode
9791 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9793 enum rtx_code cond1, cond2;
9794 int swapped = 0;
9796 /* Currently we will probably get the wrong result if the individual
9797 comparisons are not simple. This also ensures that it is safe to
9798 reverse a comparison if necessary. */
9799 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
9800 != CCmode)
9801 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
9802 != CCmode))
9803 return CCmode;
9805 /* The if_then_else variant of this tests the second condition if the
9806 first passes, but is true if the first fails. Reverse the first
9807 condition to get a true "inclusive-or" expression. */
9808 if (cond_or == DOM_CC_NX_OR_Y)
9809 cond1 = reverse_condition (cond1);
9811 /* If the comparisons are not equal, and one doesn't dominate the other,
9812 then we can't do this. */
9813 if (cond1 != cond2
9814 && !comparison_dominates_p (cond1, cond2)
9815 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
9816 return CCmode;
9818 if (swapped)
9820 enum rtx_code temp = cond1;
9821 cond1 = cond2;
9822 cond2 = temp;
9825 switch (cond1)
9827 case EQ:
9828 if (cond_or == DOM_CC_X_AND_Y)
9829 return CC_DEQmode;
9831 switch (cond2)
9833 case EQ: return CC_DEQmode;
9834 case LE: return CC_DLEmode;
9835 case LEU: return CC_DLEUmode;
9836 case GE: return CC_DGEmode;
9837 case GEU: return CC_DGEUmode;
9838 default: gcc_unreachable ();
9841 case LT:
9842 if (cond_or == DOM_CC_X_AND_Y)
9843 return CC_DLTmode;
9845 switch (cond2)
9847 case LT:
9848 return CC_DLTmode;
9849 case LE:
9850 return CC_DLEmode;
9851 case NE:
9852 return CC_DNEmode;
9853 default:
9854 gcc_unreachable ();
9857 case GT:
9858 if (cond_or == DOM_CC_X_AND_Y)
9859 return CC_DGTmode;
9861 switch (cond2)
9863 case GT:
9864 return CC_DGTmode;
9865 case GE:
9866 return CC_DGEmode;
9867 case NE:
9868 return CC_DNEmode;
9869 default:
9870 gcc_unreachable ();
9873 case LTU:
9874 if (cond_or == DOM_CC_X_AND_Y)
9875 return CC_DLTUmode;
9877 switch (cond2)
9879 case LTU:
9880 return CC_DLTUmode;
9881 case LEU:
9882 return CC_DLEUmode;
9883 case NE:
9884 return CC_DNEmode;
9885 default:
9886 gcc_unreachable ();
9889 case GTU:
9890 if (cond_or == DOM_CC_X_AND_Y)
9891 return CC_DGTUmode;
9893 switch (cond2)
9895 case GTU:
9896 return CC_DGTUmode;
9897 case GEU:
9898 return CC_DGEUmode;
9899 case NE:
9900 return CC_DNEmode;
9901 default:
9902 gcc_unreachable ();
9905 /* The remaining cases only occur when both comparisons are the
9906 same. */
9907 case NE:
9908 gcc_assert (cond1 == cond2);
9909 return CC_DNEmode;
9911 case LE:
9912 gcc_assert (cond1 == cond2);
9913 return CC_DLEmode;
9915 case GE:
9916 gcc_assert (cond1 == cond2);
9917 return CC_DGEmode;
9919 case LEU:
9920 gcc_assert (cond1 == cond2);
9921 return CC_DLEUmode;
9923 case GEU:
9924 gcc_assert (cond1 == cond2);
9925 return CC_DGEUmode;
9927 default:
9928 gcc_unreachable ();
9932 enum machine_mode
9933 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9935 /* All floating point compares return CCFP if it is an equality
9936 comparison, and CCFPE otherwise. */
9937 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
9939 switch (op)
9941 case EQ:
9942 case NE:
9943 case UNORDERED:
9944 case ORDERED:
9945 case UNLT:
9946 case UNLE:
9947 case UNGT:
9948 case UNGE:
9949 case UNEQ:
9950 case LTGT:
9951 return CCFPmode;
9953 case LT:
9954 case LE:
9955 case GT:
9956 case GE:
9957 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
9958 return CCFPmode;
9959 return CCFPEmode;
9961 default:
9962 gcc_unreachable ();
9966 /* A compare with a shifted operand. Because of canonicalization, the
9967 comparison will have to be swapped when we emit the assembler. */
9968 if (GET_MODE (y) == SImode
9969 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9970 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9971 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
9972 || GET_CODE (x) == ROTATERT))
9973 return CC_SWPmode;
9975 /* This operation is performed swapped, but since we only rely on the Z
9976 flag we don't need an additional mode. */
9977 if (GET_MODE (y) == SImode
9978 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9979 && GET_CODE (x) == NEG
9980 && (op == EQ || op == NE))
9981 return CC_Zmode;
9983 /* This is a special case that is used by combine to allow a
9984 comparison of a shifted byte load to be split into a zero-extend
9985 followed by a comparison of the shifted integer (only valid for
9986 equalities and unsigned inequalities). */
9987 if (GET_MODE (x) == SImode
9988 && GET_CODE (x) == ASHIFT
9989 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
9990 && GET_CODE (XEXP (x, 0)) == SUBREG
9991 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
9992 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
9993 && (op == EQ || op == NE
9994 || op == GEU || op == GTU || op == LTU || op == LEU)
9995 && GET_CODE (y) == CONST_INT)
9996 return CC_Zmode;
9998 /* A construct for a conditional compare: if the false arm contains
9999 0, then both conditions must be true; otherwise either condition
10000 must be true. Not all conditions are possible, so CCmode is
10001 returned if it can't be done. */
10002 if (GET_CODE (x) == IF_THEN_ELSE
10003 && (XEXP (x, 2) == const0_rtx
10004 || XEXP (x, 2) == const1_rtx)
10005 && COMPARISON_P (XEXP (x, 0))
10006 && COMPARISON_P (XEXP (x, 1)))
10007 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10008 INTVAL (XEXP (x, 2)));
10010 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10011 if (GET_CODE (x) == AND
10012 && COMPARISON_P (XEXP (x, 0))
10013 && COMPARISON_P (XEXP (x, 1)))
10014 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10015 DOM_CC_X_AND_Y);
10017 if (GET_CODE (x) == IOR
10018 && COMPARISON_P (XEXP (x, 0))
10019 && COMPARISON_P (XEXP (x, 1)))
10020 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10021 DOM_CC_X_OR_Y);
10023 /* An operation (on Thumb) where we want to test for a single bit.
10024 This is done by shifting that bit up into the top bit of a
10025 scratch register; we can then branch on the sign bit. */
10026 if (TARGET_THUMB1
10027 && GET_MODE (x) == SImode
10028 && (op == EQ || op == NE)
10029 && GET_CODE (x) == ZERO_EXTRACT
10030 && XEXP (x, 1) == const1_rtx)
10031 return CC_Nmode;
10033 /* For an operation that sets the condition codes as a side-effect, the
10034 V flag is not set correctly, so we can only use comparisons where
10035 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10036 instead.) */
10037 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10038 if (GET_MODE (x) == SImode
10039 && y == const0_rtx
10040 && (op == EQ || op == NE || op == LT || op == GE)
10041 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10042 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10043 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10044 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10045 || GET_CODE (x) == LSHIFTRT
10046 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10047 || GET_CODE (x) == ROTATERT
10048 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10049 return CC_NOOVmode;
10051 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10052 return CC_Zmode;
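/* An addition compared (LTU/GEU) against one of its own operands is an
   unsigned carry-out test -- e.g. (ltu (plus a b) a) holds exactly when
   the addition overflows -- so only the C flag is needed.  */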
10054 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10055 && GET_CODE (x) == PLUS
10056 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10057 return CC_Cmode;
10059 return CCmode;
10062 /* X and Y are two things to compare using CODE. Emit the compare insn and
10063 return the rtx for register 0 in the proper mode. FP means this is a
10064 floating point compare: I don't think that it is needed on the arm. */
10066 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10068 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
10069 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10071 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10073 return cc_reg;
10076 /* Generate a sequence of insns that will generate the correct return
10077 address mask depending on the physical architecture that the program
10078 is running on. */
10080 arm_gen_return_addr_mask (void)
10082 rtx reg = gen_reg_rtx (Pmode);
10084 emit_insn (gen_return_addr_mask (reg));
10085 return reg;
10088 void
10089 arm_reload_in_hi (rtx *operands)
10091 rtx ref = operands[1];
10092 rtx base, scratch;
10093 HOST_WIDE_INT offset = 0;
10095 if (GET_CODE (ref) == SUBREG)
10097 offset = SUBREG_BYTE (ref);
10098 ref = SUBREG_REG (ref);
10101 if (GET_CODE (ref) == REG)
10103 /* We have a pseudo which has been spilt onto the stack; there
10104 are two cases here: the first where there is a simple
10105 stack-slot replacement and a second where the stack-slot is
10106 out of range, or is used as a subreg. */
10107 if (reg_equiv_mem[REGNO (ref)])
10109 ref = reg_equiv_mem[REGNO (ref)];
10110 base = find_replacement (&XEXP (ref, 0));
10112 else
10113 /* The slot is out of range, or was dressed up in a SUBREG. */
10114 base = reg_equiv_address[REGNO (ref)];
10116 else
10117 base = find_replacement (&XEXP (ref, 0));
10119 /* Handle the case where the address is too complex to be offset by 1. */
10120 if (GET_CODE (base) == MINUS
10121 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10123 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10125 emit_set_insn (base_plus, base);
10126 base = base_plus;
10128 else if (GET_CODE (base) == PLUS)
10130 /* The addend must be CONST_INT, or we would have dealt with it above. */
10131 HOST_WIDE_INT hi, lo;
10133 offset += INTVAL (XEXP (base, 1));
10134 base = XEXP (base, 0);
10136 /* Rework the address into a legal sequence of insns. */
10137 /* Valid range for lo is -4095 -> 4095 */
10138 lo = (offset >= 0
10139 ? (offset & 0xfff)
10140 : -((-offset) & 0xfff));
10142 /* Corner case: if lo is the max offset then we would be out of range
10143 once we have added the additional 1 below, so bump the msb into the
10144 pre-loading insn(s). */
10145 if (lo == 4095)
10146 lo &= 0x7ff;
10148 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10149 ^ (HOST_WIDE_INT) 0x80000000)
10150 - (HOST_WIDE_INT) 0x80000000);
10152 gcc_assert (hi + lo == offset);
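/* For example, offset 0x1235 splits into lo = 0x235 and hi = 0x1000;
   offset 0x1fff (where lo would initially be 4095) splits into
   lo = 0x7ff and hi = 0x1800.  In both cases hi + lo == offset.  */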
10154 if (hi != 0)
10156 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10158 /* Get the base address; addsi3 knows how to handle constants
10159 that require more than one insn. */
10160 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10161 base = base_plus;
10162 offset = lo;
10166 /* Operands[2] may overlap operands[0] (though it won't overlap
10167 operands[1]); that's why we asked for a DImode reg -- so we can
10168 use the half that does not overlap. */
10169 if (REGNO (operands[2]) == REGNO (operands[0]))
10170 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10171 else
10172 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10174 emit_insn (gen_zero_extendqisi2 (scratch,
10175 gen_rtx_MEM (QImode,
10176 plus_constant (base,
10177 offset))));
10178 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10179 gen_rtx_MEM (QImode,
10180 plus_constant (base,
10181 offset + 1))));
10182 if (!BYTES_BIG_ENDIAN)
10183 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10184 gen_rtx_IOR (SImode,
10185 gen_rtx_ASHIFT
10186 (SImode,
10187 gen_rtx_SUBREG (SImode, operands[0], 0),
10188 GEN_INT (8)),
10189 scratch));
10190 else
10191 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10192 gen_rtx_IOR (SImode,
10193 gen_rtx_ASHIFT (SImode, scratch,
10194 GEN_INT (8)),
10195 gen_rtx_SUBREG (SImode, operands[0], 0)));
10198 /* Handle storing a half-word to memory during reload by synthesizing it as two
10199 byte stores. Take care not to clobber the input values until after we
10200 have moved them somewhere safe. This code assumes that if the DImode
10201 scratch in operands[2] overlaps either the input value or output address
10202 in some way, then that value must die in this insn (we absolutely need
10203 two scratch registers for some corner cases). */
10204 void
10205 arm_reload_out_hi (rtx *operands)
10207 rtx ref = operands[0];
10208 rtx outval = operands[1];
10209 rtx base, scratch;
10210 HOST_WIDE_INT offset = 0;
10212 if (GET_CODE (ref) == SUBREG)
10214 offset = SUBREG_BYTE (ref);
10215 ref = SUBREG_REG (ref);
10218 if (GET_CODE (ref) == REG)
10220 /* We have a pseudo which has been spilt onto the stack; there
10221 are two cases here: the first where there is a simple
10222 stack-slot replacement and a second where the stack-slot is
10223 out of range, or is used as a subreg. */
10224 if (reg_equiv_mem[REGNO (ref)])
10226 ref = reg_equiv_mem[REGNO (ref)];
10227 base = find_replacement (&XEXP (ref, 0));
10229 else
10230 /* The slot is out of range, or was dressed up in a SUBREG. */
10231 base = reg_equiv_address[REGNO (ref)];
10233 else
10234 base = find_replacement (&XEXP (ref, 0));
10236 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10238 /* Handle the case where the address is too complex to be offset by 1. */
10239 if (GET_CODE (base) == MINUS
10240 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10242 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10244 /* Be careful not to destroy OUTVAL. */
10245 if (reg_overlap_mentioned_p (base_plus, outval))
10247 /* Updating base_plus might destroy outval; see if we can
10248 swap the scratch and base_plus. */
10249 if (!reg_overlap_mentioned_p (scratch, outval))
10251 rtx tmp = scratch;
10252 scratch = base_plus;
10253 base_plus = tmp;
10255 else
10257 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10259 /* Be conservative and copy OUTVAL into the scratch now;
10260 this should only be necessary if outval is a subreg
10261 of something larger than a word. */
10262 /* XXX Might this clobber base? I can't see how it can,
10263 since scratch is known to overlap with OUTVAL, and
10264 must be wider than a word. */
10265 emit_insn (gen_movhi (scratch_hi, outval));
10266 outval = scratch_hi;
10270 emit_set_insn (base_plus, base);
10271 base = base_plus;
10273 else if (GET_CODE (base) == PLUS)
10275 /* The addend must be CONST_INT, or we would have dealt with it above. */
10276 HOST_WIDE_INT hi, lo;
10278 offset += INTVAL (XEXP (base, 1));
10279 base = XEXP (base, 0);
10281 /* Rework the address into a legal sequence of insns. */
10282 /* Valid range for lo is -4095 -> 4095 */
10283 lo = (offset >= 0
10284 ? (offset & 0xfff)
10285 : -((-offset) & 0xfff));
10287 /* Corner case: if lo is the max offset then we would be out of range
10288 once we have added the additional 1 below, so bump the msb into the
10289 pre-loading insn(s). */
10290 if (lo == 4095)
10291 lo &= 0x7ff;
10293 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10294 ^ (HOST_WIDE_INT) 0x80000000)
10295 - (HOST_WIDE_INT) 0x80000000);
10297 gcc_assert (hi + lo == offset);
10299 if (hi != 0)
10301 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10303 /* Be careful not to destroy OUTVAL. */
10304 if (reg_overlap_mentioned_p (base_plus, outval))
10306 /* Updating base_plus might destroy outval; see if we
10307 can swap the scratch and base_plus. */
10308 if (!reg_overlap_mentioned_p (scratch, outval))
10310 rtx tmp = scratch;
10311 scratch = base_plus;
10312 base_plus = tmp;
10314 else
10316 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10318 /* Be conservative and copy outval into scratch now;
10319 this should only be necessary if outval is a
10320 subreg of something larger than a word. */
10321 /* XXX Might this clobber base? I can't see how it
10322 can, since scratch is known to overlap with
10323 outval. */
10324 emit_insn (gen_movhi (scratch_hi, outval));
10325 outval = scratch_hi;
10329 /* Get the base address; addsi3 knows how to handle constants
10330 that require more than one insn. */
10331 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10332 base = base_plus;
10333 offset = lo;
10337 if (BYTES_BIG_ENDIAN)
10339 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10340 plus_constant (base, offset + 1)),
10341 gen_lowpart (QImode, outval)));
10342 emit_insn (gen_lshrsi3 (scratch,
10343 gen_rtx_SUBREG (SImode, outval, 0),
10344 GEN_INT (8)));
10345 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10346 gen_lowpart (QImode, scratch)));
10348 else
10350 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10351 gen_lowpart (QImode, outval)));
10352 emit_insn (gen_lshrsi3 (scratch,
10353 gen_rtx_SUBREG (SImode, outval, 0),
10354 GEN_INT (8)));
10355 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10356 plus_constant (base, offset + 1)),
10357 gen_lowpart (QImode, scratch)));
10361 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10362 (padded to the size of a word) should be passed in a register. */
10364 static bool
10365 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10367 if (TARGET_AAPCS_BASED)
10368 return must_pass_in_stack_var_size (mode, type);
10369 else
10370 return must_pass_in_stack_var_size_or_pad (mode, type);
10374 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10375 Return true if an argument passed on the stack should be padded upwards,
10376 i.e. if the least-significant byte has useful data.
10377 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10378 aggregate types are placed in the lowest memory address. */
10380 bool
10381 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10383 if (!TARGET_AAPCS_BASED)
10384 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10386 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10387 return false;
10389 return true;
10393 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10394 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10395 byte of the register has useful data, and return the opposite if the
10396 most significant byte does.
10397 For AAPCS, small aggregates and small complex types are always padded
10398 upwards. */
10400 bool
10401 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10402 tree type, int first ATTRIBUTE_UNUSED)
10404 if (TARGET_AAPCS_BASED
10405 && BYTES_BIG_ENDIAN
10406 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10407 && int_size_in_bytes (type) <= 4)
10408 return true;
10410 /* Otherwise, use default padding. */
10411 return !BYTES_BIG_ENDIAN;
10415 /* Print a symbolic form of X to the debug file, F. */
10416 static void
10417 arm_print_value (FILE *f, rtx x)
10419 switch (GET_CODE (x))
10421 case CONST_INT:
10422 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10423 return;
10425 case CONST_DOUBLE:
10426 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10427 return;
10429 case CONST_VECTOR:
10431 int i;
10433 fprintf (f, "<");
10434 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10436 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10437 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10438 fputc (',', f);
10440 fprintf (f, ">");
10442 return;
10444 case CONST_STRING:
10445 fprintf (f, "\"%s\"", XSTR (x, 0));
10446 return;
10448 case SYMBOL_REF:
10449 fprintf (f, "`%s'", XSTR (x, 0));
10450 return;
10452 case LABEL_REF:
10453 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10454 return;
10456 case CONST:
10457 arm_print_value (f, XEXP (x, 0));
10458 return;
10460 case PLUS:
10461 arm_print_value (f, XEXP (x, 0));
10462 fprintf (f, "+");
10463 arm_print_value (f, XEXP (x, 1));
10464 return;
10466 case PC:
10467 fprintf (f, "pc");
10468 return;
10470 default:
10471 fprintf (f, "????");
10472 return;
10476 /* Routines for manipulation of the constant pool. */
10478 /* Arm instructions cannot load a large constant directly into a
10479 register; they have to come from a pc relative load. The constant
10480 must therefore be placed in the addressable range of the pc
10481 relative load. Depending on the precise pc relative load
10482 instruction the range is somewhere between 256 bytes and 4k. This
10483 means that we often have to dump a constant inside a function, and
10484 generate code to branch around it.
10486 It is important to minimize this, since the branches will slow
10487 things down and make the code larger.
10489 Normally we can hide the table after an existing unconditional
10490 branch so that there is no interruption of the flow, but in the
10491 worst case the code looks like this:
10493 ldr rn, L1
10495 b L2
10496 align
10497 L1: .long value
10501 ldr rn, L3
10503 b L4
10504 align
10505 L3: .long value
10509 We fix this by performing a scan after scheduling, which notices
10510 which instructions need to have their operands fetched from the
10511 constant table and builds the table.
10513 The algorithm starts by building a table of all the constants that
10514 need fixing up and all the natural barriers in the function (places
10515 where a constant table can be dropped without breaking the flow).
10516 For each fixup we note how far the pc-relative replacement will be
10517 able to reach and the offset of the instruction into the function.
10519 Having built the table we then group the fixes together to form
10520 tables that are as large as possible (subject to addressing
10521 constraints) and emit each table of constants after the last
10522 barrier that is within range of all the instructions in the group.
10523 If a group does not contain a barrier, then we forcibly create one
10524 by inserting a jump instruction into the flow. Once the table has
10525 been inserted, the insns are then modified to reference the
10526 relevant entry in the pool.
10528 Possible enhancements to the algorithm (not implemented) are:
10530 1) For some processors and object formats, there may be benefit in
10531 aligning the pools to the start of cache lines; this alignment
10532 would need to be taken into account when calculating addressability
10533 of a pool. */
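/* The forward and backward reach of each fix is taken from the
   "pool_range" and "neg_pool_range" attributes of the insn that needs
   the constant; see push_minipool_fix below.  */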
10535 /* These typedefs are located at the start of this file, so that
10536 they can be used in the prototypes there. This comment is to
10537 remind readers of that fact so that the following structures
10538 can be understood more easily.
10540 typedef struct minipool_node Mnode;
10541 typedef struct minipool_fixup Mfix; */
10543 struct minipool_node
10545 /* Doubly linked chain of entries. */
10546 Mnode * next;
10547 Mnode * prev;
10548 /* The maximum offset into the code at which this entry can be placed. While
10549 pushing fixes for forward references, all entries are sorted in order
10550 of increasing max_address. */
10551 HOST_WIDE_INT max_address;
10552 /* Similarly for an entry inserted for a backwards ref. */
10553 HOST_WIDE_INT min_address;
10554 /* The number of fixes referencing this entry. This can become zero
10555 if we "unpush" an entry. In this case we ignore the entry when we
10556 come to emit the code. */
10557 int refcount;
10558 /* The offset from the start of the minipool. */
10559 HOST_WIDE_INT offset;
10560 /* The value in the table. */
10561 rtx value;
10562 /* The mode of value. */
10563 enum machine_mode mode;
10564 /* The size of the value. With iWMMXt enabled,
10565 sizes > 4 also imply an alignment of 8 bytes. */
10566 int fix_size;
10569 struct minipool_fixup
10571 Mfix * next;
10572 rtx insn;
10573 HOST_WIDE_INT address;
10574 rtx * loc;
10575 enum machine_mode mode;
10576 int fix_size;
10577 rtx value;
10578 Mnode * minipool;
10579 HOST_WIDE_INT forwards;
10580 HOST_WIDE_INT backwards;
10583 /* Fixes less than a word need padding out to a word boundary. */
10584 #define MINIPOOL_FIX_SIZE(mode) \
10585 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
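/* For example, an HImode (2-byte) entry still occupies 4 bytes in the
   pool, while DImode and larger entries keep their natural size.  */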
10587 static Mnode * minipool_vector_head;
10588 static Mnode * minipool_vector_tail;
10589 static rtx minipool_vector_label;
10590 static int minipool_pad;
10592 /* The linked list of all minipool fixes required for this function. */
10593 Mfix * minipool_fix_head;
10594 Mfix * minipool_fix_tail;
10595 /* The fix entry for the current minipool, once it has been placed. */
10596 Mfix * minipool_barrier;
10598 /* Determines if INSN is the start of a jump table. Returns the end
10599 of the TABLE or NULL_RTX. */
10600 static rtx
10601 is_jump_table (rtx insn)
10603 rtx table;
10605 if (GET_CODE (insn) == JUMP_INSN
10606 && JUMP_LABEL (insn) != NULL
10607 && ((table = next_real_insn (JUMP_LABEL (insn)))
10608 == next_real_insn (insn))
10609 && table != NULL
10610 && GET_CODE (table) == JUMP_INSN
10611 && (GET_CODE (PATTERN (table)) == ADDR_VEC
10612 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
10613 return table;
10615 return NULL_RTX;
10618 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10619 #define JUMP_TABLES_IN_TEXT_SECTION 0
10620 #endif
10622 static HOST_WIDE_INT
10623 get_jump_table_size (rtx insn)
10625 /* ADDR_VECs only take room if read-only data goes into the text
10626 section. */
10627 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10629 rtx body = PATTERN (insn);
10630 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10631 HOST_WIDE_INT size;
10632 HOST_WIDE_INT modesize;
10634 modesize = GET_MODE_SIZE (GET_MODE (body));
10635 size = modesize * XVECLEN (body, elt);
10636 switch (modesize)
10638 case 1:
10639 /* Round up size of TBB table to a halfword boundary. */
10640 size = (size + 1) & ~(HOST_WIDE_INT)1;
10641 break;
10642 case 2:
10643 /* No padding necessary for TBH. */
10644 break;
10645 case 4:
10646 /* Add two bytes for alignment on Thumb. */
10647 if (TARGET_THUMB)
10648 size += 2;
10649 break;
10650 default:
10651 gcc_unreachable ();
10653 return size;
10656 return 0;
10659 /* Move a minipool fix MP from its current location to before MAX_MP.
10660 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10661 constraints may need updating. */
10662 static Mnode *
10663 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10664 HOST_WIDE_INT max_address)
10666 /* The code below assumes these are different. */
10667 gcc_assert (mp != max_mp);
10669 if (max_mp == NULL)
10671 if (max_address < mp->max_address)
10672 mp->max_address = max_address;
10674 else
10676 if (max_address > max_mp->max_address - mp->fix_size)
10677 mp->max_address = max_mp->max_address - mp->fix_size;
10678 else
10679 mp->max_address = max_address;
10681 /* Unlink MP from its current position. Since max_mp is non-null,
10682 mp->prev must be non-null. */
10683 mp->prev->next = mp->next;
10684 if (mp->next != NULL)
10685 mp->next->prev = mp->prev;
10686 else
10687 minipool_vector_tail = mp->prev;
10689 /* Re-insert it before MAX_MP. */
10690 mp->next = max_mp;
10691 mp->prev = max_mp->prev;
10692 max_mp->prev = mp;
10694 if (mp->prev != NULL)
10695 mp->prev->next = mp;
10696 else
10697 minipool_vector_head = mp;
10700 /* Save the new entry. */
10701 max_mp = mp;
10703 /* Scan over the preceding entries and adjust their addresses as
10704 required. */
10705 while (mp->prev != NULL
10706 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10708 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10709 mp = mp->prev;
10712 return max_mp;
10715 /* Add a constant to the minipool for a forward reference. Returns the
10716 node added or NULL if the constant will not fit in this pool. */
10717 static Mnode *
10718 add_minipool_forward_ref (Mfix *fix)
10720 /* If set, max_mp is the first pool_entry that has a lower
10721 constraint than the one we are trying to add. */
10722 Mnode * max_mp = NULL;
10723 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10724 Mnode * mp;
10726 /* If the minipool starts before the end of FIX->INSN then this FIX
10727 cannot be placed into the current pool. Furthermore, adding the
10728 new constant pool entry may cause the pool to start FIX_SIZE bytes
10729 earlier. */
10730 if (minipool_vector_head &&
10731 (fix->address + get_attr_length (fix->insn)
10732 >= minipool_vector_head->max_address - fix->fix_size))
10733 return NULL;
10735 /* Scan the pool to see if a constant with the same value has
10736 already been added. While we are doing this, also note the
10737 location where we must insert the constant if it doesn't already
10738 exist. */
10739 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10741 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10742 && fix->mode == mp->mode
10743 && (GET_CODE (fix->value) != CODE_LABEL
10744 || (CODE_LABEL_NUMBER (fix->value)
10745 == CODE_LABEL_NUMBER (mp->value)))
10746 && rtx_equal_p (fix->value, mp->value))
10748 /* More than one fix references this entry. */
10749 mp->refcount++;
10750 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10753 /* Note the insertion point if necessary. */
10754 if (max_mp == NULL
10755 && mp->max_address > max_address)
10756 max_mp = mp;
10758 /* If we are inserting an 8-byte aligned quantity and
10759 we have not already found an insertion point, then
10760 make sure that all such 8-byte aligned quantities are
10761 placed at the start of the pool. */
10762 if (ARM_DOUBLEWORD_ALIGN
10763 && max_mp == NULL
10764 && fix->fix_size >= 8
10765 && mp->fix_size < 8)
10767 max_mp = mp;
10768 max_address = mp->max_address;
10772 /* The value is not currently in the minipool, so we need to create
10773 a new entry for it. If MAX_MP is NULL, the entry will be put on
10774 the end of the list since the placement is less constrained than
10775 any existing entry. Otherwise, we insert the new fix before
10776 MAX_MP and, if necessary, adjust the constraints on the other
10777 entries. */
10778 mp = XNEW (Mnode);
10779 mp->fix_size = fix->fix_size;
10780 mp->mode = fix->mode;
10781 mp->value = fix->value;
10782 mp->refcount = 1;
10783 /* Not yet required for a backwards ref. */
10784 mp->min_address = -65536;
10786 if (max_mp == NULL)
10788 mp->max_address = max_address;
10789 mp->next = NULL;
10790 mp->prev = minipool_vector_tail;
10792 if (mp->prev == NULL)
10794 minipool_vector_head = mp;
10795 minipool_vector_label = gen_label_rtx ();
10797 else
10798 mp->prev->next = mp;
10800 minipool_vector_tail = mp;
10802 else
10804 if (max_address > max_mp->max_address - mp->fix_size)
10805 mp->max_address = max_mp->max_address - mp->fix_size;
10806 else
10807 mp->max_address = max_address;
10809 mp->next = max_mp;
10810 mp->prev = max_mp->prev;
10811 max_mp->prev = mp;
10812 if (mp->prev != NULL)
10813 mp->prev->next = mp;
10814 else
10815 minipool_vector_head = mp;
10818 /* Save the new entry. */
10819 max_mp = mp;
10821 /* Scan over the preceding entries and adjust their addresses as
10822 required. */
10823 while (mp->prev != NULL
10824 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10826 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10827 mp = mp->prev;
10830 return max_mp;
10833 static Mnode *
10834 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10835 HOST_WIDE_INT min_address)
10837 HOST_WIDE_INT offset;
10839 /* The code below assumes these are different. */
10840 gcc_assert (mp != min_mp);
10842 if (min_mp == NULL)
10844 if (min_address > mp->min_address)
10845 mp->min_address = min_address;
10847 else
10849 /* We will adjust this below if it is too loose. */
10850 mp->min_address = min_address;
10852 /* Unlink MP from its current position. Since min_mp is non-null,
10853 mp->next must be non-null. */
10854 mp->next->prev = mp->prev;
10855 if (mp->prev != NULL)
10856 mp->prev->next = mp->next;
10857 else
10858 minipool_vector_head = mp->next;
10860 /* Reinsert it after MIN_MP. */
10861 mp->prev = min_mp;
10862 mp->next = min_mp->next;
10863 min_mp->next = mp;
10864 if (mp->next != NULL)
10865 mp->next->prev = mp;
10866 else
10867 minipool_vector_tail = mp;
10870 min_mp = mp;
10872 offset = 0;
10873 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10875 mp->offset = offset;
10876 if (mp->refcount > 0)
10877 offset += mp->fix_size;
10879 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
10880 mp->next->min_address = mp->min_address + mp->fix_size;
10883 return min_mp;
10886 /* Add a constant to the minipool for a backward reference. Returns the
10887 node added or NULL if the constant will not fit in this pool.
10889 Note that the code for insertion for a backwards reference can be
10890 somewhat confusing because the calculated offsets for each fix do
10891 not take into account the size of the pool (which is still under
10892 construction). */
10893 static Mnode *
10894 add_minipool_backward_ref (Mfix *fix)
10896 /* If set, min_mp is the last pool_entry that has a lower constraint
10897 than the one we are trying to add. */
10898 Mnode *min_mp = NULL;
10899 /* This can be negative, since it is only a constraint. */
10900 HOST_WIDE_INT min_address = fix->address - fix->backwards;
10901 Mnode *mp;
10903 /* If we can't reach the current pool from this insn, or if we can't
10904 insert this entry at the end of the pool without pushing other
10905 fixes out of range, then we don't try. This ensures that we
10906 can't fail later on. */
10907 if (min_address >= minipool_barrier->address
10908 || (minipool_vector_tail->min_address + fix->fix_size
10909 >= minipool_barrier->address))
10910 return NULL;
10912 /* Scan the pool to see if a constant with the same value has
10913 already been added. While we are doing this, also note the
10914 location where we must insert the constant if it doesn't already
10915 exist. */
10916 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
10918 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10919 && fix->mode == mp->mode
10920 && (GET_CODE (fix->value) != CODE_LABEL
10921 || (CODE_LABEL_NUMBER (fix->value)
10922 == CODE_LABEL_NUMBER (mp->value)))
10923 && rtx_equal_p (fix->value, mp->value)
10924 /* Check that there is enough slack to move this entry to the
10925 end of the table (this is conservative). */
10926 && (mp->max_address
10927 > (minipool_barrier->address
10928 + minipool_vector_tail->offset
10929 + minipool_vector_tail->fix_size)))
10931 mp->refcount++;
10932 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
10935 if (min_mp != NULL)
10936 mp->min_address += fix->fix_size;
10937 else
10939 /* Note the insertion point if necessary. */
10940 if (mp->min_address < min_address)
10942 /* For now, we do not allow the insertion of nodes requiring
10943 8-byte alignment anywhere but at the start of the pool. */
10944 if (ARM_DOUBLEWORD_ALIGN
10945 && fix->fix_size >= 8 && mp->fix_size < 8)
10946 return NULL;
10947 else
10948 min_mp = mp;
10950 else if (mp->max_address
10951 < minipool_barrier->address + mp->offset + fix->fix_size)
10953 /* Inserting before this entry would push the fix beyond
10954 its maximum address (which can happen if we have
10955 re-located a forwards fix); force the new fix to come
10956 after it. */
10957 if (ARM_DOUBLEWORD_ALIGN
10958 && fix->fix_size >= 8 && mp->fix_size < 8)
10959 return NULL;
10960 else
10962 min_mp = mp;
10963 min_address = mp->min_address + fix->fix_size;
10966 /* Do not insert a non-8-byte aligned quantity before 8-byte
10967 aligned quantities. */
10968 else if (ARM_DOUBLEWORD_ALIGN
10969 && fix->fix_size < 8
10970 && mp->fix_size >= 8)
10972 min_mp = mp;
10973 min_address = mp->min_address + fix->fix_size;
10978 /* We need to create a new entry. */
10979 mp = XNEW (Mnode);
10980 mp->fix_size = fix->fix_size;
10981 mp->mode = fix->mode;
10982 mp->value = fix->value;
10983 mp->refcount = 1;
10984 mp->max_address = minipool_barrier->address + 65536;
10986 mp->min_address = min_address;
10988 if (min_mp == NULL)
10990 mp->prev = NULL;
10991 mp->next = minipool_vector_head;
10993 if (mp->next == NULL)
10995 minipool_vector_tail = mp;
10996 minipool_vector_label = gen_label_rtx ();
10998 else
10999 mp->next->prev = mp;
11001 minipool_vector_head = mp;
11003 else
11005 mp->next = min_mp->next;
11006 mp->prev = min_mp;
11007 min_mp->next = mp;
11009 if (mp->next != NULL)
11010 mp->next->prev = mp;
11011 else
11012 minipool_vector_tail = mp;
11015 /* Save the new entry. */
11016 min_mp = mp;
11018 if (mp->prev)
11019 mp = mp->prev;
11020 else
11021 mp->offset = 0;
11023 /* Scan over the following entries and adjust their offsets. */
11024 while (mp->next != NULL)
11026 if (mp->next->min_address < mp->min_address + mp->fix_size)
11027 mp->next->min_address = mp->min_address + mp->fix_size;
11029 if (mp->refcount)
11030 mp->next->offset = mp->offset + mp->fix_size;
11031 else
11032 mp->next->offset = mp->offset;
11034 mp = mp->next;
11037 return min_mp;
11040 static void
11041 assign_minipool_offsets (Mfix *barrier)
11043 HOST_WIDE_INT offset = 0;
11044 Mnode *mp;
11046 minipool_barrier = barrier;
11048 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11050 mp->offset = offset;
11052 if (mp->refcount > 0)
11053 offset += mp->fix_size;
11057 /* Output the literal table. */
11058 static void
11059 dump_minipool (rtx scan)
11061 Mnode * mp;
11062 Mnode * nmp;
11063 int align64 = 0;
11065 if (ARM_DOUBLEWORD_ALIGN)
11066 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11067 if (mp->refcount > 0 && mp->fix_size >= 8)
11069 align64 = 1;
11070 break;
11073 if (dump_file)
11074 fprintf (dump_file,
11075 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11076 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11078 scan = emit_label_after (gen_label_rtx (), scan);
11079 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11080 scan = emit_label_after (minipool_vector_label, scan);
11082 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11084 if (mp->refcount > 0)
11086 if (dump_file)
11088 fprintf (dump_file,
11089 ";; Offset %u, min %ld, max %ld ",
11090 (unsigned) mp->offset, (unsigned long) mp->min_address,
11091 (unsigned long) mp->max_address);
11092 arm_print_value (dump_file, mp->value);
11093 fputc ('\n', dump_file);
11096 switch (mp->fix_size)
11098 #ifdef HAVE_consttable_1
11099 case 1:
11100 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11101 break;
11103 #endif
11104 #ifdef HAVE_consttable_2
11105 case 2:
11106 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11107 break;
11109 #endif
11110 #ifdef HAVE_consttable_4
11111 case 4:
11112 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11113 break;
11115 #endif
11116 #ifdef HAVE_consttable_8
11117 case 8:
11118 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11119 break;
11121 #endif
11122 #ifdef HAVE_consttable_16
11123 case 16:
11124 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11125 break;
11127 #endif
11128 default:
11129 gcc_unreachable ();
11133 nmp = mp->next;
11134 free (mp);
11137 minipool_vector_head = minipool_vector_tail = NULL;
11138 scan = emit_insn_after (gen_consttable_end (), scan);
11139 scan = emit_barrier_after (scan);
11142 /* Return the cost of forcibly inserting a barrier after INSN. */
11143 static int
11144 arm_barrier_cost (rtx insn)
11146 /* Basing the location of the pool on the loop depth is preferable,
11147 but at the moment, the basic block information seems to be
11148 corrupted by this stage of the compilation. */
11149 int base_cost = 50;
11150 rtx next = next_nonnote_insn (insn);
11152 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11153 base_cost -= 20;
11155 switch (GET_CODE (insn))
11157 case CODE_LABEL:
11158 /* It will always be better to place the table before the label, rather
11159 than after it. */
11160 return 50;
11162 case INSN:
11163 case CALL_INSN:
11164 return base_cost;
11166 case JUMP_INSN:
11167 return base_cost - 10;
11169 default:
11170 return base_cost + 10;
11174 /* Find the best place in the insn stream in the range
11175 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11176 Create the barrier by inserting a jump, and add a new fix entry for
11177 it. */
11178 static Mfix *
11179 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11181 HOST_WIDE_INT count = 0;
11182 rtx barrier;
11183 rtx from = fix->insn;
11184 /* The instruction after which we will insert the jump. */
11185 rtx selected = NULL;
11186 int selected_cost;
11187 /* The address at which the jump instruction will be placed. */
11188 HOST_WIDE_INT selected_address;
11189 Mfix * new_fix;
11190 HOST_WIDE_INT max_count = max_address - fix->address;
11191 rtx label = gen_label_rtx ();
11193 selected_cost = arm_barrier_cost (from);
11194 selected_address = fix->address;
11196 while (from && count < max_count)
11198 rtx tmp;
11199 int new_cost;
11201 /* This code shouldn't have been called if there was a natural barrier
11202 within range. */
11203 gcc_assert (GET_CODE (from) != BARRIER);
11205 /* Count the length of this insn. */
11206 count += get_attr_length (from);
11208 /* If there is a jump table, add its length. */
11209 tmp = is_jump_table (from);
11210 if (tmp != NULL)
11212 count += get_jump_table_size (tmp);
11214 /* Jump tables aren't in a basic block, so base the cost on
11215 the dispatch insn. If we select this location, we will
11216 still put the pool after the table. */
11217 new_cost = arm_barrier_cost (from);
11219 if (count < max_count
11220 && (!selected || new_cost <= selected_cost))
11222 selected = tmp;
11223 selected_cost = new_cost;
11224 selected_address = fix->address + count;
11227 /* Continue after the dispatch table. */
11228 from = NEXT_INSN (tmp);
11229 continue;
11232 new_cost = arm_barrier_cost (from);
11234 if (count < max_count
11235 && (!selected || new_cost <= selected_cost))
11237 selected = from;
11238 selected_cost = new_cost;
11239 selected_address = fix->address + count;
11242 from = NEXT_INSN (from);
11245 /* Make sure that we found a place to insert the jump. */
11246 gcc_assert (selected);
11248 /* Create a new JUMP_INSN that branches around a barrier. */
11249 from = emit_jump_insn_after (gen_jump (label), selected);
11250 JUMP_LABEL (from) = label;
11251 barrier = emit_barrier_after (from);
11252 emit_label_after (label, barrier);
11254 /* Create a minipool barrier entry for the new barrier. */
11255 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11256 new_fix->insn = barrier;
11257 new_fix->address = selected_address;
11258 new_fix->next = fix->next;
11259 fix->next = new_fix;
11261 return new_fix;
11264 /* Record that there is a natural barrier in the insn stream at
11265 ADDRESS. */
11266 static void
11267 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11269 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11271 fix->insn = insn;
11272 fix->address = address;
11274 fix->next = NULL;
11275 if (minipool_fix_head != NULL)
11276 minipool_fix_tail->next = fix;
11277 else
11278 minipool_fix_head = fix;
11280 minipool_fix_tail = fix;
11283 /* Record INSN, which will need fixing up to load a value from the
11284 minipool. ADDRESS is the offset of the insn from the start of the
11285 function; LOC is a pointer to the part of the insn which requires
11286 fixing; VALUE is the constant that must be loaded, which is of type
11287 MODE. */
11288 static void
11289 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11290 enum machine_mode mode, rtx value)
11292 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11294 fix->insn = insn;
11295 fix->address = address;
11296 fix->loc = loc;
11297 fix->mode = mode;
11298 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11299 fix->value = value;
11300 fix->forwards = get_attr_pool_range (insn);
11301 fix->backwards = get_attr_neg_pool_range (insn);
11302 fix->minipool = NULL;
11304 /* If an insn doesn't have a range defined for it, then it isn't
11305 expecting to be reworked by this code. Better to stop now than
11306 to generate duff assembly code. */
11307 gcc_assert (fix->forwards || fix->backwards);
11309 /* If an entry requires 8-byte alignment then assume all constant pools
11310 require 4 bytes of padding. Trying to do this later on a per-pool
11311 basis is awkward because existing pool entries have to be modified. */
11312 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11313 minipool_pad = 4;
11315 if (dump_file)
11317 fprintf (dump_file,
11318 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11319 GET_MODE_NAME (mode),
11320 INSN_UID (insn), (unsigned long) address,
11321 -1 * (long)fix->backwards, (long)fix->forwards);
11322 arm_print_value (dump_file, fix->value);
11323 fprintf (dump_file, "\n");
11326 /* Add it to the chain of fixes. */
11327 fix->next = NULL;
11329 if (minipool_fix_head != NULL)
11330 minipool_fix_tail->next = fix;
11331 else
11332 minipool_fix_head = fix;
11334 minipool_fix_tail = fix;
11337 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11338 Returns the number of insns needed, or 99 if we don't know how to
11339 do it. */
11341 arm_const_double_inline_cost (rtx val)
11343 rtx lowpart, highpart;
11344 enum machine_mode mode;
11346 mode = GET_MODE (val);
11348 if (mode == VOIDmode)
11349 mode = DImode;
11351 gcc_assert (GET_MODE_SIZE (mode) == 8);
11353 lowpart = gen_lowpart (SImode, val);
11354 highpart = gen_highpart_mode (SImode, mode, val);
11356 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11357 gcc_assert (GET_CODE (highpart) == CONST_INT);
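/* The cost is simply the sum of the insns needed to synthesize each
   32-bit half; for instance, a value whose two halves are both valid
   ARM immediates typically costs just 2 insns.  */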
11359 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11360 NULL_RTX, NULL_RTX, 0, 0)
11361 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11362 NULL_RTX, NULL_RTX, 0, 0));
11365 /* Return true if it is worthwhile to split a 64-bit constant into two
11366 32-bit operations. This is the case if optimizing for size, or
11367 if we have load delay slots, or if one 32-bit part can be done with
11368 a single data operation. */
11369 bool
11370 arm_const_double_by_parts (rtx val)
11372 enum machine_mode mode = GET_MODE (val);
11373 rtx part;
11375 if (optimize_size || arm_ld_sched)
11376 return true;
11378 if (mode == VOIDmode)
11379 mode = DImode;
11381 part = gen_highpart_mode (SImode, mode, val);
11383 gcc_assert (GET_CODE (part) == CONST_INT);
11385 if (const_ok_for_arm (INTVAL (part))
11386 || const_ok_for_arm (~INTVAL (part)))
11387 return true;
11389 part = gen_lowpart (SImode, val);
11391 gcc_assert (GET_CODE (part) == CONST_INT);
11393 if (const_ok_for_arm (INTVAL (part))
11394 || const_ok_for_arm (~INTVAL (part)))
11395 return true;
11397 return false;
11400 /* Scan INSN and note any of its operands that need fixing.
11401 If DO_PUSHES is false we do not actually push any of the fixups
11402 needed. The function returns TRUE if any fixups were needed/pushed.
11403 This is used by arm_memory_load_p() which needs to know about loads
11404 of constants that will be converted into minipool loads. */
11405 static bool
11406 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11408 bool result = false;
11409 int opno;
11411 extract_insn (insn);
11413 if (!constrain_operands (1))
11414 fatal_insn_not_found (insn);
11416 if (recog_data.n_alternatives == 0)
11417 return false;
11419 /* Fill in recog_op_alt with information about the constraints of
11420 this insn. */
11421 preprocess_constraints ();
11423 for (opno = 0; opno < recog_data.n_operands; opno++)
11425 /* Things we need to fix can only occur in inputs. */
11426 if (recog_data.operand_type[opno] != OP_IN)
11427 continue;
11429 /* If this alternative is a memory reference, then any mention
11430 of constants in this alternative is really to fool reload
11431 into allowing us to accept one there. We need to fix them up
11432 now so that we output the right code. */
11433 if (recog_op_alt[opno][which_alternative].memory_ok)
11435 rtx op = recog_data.operand[opno];
11437 if (CONSTANT_P (op))
11439 if (do_pushes)
11440 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11441 recog_data.operand_mode[opno], op);
11442 result = true;
11444 else if (GET_CODE (op) == MEM
11445 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11446 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11448 if (do_pushes)
11450 rtx cop = avoid_constant_pool_reference (op);
11452 /* Casting the address of something to a mode narrower
11453 than a word can cause avoid_constant_pool_reference()
11454 to return the pool reference itself. That's no good to
11455 us here. Let's just hope that we can use the
11456 constant pool value directly. */
11457 if (op == cop)
11458 cop = get_pool_constant (XEXP (op, 0));
11460 push_minipool_fix (insn, address,
11461 recog_data.operand_loc[opno],
11462 recog_data.operand_mode[opno], cop);
11465 result = true;
11470 return result;
11473 /* GCC puts the pool in the wrong place for ARM, since we can only
11474 load addresses a limited distance around the pc. We do some
11475 special munging to move the constant pool values to the correct
11476 point in the code. */
11477 static void
11478 arm_reorg (void)
11480 rtx insn;
11481 HOST_WIDE_INT address = 0;
11482 Mfix * fix;
11484 minipool_fix_head = minipool_fix_tail = NULL;
11486 /* The first insn must always be a note, or the code below won't
11487 scan it properly. */
11488 insn = get_insns ();
11489 gcc_assert (GET_CODE (insn) == NOTE);
11490 minipool_pad = 0;
11492 /* Scan all the insns and record the operands that will need fixing. */
11493 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11495 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11496 && (arm_cirrus_insn_p (insn)
11497 || GET_CODE (insn) == JUMP_INSN
11498 || arm_memory_load_p (insn)))
11499 cirrus_reorg (insn);
11501 if (GET_CODE (insn) == BARRIER)
11502 push_minipool_barrier (insn, address);
11503 else if (INSN_P (insn))
11505 rtx table;
11507 note_invalid_constants (insn, address, true);
11508 address += get_attr_length (insn);
11510 /* If the insn is a vector jump, add the size of the table
11511 and skip the table. */
11512 if ((table = is_jump_table (insn)) != NULL)
11514 address += get_jump_table_size (table);
11515 insn = table;
11520 fix = minipool_fix_head;
11522 /* Now scan the fixups and perform the required changes. */
11523 while (fix)
11525 Mfix * ftmp;
11526 Mfix * fdel;
11527 Mfix * last_added_fix;
11528 Mfix * last_barrier = NULL;
11529 Mfix * this_fix;
11531 /* Skip any further barriers before the next fix. */
11532 while (fix && GET_CODE (fix->insn) == BARRIER)
11533 fix = fix->next;
11535 /* No more fixes. */
11536 if (fix == NULL)
11537 break;
11539 last_added_fix = NULL;
11541 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11543 if (GET_CODE (ftmp->insn) == BARRIER)
11545 if (ftmp->address >= minipool_vector_head->max_address)
11546 break;
11548 last_barrier = ftmp;
11550 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11551 break;
11553 last_added_fix = ftmp; /* Keep track of the last fix added. */
11556 /* If we found a barrier, drop back to that; any fixes that we
11557 could have reached but come after the barrier will now go in
11558 the next mini-pool. */
11559 if (last_barrier != NULL)
11561 /* Reduce the refcount for those fixes that won't go into this
11562 pool after all. */
11563 for (fdel = last_barrier->next;
11564 fdel && fdel != ftmp;
11565 fdel = fdel->next)
11567 fdel->minipool->refcount--;
11568 fdel->minipool = NULL;
11571 ftmp = last_barrier;
11573 else
11575 /* ftmp is the first fix that we can't fit into this pool and
11576 there are no natural barriers that we could use. Insert a
11577 new barrier in the code somewhere between the previous
11578 fix and this one, and arrange to jump around it. */
11579 HOST_WIDE_INT max_address;
11581 /* The last item on the list of fixes must be a barrier, so
11582 we can never run off the end of the list of fixes without
11583 last_barrier being set. */
11584 gcc_assert (ftmp);
11586 max_address = minipool_vector_head->max_address;
11587 /* Check that there isn't another fix that is in range that
11588 we couldn't fit into this pool because the pool was
11589 already too large: we need to put the pool before such an
11590 instruction. The pool itself may come just after the
11591 fix because create_fix_barrier also allows space for a
11592 jump instruction. */
11593 if (ftmp->address < max_address)
11594 max_address = ftmp->address + 1;
11596 last_barrier = create_fix_barrier (last_added_fix, max_address);
11599 assign_minipool_offsets (last_barrier);
11601 while (ftmp)
11603 if (GET_CODE (ftmp->insn) != BARRIER
11604 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11605 == NULL))
11606 break;
11608 ftmp = ftmp->next;
11611 /* Scan over the fixes we have identified for this pool, fixing them
11612 up and adding the constants to the pool itself. */
11613 for (this_fix = fix; this_fix && ftmp != this_fix;
11614 this_fix = this_fix->next)
11615 if (GET_CODE (this_fix->insn) != BARRIER)
11617 rtx addr
11618 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11619 minipool_vector_label),
11620 this_fix->minipool->offset);
11621 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11624 dump_minipool (last_barrier->insn);
11625 fix = ftmp;
11628 /* From now on we must synthesize any constants that we can't handle
11629 directly. This can happen if the RTL gets split during final
11630 instruction generation. */
11631 after_arm_reorg = 1;
11633 /* Free the minipool memory. */
11634 obstack_free (&minipool_obstack, minipool_startobj);
11637 /* Routines to output assembly language. */
11639 /* If the rtx is the correct value then return the string of the number.
11640 In this way we can ensure that valid double constants are generated even
11641 when cross compiling. */
11642 const char *
11643 fp_immediate_constant (rtx x)
11645 REAL_VALUE_TYPE r;
11646 int i;
11648 if (!fp_consts_inited)
11649 init_fp_table ();
11651 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11652 for (i = 0; i < 8; i++)
11653 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11654 return strings_fp[i];
11656 gcc_unreachable ();
11659 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11660 static const char *
11661 fp_const_from_val (REAL_VALUE_TYPE *r)
11663 int i;
11665 if (!fp_consts_inited)
11666 init_fp_table ();
11668 for (i = 0; i < 8; i++)
11669 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11670 return strings_fp[i];
11672 gcc_unreachable ();
11675 /* Output the operands of a LDM/STM instruction to STREAM.
11676 MASK is the ARM register set mask of which only bits 0-15 are important.
11677 REG is the base register, either the frame pointer or the stack pointer.
11678 INSTR is the possibly suffixed load or store instruction.
11679 RFE is nonzero if the instruction should also copy spsr to cpsr. */
11681 static void
11682 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11683 unsigned long mask, int rfe)
11685 unsigned i;
11686 bool not_first = FALSE;
11688 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11689 fputc ('\t', stream);
11690 asm_fprintf (stream, instr, reg);
11691 fputc ('{', stream);
11693 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11694 if (mask & (1 << i))
11696 if (not_first)
11697 fprintf (stream, ", ");
11699 asm_fprintf (stream, "%r", i);
11700 not_first = TRUE;
11703 if (rfe)
11704 fprintf (stream, "}^\n");
11705 else
11706 fprintf (stream, "}\n");
11710 /* Output a FLDMD instruction to STREAM.
11711 BASE is the register containing the address.
11712 REG and COUNT specify the register range.
11713 Extra registers may be added to avoid hardware bugs.
11715 We output FLDMD even for ARMv5 VFP implementations. Although
11716 FLDMD is technically not supported until ARMv6, it is believed
11717 that all VFP implementations support its use in this context. */
11719 static void
11720 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11722 int i;
11724 /* Work around the ARM10 VFPr1 bug. */
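/* (The erratum is triggered by transfers of exactly two register pairs;
   as on the store side in vfp_emit_fstmd below, we dodge it by adding an
   extra register to the transfer.)  */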
11725 if (count == 2 && !arm_arch6)
11727 if (reg == 15)
11728 reg--;
11729 count++;
11732 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11733 load into multiple parts if we have to handle more than 16 registers. */
11734 if (count > 16)
11736 vfp_output_fldmd (stream, base, reg, 16);
11737 vfp_output_fldmd (stream, base, reg + 16, count - 16);
11738 return;
11741 fputc ('\t', stream);
11742 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11744 for (i = reg; i < reg + count; i++)
11746 if (i > reg)
11747 fputs (", ", stream);
11748 asm_fprintf (stream, "d%d", i);
11750 fputs ("}\n", stream);
11755 /* Output the assembly for a store multiple. */
11757 const char *
11758 vfp_output_fstmd (rtx * operands)
11760 char pattern[100];
11761 int p;
11762 int base;
11763 int i;
11765 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11766 p = strlen (pattern);
11768 gcc_assert (GET_CODE (operands[1]) == REG);
11770 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11771 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11773 p += sprintf (&pattern[p], ", d%d", base + i);
11775 strcpy (&pattern[p], "}");
11777 output_asm_insn (pattern, operands);
11778 return "";
11782 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
11783 number of bytes pushed. */
11785 static int
11786 vfp_emit_fstmd (int base_reg, int count)
11788 rtx par;
11789 rtx dwarf;
11790 rtx tmp, reg;
11791 int i;
11793 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
11794 register pairs are stored by a store multiple insn. We avoid this
11795 by pushing an extra pair. */
11796 if (count == 2 && !arm_arch6)
11798 if (base_reg == LAST_VFP_REGNUM - 3)
11799 base_reg -= 2;
11800 count++;
11803 /* FSTMD may not store more than 16 doubleword registers at once. Split
11804 larger stores into multiple parts (up to a maximum of two, in
11805 practice). */
11806 if (count > 16)
11808 int saved;
11809 /* NOTE: base_reg is an internal register number, so each D register
11810 counts as 2. */
11811 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
11812 saved += vfp_emit_fstmd (base_reg, 16);
11813 return saved;
11816 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11817 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
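/* PAR describes the push itself as a single store-multiple (an UNSPEC
   with a pre-modify of the stack pointer), while DWARF lists the
   equivalent stack adjustment and per-register stores; it is attached
   below as a REG_FRAME_RELATED_EXPR note so the unwinder sees the
   individual saves.  */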
11819 reg = gen_rtx_REG (DFmode, base_reg);
11820 base_reg += 2;
11822 XVECEXP (par, 0, 0)
11823 = gen_rtx_SET (VOIDmode,
11824 gen_frame_mem
11825 (BLKmode,
11826 gen_rtx_PRE_MODIFY (Pmode,
11827 stack_pointer_rtx,
11828 plus_constant
11829 (stack_pointer_rtx,
11830 - (count * 8)))
11832 gen_rtx_UNSPEC (BLKmode,
11833 gen_rtvec (1, reg),
11834 UNSPEC_PUSH_MULT));
11836 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11837 plus_constant (stack_pointer_rtx, -(count * 8)));
11838 RTX_FRAME_RELATED_P (tmp) = 1;
11839 XVECEXP (dwarf, 0, 0) = tmp;
11841 tmp = gen_rtx_SET (VOIDmode,
11842 gen_frame_mem (DFmode, stack_pointer_rtx),
11843 reg);
11844 RTX_FRAME_RELATED_P (tmp) = 1;
11845 XVECEXP (dwarf, 0, 1) = tmp;
11847 for (i = 1; i < count; i++)
11849 reg = gen_rtx_REG (DFmode, base_reg);
11850 base_reg += 2;
11851 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11853 tmp = gen_rtx_SET (VOIDmode,
11854 gen_frame_mem (DFmode,
11855 plus_constant (stack_pointer_rtx,
11856 i * 8)),
11857 reg);
11858 RTX_FRAME_RELATED_P (tmp) = 1;
11859 XVECEXP (dwarf, 0, i + 1) = tmp;
11862 par = emit_insn (par);
11863 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
11864 RTX_FRAME_RELATED_P (par) = 1;
11866 return count * 8;
11869 /* Emit a call instruction with pattern PAT. ADDR is the address of
11870 the call target. */
11872 void
11873 arm_emit_call_insn (rtx pat, rtx addr)
11875 rtx insn;
11877 insn = emit_call_insn (pat);
11879 /* The PIC register is live on entry to VxWorks PIC PLT entries.
11880 If the call might use such an entry, add a use of the PIC register
11881 to the instruction's CALL_INSN_FUNCTION_USAGE. */
11882 if (TARGET_VXWORKS_RTP
11883 && flag_pic
11884 && GET_CODE (addr) == SYMBOL_REF
11885 && (SYMBOL_REF_DECL (addr)
11886 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
11887 : !SYMBOL_REF_LOCAL_P (addr)))
11889 require_pic_register ();
11890 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
11894 /* Output a 'call' insn. */
11895 const char *
11896 output_call (rtx *operands)
11898 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
11900 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
11901 if (REGNO (operands[0]) == LR_REGNUM)
11903 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
11904 output_asm_insn ("mov%?\t%0, %|lr", operands);
11907 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
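/* In ARM state the PC reads as the address of the current insn plus 8,
   so the "mov lr, pc" above leaves LR pointing just past the branch
   emitted below, i.e. at the correct return address.  */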
11909 if (TARGET_INTERWORK || arm_arch4t)
11910 output_asm_insn ("bx%?\t%0", operands);
11911 else
11912 output_asm_insn ("mov%?\t%|pc, %0", operands);
11914 return "";
11917 /* Output a 'call' insn that is a reference in memory. This is
11918 disabled for ARMv5 and we prefer a blx instead because otherwise
11919 there's a significant performance overhead. */
11920 const char *
11921 output_call_mem (rtx *operands)
11923 gcc_assert (!arm_arch5);
11924 if (TARGET_INTERWORK)
11926 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11927 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11928 output_asm_insn ("bx%?\t%|ip", operands);
11930 else if (regno_use_in (LR_REGNUM, operands[0]))
11932 /* LR is used in the memory address. We load the address in the
11933 first instruction. It's safe to use IP as the target of the
11934 load since the call will kill it anyway. */
11935 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11936 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11937 if (arm_arch4t)
11938 output_asm_insn ("bx%?\t%|ip", operands);
11939 else
11940 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
11942 else
11944 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11945 output_asm_insn ("ldr%?\t%|pc, %0", operands);
11948 return "";
11952 /* Output a move from arm registers to an fpa register.
11953 OPERANDS[0] is an fpa register.
11954 OPERANDS[1] is the first of the three arm registers holding the value. */
11955 const char *
11956 output_mov_long_double_fpa_from_arm (rtx *operands)
11958 int arm_reg0 = REGNO (operands[1]);
11959 rtx ops[3];
11961 gcc_assert (arm_reg0 != IP_REGNUM);
11963 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11964 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11965 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11967 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11968 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
11970 return "";
11973 /* Output a move from an fpa register to arm registers.
11974 OPERANDS[0] is the first of the three arm registers holding the value.
11975 OPERANDS[1] is an fpa register. */
11976 const char *
11977 output_mov_long_double_arm_from_fpa (rtx *operands)
11979 int arm_reg0 = REGNO (operands[0]);
11980 rtx ops[3];
11982 gcc_assert (arm_reg0 != IP_REGNUM);
11984 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11985 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11986 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11988 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
11989 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11990 return "";
11993 /* Output a move of a long double between arm registers.
11994 OPERANDS[0] is the destination.
11995 OPERANDS[1] is the source. */
11996 const char *
11997 output_mov_long_double_arm_from_arm (rtx *operands)
11999 /* We have to be careful here because the two might overlap. */
12000 int dest_start = REGNO (operands[0]);
12001 int src_start = REGNO (operands[1]);
12002 rtx ops[2];
12003 int i;
12005 if (dest_start < src_start)
12007 for (i = 0; i < 3; i++)
12009 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12010 ops[1] = gen_rtx_REG (SImode, src_start + i);
12011 output_asm_insn ("mov%?\t%0, %1", ops);
12014 else
12016 for (i = 2; i >= 0; i--)
12018 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12019 ops[1] = gen_rtx_REG (SImode, src_start + i);
12020 output_asm_insn ("mov%?\t%0, %1", ops);
12024 return "";
12027 void
12028 arm_emit_movpair (rtx dest, rtx src)
12030 /* If the src is an immediate, simplify it. */
12031 if (CONST_INT_P (src))
12033 HOST_WIDE_INT val = INTVAL (src);
12034 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12035 if ((val >> 16) & 0x0000ffff)
12036 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12037 GEN_INT (16)),
12038 GEN_INT ((val >> 16) & 0x0000ffff));
12039 return;
12041 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12042 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
12045 /* Output a move from arm registers to an fpa register.
12046 OPERANDS[0] is an fpa register.
12047 OPERANDS[1] is the first register of an arm register pair. */
12048 const char *
12049 output_mov_double_fpa_from_arm (rtx *operands)
12051 int arm_reg0 = REGNO (operands[1]);
12052 rtx ops[2];
12054 gcc_assert (arm_reg0 != IP_REGNUM);
12056 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12057 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12058 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12059 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12060 return "";
12063 /* Output a move from an fpa register to arm registers.
12064 OPERANDS[0] is the first register of an arm register pair.
12065 OPERANDS[1] is an fpa register. */
12066 const char *
12067 output_mov_double_arm_from_fpa (rtx *operands)
12069 int arm_reg0 = REGNO (operands[0]);
12070 rtx ops[2];
12072 gcc_assert (arm_reg0 != IP_REGNUM);
12074 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12075 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12076 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12077 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12078 return "";
12081 /* Output a move between double words.
12082 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12083 or MEM<-REG, and all MEMs must have offsettable addresses. */
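/* As an illustration (register numbers arbitrary): a REG<-MEM move with a
   plain register address normally becomes "ldrd r0, [r2]" when TARGET_LDRD
   is set, or "ldmia r2, {r0, r1}" otherwise; the MEM<-REG cases mirror this
   with strd/stmia.  */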
12084 const char *
12085 output_move_double (rtx *operands)
12087 enum rtx_code code0 = GET_CODE (operands[0]);
12088 enum rtx_code code1 = GET_CODE (operands[1]);
12089 rtx otherops[3];
12091 if (code0 == REG)
12093 unsigned int reg0 = REGNO (operands[0]);
12095 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12097 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12099 switch (GET_CODE (XEXP (operands[1], 0)))
12101 case REG:
12102 if (TARGET_LDRD
12103 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12104 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12105 else
12106 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12107 break;
12109 case PRE_INC:
12110 gcc_assert (TARGET_LDRD);
12111 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12112 break;
12114 case PRE_DEC:
12115 if (TARGET_LDRD)
12116 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12117 else
12118 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12119 break;
12121 case POST_INC:
12122 if (TARGET_LDRD)
12123 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12124 else
12125 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12126 break;
12128 case POST_DEC:
12129 gcc_assert (TARGET_LDRD);
12130 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12131 break;
12133 case PRE_MODIFY:
12134 case POST_MODIFY:
12135 /* Autoincrement addressing modes should never have overlapping
12136 base and destination registers, and overlapping index registers
12137 are already prohibited, so this doesn't need to worry about
12138 fix_cm3_ldrd. */
12139 otherops[0] = operands[0];
12140 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12141 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12143 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12145 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12147 /* Registers overlap so split out the increment. */
12148 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12149 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12151 else
12153 /* Use a single insn if we can.
12154 FIXME: IWMMXT allows offsets larger than ldrd can
12155 handle, fix these up with a pair of ldr. */
12156 if (TARGET_THUMB2
12157 || GET_CODE (otherops[2]) != CONST_INT
12158 || (INTVAL (otherops[2]) > -256
12159 && INTVAL (otherops[2]) < 256))
12160 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12161 else
12163 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12164 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12168 else
12170 /* Use a single insn if we can.
12171 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12172 fix these up with a pair of ldr. */
12173 if (TARGET_THUMB2
12174 || GET_CODE (otherops[2]) != CONST_INT
12175 || (INTVAL (otherops[2]) > -256
12176 && INTVAL (otherops[2]) < 256))
12177 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12178 else
12180 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12181 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12184 break;
12186 case LABEL_REF:
12187 case CONST:
12188 /* We might be able to use ldrd %0, %1 here. However the range is
12189 different to ldr/adr, and it is broken on some ARMv7-M
12190 implementations. */
12191 /* Use the second register of the pair to avoid problematic
12192 overlap. */
12193 otherops[1] = operands[1];
12194 output_asm_insn ("adr%?\t%0, %1", otherops);
12195 operands[1] = otherops[0];
12196 if (TARGET_LDRD)
12197 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12198 else
12199 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12200 break;
12202 /* ??? This needs checking for thumb2. */
12203 default:
12204 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12205 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12207 otherops[0] = operands[0];
12208 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12209 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12211 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12213 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12215 switch ((int) INTVAL (otherops[2]))
12217 case -8:
12218 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12219 return "";
12220 case -4:
12221 if (TARGET_THUMB2)
12222 break;
12223 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12224 return "";
12225 case 4:
12226 if (TARGET_THUMB2)
12227 break;
12228 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12229 return "";
12232 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12233 operands[1] = otherops[0];
12234 if (TARGET_LDRD
12235 && (GET_CODE (otherops[2]) == REG
12236 || TARGET_THUMB2
12237 || (GET_CODE (otherops[2]) == CONST_INT
12238 && INTVAL (otherops[2]) > -256
12239 && INTVAL (otherops[2]) < 256)))
12241 if (reg_overlap_mentioned_p (operands[0],
12242 otherops[2]))
12244 rtx tmp;
12245 /* Swap base and index registers over to
12246 avoid a conflict. */
12247 tmp = otherops[1];
12248 otherops[1] = otherops[2];
12249 otherops[2] = tmp;
12251 /* If both registers conflict, it will usually
12252 have been fixed by a splitter. */
12253 if (reg_overlap_mentioned_p (operands[0], otherops[2])
12254 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
12256 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12257 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12259 else
12261 otherops[0] = operands[0];
12262 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
12264 return "";
12267 if (GET_CODE (otherops[2]) == CONST_INT)
12269 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12270 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12271 else
12272 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12274 else
12275 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12277 else
12278 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12280 if (TARGET_LDRD)
12281 return "ldr%(d%)\t%0, [%1]";
12283 return "ldm%(ia%)\t%1, %M0";
12285 else
12287 otherops[1] = adjust_address (operands[1], SImode, 4);
12288 /* Take care of overlapping base/data reg. */
12289 if (reg_mentioned_p (operands[0], operands[1]))
12291 output_asm_insn ("ldr%?\t%0, %1", otherops);
12292 output_asm_insn ("ldr%?\t%0, %1", operands);
12294 else
12296 output_asm_insn ("ldr%?\t%0, %1", operands);
12297 output_asm_insn ("ldr%?\t%0, %1", otherops);
12302 else
12304 /* Constraints should ensure this. */
12305 gcc_assert (code0 == MEM && code1 == REG);
12306 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12308 switch (GET_CODE (XEXP (operands[0], 0)))
12310 case REG:
12311 if (TARGET_LDRD)
12312 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12313 else
12314 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12315 break;
12317 case PRE_INC:
12318 gcc_assert (TARGET_LDRD);
12319 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12320 break;
12322 case PRE_DEC:
12323 if (TARGET_LDRD)
12324 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12325 else
12326 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12327 break;
12329 case POST_INC:
12330 if (TARGET_LDRD)
12331 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12332 else
12333 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12334 break;
12336 case POST_DEC:
12337 gcc_assert (TARGET_LDRD);
12338 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12339 break;
12341 case PRE_MODIFY:
12342 case POST_MODIFY:
12343 otherops[0] = operands[1];
12344 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12345 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
12347 /* IWMMXT allows offsets larger than strd can handle;
12348 fix these up with a pair of str. */
12349 if (!TARGET_THUMB2
12350 && GET_CODE (otherops[2]) == CONST_INT
12351 && (INTVAL(otherops[2]) <= -256
12352 || INTVAL(otherops[2]) >= 256))
12354 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12356 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12357 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12359 else
12361 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12362 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12365 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12366 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12367 else
12368 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12369 break;
12371 case PLUS:
12372 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12373 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12375 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12377 case -8:
12378 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12379 return "";
12381 case -4:
12382 if (TARGET_THUMB2)
12383 break;
12384 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12385 return "";
12387 case 4:
12388 if (TARGET_THUMB2)
12389 break;
12390 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12391 return "";
12394 if (TARGET_LDRD
12395 && (GET_CODE (otherops[2]) == REG
12396 || TARGET_THUMB2
12397 || (GET_CODE (otherops[2]) == CONST_INT
12398 && INTVAL (otherops[2]) > -256
12399 && INTVAL (otherops[2]) < 256)))
12401 otherops[0] = operands[1];
12402 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12403 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
12404 return "";
12406 /* Fall through */
12408 default:
12409 otherops[0] = adjust_address (operands[0], SImode, 4);
12410 otherops[1] = operands[1];
12411 output_asm_insn ("str%?\t%1, %0", operands);
12412 output_asm_insn ("str%?\t%H1, %0", otherops);
12416 return "";
12419 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12420 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
12422 const char *
12423 output_move_quad (rtx *operands)
12425 if (REG_P (operands[0]))
12427 /* Load, or reg->reg move. */
12429 if (MEM_P (operands[1]))
12431 switch (GET_CODE (XEXP (operands[1], 0)))
12433 case REG:
12434 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12435 break;
12437 case LABEL_REF:
12438 case CONST:
12439 output_asm_insn ("adr%?\t%0, %1", operands);
12440 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12441 break;
12443 default:
12444 gcc_unreachable ();
12447 else
12449 rtx ops[2];
12450 int dest, src, i;
12452 gcc_assert (REG_P (operands[1]));
12454 dest = REGNO (operands[0]);
12455 src = REGNO (operands[1]);
12457 /* This seems pretty dumb, but hopefully GCC won't try to do it
12458 very often. */
12459 if (dest < src)
12460 for (i = 0; i < 4; i++)
12462 ops[0] = gen_rtx_REG (SImode, dest + i);
12463 ops[1] = gen_rtx_REG (SImode, src + i);
12464 output_asm_insn ("mov%?\t%0, %1", ops);
12466 else
12467 for (i = 3; i >= 0; i--)
12469 ops[0] = gen_rtx_REG (SImode, dest + i);
12470 ops[1] = gen_rtx_REG (SImode, src + i);
12471 output_asm_insn ("mov%?\t%0, %1", ops);
12475 else
12477 gcc_assert (MEM_P (operands[0]));
12478 gcc_assert (REG_P (operands[1]));
12479 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12481 switch (GET_CODE (XEXP (operands[0], 0)))
12483 case REG:
12484 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12485 break;
12487 default:
12488 gcc_unreachable ();
12492 return "";
12495 /* Output a VFP load or store instruction. */
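/* For example, a DFmode load with a PRE_DEC address comes out as
   "fldmdbd r3!, {d7}", while the default offset form comes out as
   "fldd d7, [r3, #8]"; the register numbers and offset are illustrative.  */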
12497 const char *
12498 output_move_vfp (rtx *operands)
12500 rtx reg, mem, addr, ops[2];
12501 int load = REG_P (operands[0]);
12502 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12503 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12504 const char *templ;
12505 char buff[50];
12506 enum machine_mode mode;
12508 reg = operands[!load];
12509 mem = operands[load];
12511 mode = GET_MODE (reg);
12513 gcc_assert (REG_P (reg));
12514 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12515 gcc_assert (mode == SFmode
12516 || mode == DFmode
12517 || mode == SImode
12518 || mode == DImode
12519 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12520 gcc_assert (MEM_P (mem));
12522 addr = XEXP (mem, 0);
12524 switch (GET_CODE (addr))
12526 case PRE_DEC:
12527 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12528 ops[0] = XEXP (addr, 0);
12529 ops[1] = reg;
12530 break;
12532 case POST_INC:
12533 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12534 ops[0] = XEXP (addr, 0);
12535 ops[1] = reg;
12536 break;
12538 default:
12539 templ = "f%s%c%%?\t%%%s0, %%1%s";
12540 ops[0] = reg;
12541 ops[1] = mem;
12542 break;
12545 sprintf (buff, templ,
12546 load ? "ld" : "st",
12547 dp ? 'd' : 's',
12548 dp ? "P" : "",
12549 integer_p ? "\t%@ int" : "");
12550 output_asm_insn (buff, ops);
12552 return "";
12555 /* Output a Neon quad-word load or store, or a load or store for
12556 larger structure modes.
12558 WARNING: The ordering of elements is weird in big-endian mode,
12559 because we use VSTM, as required by the EABI. GCC RTL defines
12560 element ordering based on in-memory order. This can differ
12561 from the architectural ordering of elements within a NEON register.
12562 The intrinsics defined in arm_neon.h use the NEON register element
12563 ordering, not the GCC RTL element ordering.
12565 For example, the in-memory ordering of a big-endian quadword
12566 vector with 16-bit elements when stored from register pair {d0,d1}
12567 will be (lowest address first, d0[N] is NEON register element N):
12569 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12571 When necessary, quadword registers (dN, dN+1) are moved to ARM
12572 registers starting at rN, in the order:
12574 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12576 This ensures that STM/LDM can be used on vectors in ARM registers, and
12577 that the same memory layout results as if VSTM/VLDM were used. */
12579 const char *
12580 output_move_neon (rtx *operands)
12582 rtx reg, mem, addr, ops[2];
12583 int regno, load = REG_P (operands[0]);
12584 const char *templ;
12585 char buff[50];
12586 enum machine_mode mode;
12588 reg = operands[!load];
12589 mem = operands[load];
12591 mode = GET_MODE (reg);
12593 gcc_assert (REG_P (reg));
12594 regno = REGNO (reg);
12595 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12596 || NEON_REGNO_OK_FOR_QUAD (regno));
12597 gcc_assert (VALID_NEON_DREG_MODE (mode)
12598 || VALID_NEON_QREG_MODE (mode)
12599 || VALID_NEON_STRUCT_MODE (mode));
12600 gcc_assert (MEM_P (mem));
12602 addr = XEXP (mem, 0);
12604 /* Strip off const from addresses like (const (plus (...))). */
12605 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12606 addr = XEXP (addr, 0);
12608 switch (GET_CODE (addr))
12610 case POST_INC:
12611 templ = "v%smia%%?\t%%0!, %%h1";
12612 ops[0] = XEXP (addr, 0);
12613 ops[1] = reg;
12614 break;
12616 case PRE_DEC:
12617 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12618 templ = "v%smdb%%?\t%%0!, %%h1";
12619 ops[0] = XEXP (addr, 0);
12620 ops[1] = reg;
12621 break;
12623 case POST_MODIFY:
12624 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12625 gcc_unreachable ();
12627 case LABEL_REF:
12628 case PLUS:
12630 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12631 int i;
12632 int overlap = -1;
12633 for (i = 0; i < nregs; i++)
12635 /* We're only using DImode here because it's a convenient size. */
12636 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12637 ops[1] = adjust_address (mem, DImode, 8 * i);
12638 if (reg_overlap_mentioned_p (ops[0], mem))
12640 gcc_assert (overlap == -1);
12641 overlap = i;
12643 else
12645 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12646 output_asm_insn (buff, ops);
12649 if (overlap != -1)
12651 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12652 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12653 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12654 output_asm_insn (buff, ops);
12657 return "";
12660 default:
12661 templ = "v%smia%%?\t%%m0, %%h1";
12662 ops[0] = mem;
12663 ops[1] = reg;
12666 sprintf (buff, templ, load ? "ld" : "st");
12667 output_asm_insn (buff, ops);
12669 return "";
12672 /* Output an ADD r, s, #n where n may be too big for one instruction.
12673 If the constant is zero and the source and destination are the same register, output nothing. */
12674 const char *
12675 output_add_immediate (rtx *operands)
12677 HOST_WIDE_INT n = INTVAL (operands[2]);
12679 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12681 if (n < 0)
12682 output_multi_immediate (operands,
12683 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12684 -n);
12685 else
12686 output_multi_immediate (operands,
12687 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12691 return "";
12694 /* Output a multiple immediate operation.
12695 OPERANDS is the vector of operands referred to in the output patterns.
12696 INSTR1 is the output pattern to use for the first constant.
12697 INSTR2 is the output pattern to use for subsequent constants.
12698 IMMED_OP is the index of the constant slot in OPERANDS.
12699 N is the constant value. */
12700 static const char *
12701 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12702 int immed_op, HOST_WIDE_INT n)
12704 #if HOST_BITS_PER_WIDE_INT > 32
12705 n &= 0xffffffff;
12706 #endif
12708 if (n == 0)
12710 /* Quick and easy output. */
12711 operands[immed_op] = const0_rtx;
12712 output_asm_insn (instr1, operands);
12714 else
12716 int i;
12717 const char * instr = instr1;
12719 /* Note that n is never zero here (which would give no output). */
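/* Each chunk peeled off below is an 8-bit field starting at an even bit
   position, which is exactly the form of an ARM rotated immediate; for
   instance n = 0x12345 would be emitted as three operations using #0x45,
   #0x2300 and #0x10000 (an illustrative split).  */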
12720 for (i = 0; i < 32; i += 2)
12722 if (n & (3 << i))
12724 operands[immed_op] = GEN_INT (n & (255 << i));
12725 output_asm_insn (instr, operands);
12726 instr = instr2;
12727 i += 6;
12732 return "";
12735 /* Return the name of a shifter operation. */
12736 static const char *
12737 arm_shift_nmem(enum rtx_code code)
12739 switch (code)
12741 case ASHIFT:
12742 return ARM_LSL_NAME;
12744 case ASHIFTRT:
12745 return "asr";
12747 case LSHIFTRT:
12748 return "lsr";
12750 case ROTATERT:
12751 return "ror";
12753 default:
12754 abort();
12758 /* Return the appropriate ARM instruction for the operation code.
12759 The returned result should not be overwritten. OP is the rtx of the
12760 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
12761 was shifted. */
12762 const char *
12763 arithmetic_instr (rtx op, int shift_first_arg)
12765 switch (GET_CODE (op))
12767 case PLUS:
12768 return "add";
12770 case MINUS:
12771 return shift_first_arg ? "rsb" : "sub";
12773 case IOR:
12774 return "orr";
12776 case XOR:
12777 return "eor";
12779 case AND:
12780 return "and";
12782 case ASHIFT:
12783 case ASHIFTRT:
12784 case LSHIFTRT:
12785 case ROTATERT:
12786 return arm_shift_nmem(GET_CODE(op));
12788 default:
12789 gcc_unreachable ();
12793 /* Ensure valid constant shifts and return the appropriate shift mnemonic
12794 for the operation code. The returned result should not be overwritten.
12795 OP is the rtx code of the shift.
12796 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
12797 constant shift amount otherwise. */
12798 static const char *
12799 shift_op (rtx op, HOST_WIDE_INT *amountp)
12801 const char * mnem;
12802 enum rtx_code code = GET_CODE (op);
12804 switch (GET_CODE (XEXP (op, 1)))
12806 case REG:
12807 case SUBREG:
12808 *amountp = -1;
12809 break;
12811 case CONST_INT:
12812 *amountp = INTVAL (XEXP (op, 1));
12813 break;
12815 default:
12816 gcc_unreachable ();
12819 switch (code)
12821 case ROTATE:
12822 gcc_assert (*amountp != -1);
12823 *amountp = 32 - *amountp;
12824 code = ROTATERT;
12826 /* Fall through. */
12828 case ASHIFT:
12829 case ASHIFTRT:
12830 case LSHIFTRT:
12831 case ROTATERT:
12832 mnem = arm_shift_nmem(code);
12833 break;
12835 case MULT:
12836 /* We never have to worry about the amount being other than a
12837 power of 2, since this case can never be reloaded from a reg. */
12838 gcc_assert (*amountp != -1);
12839 *amountp = int_log2 (*amountp);
12840 return ARM_LSL_NAME;
12842 default:
12843 gcc_unreachable ();
12846 if (*amountp != -1)
12848 /* This is not 100% correct, but follows from the desire to merge
12849 multiplication by a power of 2 with the recognizer for a
12850 shift. >=32 is not a valid shift for "lsl", so we must try and
12851 output a shift that produces the correct arithmetical result.
12852 Using lsr #32 is identical except for the fact that the carry bit
12853 is not set correctly if we set the flags; but we never use the
12854 carry bit from such an operation, so we can ignore that. */
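/* Concretely, a request for "lsl #33" is emitted below as "lsr #32"; both
   leave zero in the destination, which is the arithmetically correct result.  */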
12855 if (code == ROTATERT)
12856 /* Rotate is just modulo 32. */
12857 *amountp &= 31;
12858 else if (*amountp != (*amountp & 31))
12860 if (code == ASHIFT)
12861 mnem = "lsr";
12862 *amountp = 32;
12865 /* Shifts of 0 are no-ops. */
12866 if (*amountp == 0)
12867 return NULL;
12870 return mnem;
12873 /* Obtain the shift count corresponding to the power of two POWER. */
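/* For example, int_log2 (8) returns 3.  POWER is assumed to be a non-zero
   power of two; strictly, the loop below returns the index of the lowest
   set bit.  */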
12875 static HOST_WIDE_INT
12876 int_log2 (HOST_WIDE_INT power)
12878 HOST_WIDE_INT shift = 0;
12880 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
12882 gcc_assert (shift <= 31);
12883 shift++;
12886 return shift;
12889 /* Output a .ascii pseudo-op, keeping track of lengths. This is
12890 because /bin/as is horribly restrictive. The judgement about
12891 whether or not each character is 'printable' (and can be output as
12892 is) or not (and must be printed with an octal escape) must be made
12893 with reference to the *host* character set -- the situation is
12894 similar to that discussed in the comments above pp_c_char in
12895 c-pretty-print.c. */
12897 #define MAX_ASCII_LEN 51
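/* For instance, the bytes 'h', 'i', '\n' are emitted as
   .ascii "hi\012"
   with the non-printable character escaped in octal (illustrative input).  */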
12899 void
12900 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
12902 int i;
12903 int len_so_far = 0;
12905 fputs ("\t.ascii\t\"", stream);
12907 for (i = 0; i < len; i++)
12909 int c = p[i];
12911 if (len_so_far >= MAX_ASCII_LEN)
12913 fputs ("\"\n\t.ascii\t\"", stream);
12914 len_so_far = 0;
12917 if (ISPRINT (c))
12919 if (c == '\\' || c == '\"')
12921 putc ('\\', stream);
12922 len_so_far++;
12924 putc (c, stream);
12925 len_so_far++;
12927 else
12929 fprintf (stream, "\\%03o", c);
12930 len_so_far += 4;
12934 fputs ("\"\n", stream);
12937 /* Compute the register save mask for registers 0 through 12
12938 inclusive. This code is used by arm_compute_save_reg_mask. */
12940 static unsigned long
12941 arm_compute_save_reg0_reg12_mask (void)
12943 unsigned long func_type = arm_current_func_type ();
12944 unsigned long save_reg_mask = 0;
12945 unsigned int reg;
12947 if (IS_INTERRUPT (func_type))
12949 unsigned int max_reg;
12950 /* Interrupt functions must not corrupt any registers,
12951 even call clobbered ones. If this is a leaf function
12952 we can just examine the registers used by the RTL, but
12953 otherwise we have to assume that whatever function is
12954 called might clobber anything, and so we have to save
12955 all the call-clobbered registers as well. */
12956 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
12957 /* FIQ handlers have registers r8 - r12 banked, so
12958 we only need to check r0 - r7; normal ISRs only
12959 bank r13 and r14, so we must check up to r12.
12960 r13 is the stack pointer which is always preserved,
12961 so we do not need to consider it here. */
12962 max_reg = 7;
12963 else
12964 max_reg = 12;
12966 for (reg = 0; reg <= max_reg; reg++)
12967 if (df_regs_ever_live_p (reg)
12968 || (! current_function_is_leaf && call_used_regs[reg]))
12969 save_reg_mask |= (1 << reg);
12971 /* Also save the pic base register if necessary. */
12972 if (flag_pic
12973 && !TARGET_SINGLE_PIC_BASE
12974 && arm_pic_register != INVALID_REGNUM
12975 && crtl->uses_pic_offset_table)
12976 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12978 else if (IS_VOLATILE(func_type))
12980 /* For noreturn functions we historically omitted register saves
12981 altogether. However this really messes up debugging. As a
12982 compromise save just the frame pointers. Combined with the link
12983 register saved elsewhere this should be sufficient to get
12984 a backtrace. */
12985 if (frame_pointer_needed)
12986 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12987 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
12988 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
12989 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
12990 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
12992 else
12994 /* In the normal case we only need to save those registers
12995 which are call saved and which are used by this function. */
12996 for (reg = 0; reg <= 11; reg++)
12997 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12998 save_reg_mask |= (1 << reg);
13000 /* Handle the frame pointer as a special case. */
13001 if (frame_pointer_needed)
13002 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13004 /* If we aren't loading the PIC register,
13005 don't stack it even though it may be live. */
13006 if (flag_pic
13007 && !TARGET_SINGLE_PIC_BASE
13008 && arm_pic_register != INVALID_REGNUM
13009 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13010 || crtl->uses_pic_offset_table))
13011 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13013 /* The prologue will copy SP into R0, so save it. */
13014 if (IS_STACKALIGN (func_type))
13015 save_reg_mask |= 1;
13018 /* Save registers so the exception handler can modify them. */
13019 if (crtl->calls_eh_return)
13021 unsigned int i;
13023 for (i = 0; ; i++)
13025 reg = EH_RETURN_DATA_REGNO (i);
13026 if (reg == INVALID_REGNUM)
13027 break;
13028 save_reg_mask |= 1 << reg;
13032 return save_reg_mask;
13036 /* Compute the number of bytes used to store the static chain register on the
13037 stack, above the stack frame. We need to know this accurately to get the
13038 alignment of the rest of the stack frame correct. */
13040 static int arm_compute_static_chain_stack_bytes (void)
13042 unsigned long func_type = arm_current_func_type ();
13043 int static_chain_stack_bytes = 0;
13045 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
13046 IS_NESTED (func_type) &&
13047 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13048 static_chain_stack_bytes = 4;
13050 return static_chain_stack_bytes;
13054 /* Compute a bit mask of which registers need to be
13055 saved on the stack for the current function.
13056 This is used by arm_get_frame_offsets, which may add extra registers. */
13058 static unsigned long
13059 arm_compute_save_reg_mask (void)
13061 unsigned int save_reg_mask = 0;
13062 unsigned long func_type = arm_current_func_type ();
13063 unsigned int reg;
13065 if (IS_NAKED (func_type))
13066 /* This should never really happen. */
13067 return 0;
13069 /* If we are creating a stack frame, then we must save the frame pointer,
13070 IP (which will hold the old stack pointer), LR and the PC. */
13071 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13072 save_reg_mask |=
13073 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13074 | (1 << IP_REGNUM)
13075 | (1 << LR_REGNUM)
13076 | (1 << PC_REGNUM);
13078 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13080 /* Decide if we need to save the link register.
13081 Interrupt routines have their own banked link register,
13082 so they never need to save it.
13083 Otherwise if we do not use the link register we do not need to save
13084 it. If we are pushing other registers onto the stack however, we
13085 can save an instruction in the epilogue by pushing the link register
13086 now and then popping it back into the PC. This incurs extra memory
13087 accesses though, so we only do it when optimizing for size, and only
13088 if we know that we will not need a fancy return sequence. */
13089 if (df_regs_ever_live_p (LR_REGNUM)
13090 || (save_reg_mask
13091 && optimize_size
13092 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13093 && !crtl->calls_eh_return))
13094 save_reg_mask |= 1 << LR_REGNUM;
13096 if (cfun->machine->lr_save_eliminated)
13097 save_reg_mask &= ~ (1 << LR_REGNUM);
13099 if (TARGET_REALLY_IWMMXT
13100 && ((bit_count (save_reg_mask)
13101 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13102 arm_compute_static_chain_stack_bytes())
13103 ) % 2) != 0)
13105 /* The total number of registers that are going to be pushed
13106 onto the stack is odd. We need to ensure that the stack
13107 is 64-bit aligned before we start to save iWMMXt registers,
13108 and also before we start to create locals. (A local variable
13109 might be a double or long long which we will load/store using
13110 an iWMMXt instruction). Therefore we need to push another
13111 ARM register, so that the stack will be 64-bit aligned. We
13112 try to avoid using the arg registers (r0 - r3) as they might be
13113 used to pass values in a tail call. */
13114 for (reg = 4; reg <= 12; reg++)
13115 if ((save_reg_mask & (1 << reg)) == 0)
13116 break;
13118 if (reg <= 12)
13119 save_reg_mask |= (1 << reg);
13120 else
13122 cfun->machine->sibcall_blocked = 1;
13123 save_reg_mask |= (1 << 3);
13127 /* We may need to push an additional register for use initializing the
13128 PIC base register. */
13129 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13130 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13132 reg = thumb_find_work_register (1 << 4);
13133 if (!call_used_regs[reg])
13134 save_reg_mask |= (1 << reg);
13137 return save_reg_mask;
13141 /* Compute a bit mask of which registers need to be
13142 saved on the stack for the current function. */
13143 static unsigned long
13144 thumb1_compute_save_reg_mask (void)
13146 unsigned long mask;
13147 unsigned reg;
13149 mask = 0;
13150 for (reg = 0; reg < 12; reg ++)
13151 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13152 mask |= 1 << reg;
13154 if (flag_pic
13155 && !TARGET_SINGLE_PIC_BASE
13156 && arm_pic_register != INVALID_REGNUM
13157 && crtl->uses_pic_offset_table)
13158 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13160 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13161 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13162 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13164 /* LR will also be pushed if any lo regs are pushed. */
13165 if (mask & 0xff || thumb_force_lr_save ())
13166 mask |= (1 << LR_REGNUM);
13168 /* Make sure we have a low work register if we need one.
13169 We will need one if we are going to push a high register,
13170 but we are not currently intending to push a low register. */
13171 if ((mask & 0xff) == 0
13172 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13174 /* Use thumb_find_work_register to choose which register
13175 we will use. If the register is live then we will
13176 have to push it. Use LAST_LO_REGNUM as our fallback
13177 choice for the register to select. */
13178 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13179 /* Make sure the register returned by thumb_find_work_register is
13180 not part of the return value. */
13181 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13182 reg = LAST_LO_REGNUM;
13184 if (! call_used_regs[reg])
13185 mask |= 1 << reg;
13188 /* The 504 below is 8 bytes less than 512 because there are two possible
13189 alignment words. We can't tell here if they will be present or not so we
13190 have to play it safe and assume that they are. */
13191 if ((CALLER_INTERWORKING_SLOT_SIZE +
13192 ROUND_UP_WORD (get_frame_size ()) +
13193 crtl->outgoing_args_size) >= 504)
13195 /* This is the same as the code in thumb1_expand_prologue() which
13196 determines which register to use for stack decrement. */
13197 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13198 if (mask & (1 << reg))
13199 break;
13201 if (reg > LAST_LO_REGNUM)
13203 /* Make sure we have a register available for stack decrement. */
13204 mask |= 1 << LAST_LO_REGNUM;
13208 return mask;
13212 /* Return the number of bytes required to save VFP registers. */
13213 static int
13214 arm_get_vfp_saved_size (void)
13216 unsigned int regno;
13217 int count;
13218 int saved;
13220 saved = 0;
13221 /* Space for saved VFP registers. */
13222 if (TARGET_HARD_FLOAT && TARGET_VFP)
13224 count = 0;
13225 for (regno = FIRST_VFP_REGNUM;
13226 regno < LAST_VFP_REGNUM;
13227 regno += 2)
13229 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13230 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13232 if (count > 0)
13234 /* Workaround ARM10 VFPr1 bug. */
13235 if (count == 2 && !arm_arch6)
13236 count++;
13237 saved += count * 8;
13239 count = 0;
13241 else
13242 count++;
13244 if (count > 0)
13246 if (count == 2 && !arm_arch6)
13247 count++;
13248 saved += count * 8;
13251 return saved;
13255 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13256 everything bar the final return instruction. */
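/* A typical instance for a simple function saving r4, r5 and lr is a single
   "ldmfd sp!, {r4, r5, pc}" (or "pop {...}" with unified syntax); the
   register list is illustrative only.  */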
13257 const char *
13258 output_return_instruction (rtx operand, int really_return, int reverse)
13260 char conditional[10];
13261 char instr[100];
13262 unsigned reg;
13263 unsigned long live_regs_mask;
13264 unsigned long func_type;
13265 arm_stack_offsets *offsets;
13267 func_type = arm_current_func_type ();
13269 if (IS_NAKED (func_type))
13270 return "";
13272 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13274 /* If this function was declared non-returning, and we have
13275 found a tail call, then we have to trust that the called
13276 function won't return. */
13277 if (really_return)
13279 rtx ops[2];
13281 /* Otherwise, trap an attempted return by aborting. */
13282 ops[0] = operand;
13283 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
13284 : "abort");
13285 assemble_external_libcall (ops[1]);
13286 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
13289 return "";
13292 gcc_assert (!cfun->calls_alloca || really_return);
13294 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13296 cfun->machine->return_used_this_function = 1;
13298 offsets = arm_get_frame_offsets ();
13299 live_regs_mask = offsets->saved_regs_mask;
13301 if (live_regs_mask)
13303 const char * return_reg;
13305 /* If we do not have any special requirements for function exit
13306 (e.g. interworking) then we can load the return address
13307 directly into the PC. Otherwise we must load it into LR. */
13308 if (really_return
13309 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
13310 return_reg = reg_names[PC_REGNUM];
13311 else
13312 return_reg = reg_names[LR_REGNUM];
13314 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
13316 /* There are three possible reasons for the IP register
13317 being saved. 1) a stack frame was created, in which case
13318 IP contains the old stack pointer, or 2) an ISR routine
13319 corrupted it, or 3) it was saved to align the stack on
13320 iWMMXt. In case 1, restore IP into SP, otherwise just
13321 restore IP. */
13322 if (frame_pointer_needed)
13324 live_regs_mask &= ~ (1 << IP_REGNUM);
13325 live_regs_mask |= (1 << SP_REGNUM);
13327 else
13328 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
13331 /* On some ARM architectures it is faster to use LDR rather than
13332 LDM to load a single register. On other architectures, the
13333 cost is the same. In 26 bit mode, or for exception handlers,
13334 we have to use LDM to load the PC so that the CPSR is also
13335 restored. */
13336 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13337 if (live_regs_mask == (1U << reg))
13338 break;
13340 if (reg <= LAST_ARM_REGNUM
13341 && (reg != LR_REGNUM
13342 || ! really_return
13343 || ! IS_INTERRUPT (func_type)))
13345 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
13346 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
13348 else
13350 char *p;
13351 int first = 1;
13353 /* Generate the load multiple instruction to restore the
13354 registers. Note we can get here, even if
13355 frame_pointer_needed is true, but only if sp already
13356 points to the base of the saved core registers. */
13357 if (live_regs_mask & (1 << SP_REGNUM))
13359 unsigned HOST_WIDE_INT stack_adjust;
13361 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
13362 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
13364 if (stack_adjust && arm_arch5 && TARGET_ARM)
13365 if (TARGET_UNIFIED_ASM)
13366 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
13367 else
13368 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
13369 else
13371 /* If we can't use ldmib (SA110 bug),
13372 then try to pop r3 instead. */
13373 if (stack_adjust)
13374 live_regs_mask |= 1 << 3;
13376 if (TARGET_UNIFIED_ASM)
13377 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
13378 else
13379 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13382 else
13383 if (TARGET_UNIFIED_ASM)
13384 sprintf (instr, "pop%s\t{", conditional);
13385 else
13386 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
13388 p = instr + strlen (instr);
13390 for (reg = 0; reg <= SP_REGNUM; reg++)
13391 if (live_regs_mask & (1 << reg))
13393 int l = strlen (reg_names[reg]);
13395 if (first)
13396 first = 0;
13397 else
13399 memcpy (p, ", ", 2);
13400 p += 2;
13403 memcpy (p, "%|", 2);
13404 memcpy (p + 2, reg_names[reg], l);
13405 p += l + 2;
13408 if (live_regs_mask & (1 << LR_REGNUM))
13410 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13411 /* If returning from an interrupt, restore the CPSR. */
13412 if (IS_INTERRUPT (func_type))
13413 strcat (p, "^");
13415 else
13416 strcpy (p, "}");
13419 output_asm_insn (instr, & operand);
13421 /* See if we need to generate an extra instruction to
13422 perform the actual function return. */
13423 if (really_return
13424 && func_type != ARM_FT_INTERWORKED
13425 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13427 /* The return has already been handled
13428 by loading the LR into the PC. */
13429 really_return = 0;
13433 if (really_return)
13435 switch ((int) ARM_FUNC_TYPE (func_type))
13437 case ARM_FT_ISR:
13438 case ARM_FT_FIQ:
13439 /* ??? This is wrong for unified assembly syntax. */
13440 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13441 break;
13443 case ARM_FT_INTERWORKED:
13444 sprintf (instr, "bx%s\t%%|lr", conditional);
13445 break;
13447 case ARM_FT_EXCEPTION:
13448 /* ??? This is wrong for unified assembly syntax. */
13449 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13450 break;
13452 default:
13453 /* Use bx if it's available. */
13454 if (arm_arch5 || arm_arch4t)
13455 sprintf (instr, "bx%s\t%%|lr", conditional);
13456 else
13457 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13458 break;
13461 output_asm_insn (instr, & operand);
13464 return "";
13467 /* Write the function name into the code section, directly preceding
13468 the function prologue.
13470 Code will be output similar to this:
13471 t0
13472 .ascii "arm_poke_function_name", 0
13473 .align
13474 t1
13475 .word 0xff000000 + (t1 - t0)
13476 arm_poke_function_name
13477 mov ip, sp
13478 stmfd sp!, {fp, ip, lr, pc}
13479 sub fp, ip, #4
13481 When performing a stack backtrace, code can inspect the value
13482 of 'pc' stored at 'fp' + 0. If the trace function then looks
13483 at location pc - 12 and the top 8 bits are set, then we know
13484 that there is a function name embedded immediately preceding this
13485 location, whose length is ((pc[-3]) & 0x00ffffff).
13487 We assume that pc is declared as a pointer to an unsigned long.
13489 It is of no benefit to output the function name if we are assembling
13490 a leaf function. These function types will not contain a stack
13491 backtrace structure, therefore it is not possible to determine the
13492 function name. */
13493 void
13494 arm_poke_function_name (FILE *stream, const char *name)
13496 unsigned long alignlength;
13497 unsigned long length;
13498 rtx x;
13500 length = strlen (name) + 1;
13501 alignlength = ROUND_UP_WORD (length);
13503 ASM_OUTPUT_ASCII (stream, name, length);
13504 ASM_OUTPUT_ALIGN (stream, 2);
13505 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13506 assemble_aligned_integer (UNITS_PER_WORD, x);
13509 /* Place some comments into the assembler stream
13510 describing the current function. */
13511 static void
13512 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13514 unsigned long func_type;
13516 if (TARGET_THUMB1)
13518 thumb1_output_function_prologue (f, frame_size);
13519 return;
13522 /* Sanity check. */
13523 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13525 func_type = arm_current_func_type ();
13527 switch ((int) ARM_FUNC_TYPE (func_type))
13529 default:
13530 case ARM_FT_NORMAL:
13531 break;
13532 case ARM_FT_INTERWORKED:
13533 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13534 break;
13535 case ARM_FT_ISR:
13536 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13537 break;
13538 case ARM_FT_FIQ:
13539 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13540 break;
13541 case ARM_FT_EXCEPTION:
13542 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13543 break;
13546 if (IS_NAKED (func_type))
13547 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13549 if (IS_VOLATILE (func_type))
13550 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13552 if (IS_NESTED (func_type))
13553 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13554 if (IS_STACKALIGN (func_type))
13555 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13557 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13558 crtl->args.size,
13559 crtl->args.pretend_args_size, frame_size);
13561 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13562 frame_pointer_needed,
13563 cfun->machine->uses_anonymous_args);
13565 if (cfun->machine->lr_save_eliminated)
13566 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13568 if (crtl->calls_eh_return)
13569 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
13573 const char *
13574 arm_output_epilogue (rtx sibling)
13576 int reg;
13577 unsigned long saved_regs_mask;
13578 unsigned long func_type;
13579 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13580 frame that is $fp + 4 for a non-variadic function. */
13581 int floats_offset = 0;
13582 rtx operands[3];
13583 FILE * f = asm_out_file;
13584 unsigned int lrm_count = 0;
13585 int really_return = (sibling == NULL);
13586 int start_reg;
13587 arm_stack_offsets *offsets;
13589 /* If we have already generated the return instruction
13590 then it is futile to generate anything else. */
13591 if (use_return_insn (FALSE, sibling) &&
13592 (cfun->machine->return_used_this_function != 0))
13593 return "";
13595 func_type = arm_current_func_type ();
13597 if (IS_NAKED (func_type))
13598 /* Naked functions don't have epilogues. */
13599 return "";
13601 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13603 rtx op;
13605 /* A volatile function should never return. Call abort. */
13606 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13607 assemble_external_libcall (op);
13608 output_asm_insn ("bl\t%a0", &op);
13610 return "";
13613 /* If we are throwing an exception, then we really must be doing a
13614 return, so we can't tail-call. */
13615 gcc_assert (!crtl->calls_eh_return || really_return);
13617 offsets = arm_get_frame_offsets ();
13618 saved_regs_mask = offsets->saved_regs_mask;
13620 if (TARGET_IWMMXT)
13621 lrm_count = bit_count (saved_regs_mask);
13623 floats_offset = offsets->saved_args;
13624 /* Compute how far away the floats will be. */
13625 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13626 if (saved_regs_mask & (1 << reg))
13627 floats_offset += 4;
13629 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13631 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13632 int vfp_offset = offsets->frame;
13634 if (TARGET_FPA_EMU2)
13636 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13637 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13639 floats_offset += 12;
13640 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13641 reg, FP_REGNUM, floats_offset - vfp_offset);
13644 else
13646 start_reg = LAST_FPA_REGNUM;
13648 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13650 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13652 floats_offset += 12;
13654 /* We can't unstack more than four registers at once. */
13655 if (start_reg - reg == 3)
13657 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13658 reg, FP_REGNUM, floats_offset - vfp_offset);
13659 start_reg = reg - 1;
13662 else
13664 if (reg != start_reg)
13665 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13666 reg + 1, start_reg - reg,
13667 FP_REGNUM, floats_offset - vfp_offset);
13668 start_reg = reg - 1;
13672 /* Just in case the last register checked also needs unstacking. */
13673 if (reg != start_reg)
13674 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13675 reg + 1, start_reg - reg,
13676 FP_REGNUM, floats_offset - vfp_offset);
13679 if (TARGET_HARD_FLOAT && TARGET_VFP)
13681 int saved_size;
13683 /* The fldmd insns do not have base+offset addressing
13684 modes, so we use IP to hold the address. */
13685 saved_size = arm_get_vfp_saved_size ();
13687 if (saved_size > 0)
13689 floats_offset += saved_size;
13690 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13691 FP_REGNUM, floats_offset - vfp_offset);
13693 start_reg = FIRST_VFP_REGNUM;
13694 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13696 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13697 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13699 if (start_reg != reg)
13700 vfp_output_fldmd (f, IP_REGNUM,
13701 (start_reg - FIRST_VFP_REGNUM) / 2,
13702 (reg - start_reg) / 2);
13703 start_reg = reg + 2;
13706 if (start_reg != reg)
13707 vfp_output_fldmd (f, IP_REGNUM,
13708 (start_reg - FIRST_VFP_REGNUM) / 2,
13709 (reg - start_reg) / 2);
13712 if (TARGET_IWMMXT)
13714 /* The frame pointer is guaranteed to be non-double-word aligned.
13715 This is because it is set to (old_stack_pointer - 4) and the
13716 old_stack_pointer was double word aligned. Thus the offset to
13717 the iWMMXt registers to be loaded must also be non-double-word
13718 sized, so that the resultant address *is* double-word aligned.
13719 We can ignore floats_offset since that was already included in
13720 the live_regs_mask. */
13721 lrm_count += (lrm_count % 2 ? 2 : 1);
13723 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13724 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13726 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13727 reg, FP_REGNUM, lrm_count * 4);
13728 lrm_count += 2;
13732 /* saved_regs_mask should contain the IP, which at the time of stack
13733 frame generation actually contains the old stack pointer. So a
13734 quick way to unwind the stack is just pop the IP register directly
13735 into the stack pointer. */
13736 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13737 saved_regs_mask &= ~ (1 << IP_REGNUM);
13738 saved_regs_mask |= (1 << SP_REGNUM);
13740 /* There are two registers left in saved_regs_mask - LR and PC. We
13741 only need to restore the LR register (the return address), but to
13742 save time we can load it directly into the PC, unless we need a
13743 special function exit sequence, or we are not really returning. */
13744 if (really_return
13745 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13746 && !crtl->calls_eh_return)
13747 /* Delete the LR from the register mask, so that the LR on
13748 the stack is loaded into the PC in the register mask. */
13749 saved_regs_mask &= ~ (1 << LR_REGNUM);
13750 else
13751 saved_regs_mask &= ~ (1 << PC_REGNUM);
13753 /* We must use SP as the base register, because SP is one of the
13754 registers being restored. If an interrupt or page fault
13755 happens in the ldm instruction, the SP might or might not
13756 have been restored. That would be bad, as then SP will no
13757 longer indicate the safe area of stack, and we can get stack
13758 corruption. Using SP as the base register means that it will
13759 be reset correctly to the original value, should an interrupt
13760 occur. If the stack pointer already points at the right
13761 place, then omit the subtraction. */
13762 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
13763 || cfun->calls_alloca)
13764 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
13765 4 * bit_count (saved_regs_mask));
13766 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
13768 if (IS_INTERRUPT (func_type))
13769 /* Interrupt handlers will have pushed the
13770 IP onto the stack, so restore it now. */
13771 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
13773 else
13775 /* This branch is executed for ARM mode (non-apcs frames) and
13776 Thumb-2 mode. Frame layout is essentially the same for those
13777 cases, except that in ARM mode frame pointer points to the
13778 first saved register, while in Thumb-2 mode the frame pointer points
13779 to the last saved register.
13781 It is possible to make frame pointer point to last saved
13782 register in both cases, and remove some conditionals below.
13783 That means that fp setup in prologue would be just "mov fp, sp"
13784 and sp restore in epilogue would be just "mov sp, fp", whereas
13785 now we have to use add/sub in those cases. However, the value
13786 of that would be marginal, as both mov and add/sub are 32-bit
13787 in ARM mode, and it would require extra conditionals
13788 in arm_expand_prologue to distinguish the ARM-apcs-frame case
13789 (where the frame pointer is required to point at the first register)
13790 from the ARM-non-apcs-frame case. Therefore, such a change is postponed
13791 until a real need arises. */
13792 unsigned HOST_WIDE_INT amount;
13793 int rfe;
13794 /* Restore stack pointer if necessary. */
13795 if (TARGET_ARM && frame_pointer_needed)
13797 operands[0] = stack_pointer_rtx;
13798 operands[1] = hard_frame_pointer_rtx;
13800 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
13801 output_add_immediate (operands);
13803 else
13805 if (frame_pointer_needed)
13807 /* For Thumb-2 restore sp from the frame pointer.
13808 Operand restrictions mean we have to increment FP, then copy
13809 to SP. */
13810 amount = offsets->locals_base - offsets->saved_regs;
13811 operands[0] = hard_frame_pointer_rtx;
13813 else
13815 unsigned long count;
13816 operands[0] = stack_pointer_rtx;
13817 amount = offsets->outgoing_args - offsets->saved_regs;
13818 /* pop call clobbered registers if it avoids a
13819 separate stack adjustment. */
13820 count = offsets->saved_regs - offsets->saved_args;
13821 if (optimize_size
13822 && count != 0
13823 && !crtl->calls_eh_return
13824 && bit_count(saved_regs_mask) * 4 == count
13825 && !IS_INTERRUPT (func_type)
13826 && !crtl->tail_call_emit)
13828 unsigned long mask;
13829 mask = (1 << (arm_size_return_regs() / 4)) - 1;
13830 mask ^= 0xf;
13831 mask &= ~saved_regs_mask;
13832 reg = 0;
13833 while (bit_count (mask) * 4 > amount)
13835 while ((mask & (1 << reg)) == 0)
13836 reg++;
13837 mask &= ~(1 << reg);
13839 if (bit_count (mask) * 4 == amount) {
13840 amount = 0;
13841 saved_regs_mask |= mask;
13846 if (amount)
13848 operands[1] = operands[0];
13849 operands[2] = GEN_INT (amount);
13850 output_add_immediate (operands);
13852 if (frame_pointer_needed)
13853 asm_fprintf (f, "\tmov\t%r, %r\n",
13854 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
13857 if (TARGET_FPA_EMU2)
13859 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13860 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13861 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
13862 reg, SP_REGNUM);
13864 else
13866 start_reg = FIRST_FPA_REGNUM;
13868 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13870 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13872 if (reg - start_reg == 3)
13874 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
13875 start_reg, SP_REGNUM);
13876 start_reg = reg + 1;
13879 else
13881 if (reg != start_reg)
13882 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13883 start_reg, reg - start_reg,
13884 SP_REGNUM);
13886 start_reg = reg + 1;
13890 /* Just in case the last register checked also needs unstacking. */
13891 if (reg != start_reg)
13892 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13893 start_reg, reg - start_reg, SP_REGNUM);
13896 if (TARGET_HARD_FLOAT && TARGET_VFP)
13898 int end_reg = LAST_VFP_REGNUM + 1;
13900 /* Scan the registers in reverse order. We need to match
13901 any groupings made in the prologue and generate matching
13902 pop operations. */
13903 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
13905 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13906 && (!df_regs_ever_live_p (reg + 1)
13907 || call_used_regs[reg + 1]))
13909 if (end_reg > reg + 2)
13910 vfp_output_fldmd (f, SP_REGNUM,
13911 (reg + 2 - FIRST_VFP_REGNUM) / 2,
13912 (end_reg - (reg + 2)) / 2);
13913 end_reg = reg;
13916 if (end_reg > reg + 2)
13917 vfp_output_fldmd (f, SP_REGNUM, 0,
13918 (end_reg - (reg + 2)) / 2);
13921 if (TARGET_IWMMXT)
13922 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
13923 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13924 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
13926 /* If we can, restore the LR into the PC. */
13927 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
13928 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
13929 && !IS_STACKALIGN (func_type)
13930 && really_return
13931 && crtl->args.pretend_args_size == 0
13932 && saved_regs_mask & (1 << LR_REGNUM)
13933 && !crtl->calls_eh_return)
13935 saved_regs_mask &= ~ (1 << LR_REGNUM);
13936 saved_regs_mask |= (1 << PC_REGNUM);
13937 rfe = IS_INTERRUPT (func_type);
13939 else
13940 rfe = 0;
13942 /* Load the registers off the stack. If we only have one register
13943 to load, use the LDR instruction - it is faster. For Thumb-2,
13944 always use pop and the assembler will pick the best instruction. */
13945 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
13946 && !IS_INTERRUPT(func_type))
13948 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
13950 else if (saved_regs_mask)
13952 if (saved_regs_mask & (1 << SP_REGNUM))
13953 /* Note - write back to the stack register is not enabled
13954 (i.e. "ldmfd sp!..."). We know that the stack pointer is
13955 in the list of registers and if we add writeback the
13956 instruction becomes UNPREDICTABLE. */
13957 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
13958 rfe);
13959 else if (TARGET_ARM)
13960 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
13961 rfe);
13962 else
13963 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
13966 if (crtl->args.pretend_args_size)
13968 /* Unwind the pre-pushed regs. */
13969 operands[0] = operands[1] = stack_pointer_rtx;
13970 operands[2] = GEN_INT (crtl->args.pretend_args_size);
13971 output_add_immediate (operands);
13975 /* We may have already restored PC directly from the stack. */
13976 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
13977 return "";
13979 /* Stack adjustment for exception handler. */
13980 if (crtl->calls_eh_return)
13981 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
13982 ARM_EH_STACKADJ_REGNUM);
13984 /* Generate the return instruction. */
13985 switch ((int) ARM_FUNC_TYPE (func_type))
13987 case ARM_FT_ISR:
13988 case ARM_FT_FIQ:
13989 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
13990 break;
13992 case ARM_FT_EXCEPTION:
13993 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13994 break;
13996 case ARM_FT_INTERWORKED:
13997 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13998 break;
14000 default:
14001 if (IS_STACKALIGN (func_type))
14003 /* See comment in arm_expand_prologue. */
14004 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14006 if (arm_arch5 || arm_arch4t)
14007 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14008 else
14009 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14010 break;
14013 return "";
14016 static void
14017 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14018 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14020 arm_stack_offsets *offsets;
14022 if (TARGET_THUMB1)
14024 int regno;
14026 /* Emit any call-via-reg trampolines that are needed for v4t support
14027 of call_reg and call_value_reg type insns. */
14028 for (regno = 0; regno < LR_REGNUM; regno++)
14030 rtx label = cfun->machine->call_via[regno];
14032 if (label != NULL)
14034 switch_to_section (function_section (current_function_decl));
14035 targetm.asm_out.internal_label (asm_out_file, "L",
14036 CODE_LABEL_NUMBER (label));
14037 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14041 /* ??? Probably not safe to set this here, since it assumes that a
14042 function will be emitted as assembly immediately after we generate
14043 RTL for it. This does not happen for inline functions. */
14044 cfun->machine->return_used_this_function = 0;
14046 else /* TARGET_32BIT */
14048 /* We need to take into account any stack-frame rounding. */
14049 offsets = arm_get_frame_offsets ();
14051 gcc_assert (!use_return_insn (FALSE, NULL)
14052 || (cfun->machine->return_used_this_function != 0)
14053 || offsets->saved_regs == offsets->outgoing_args
14054 || frame_pointer_needed);
14056 /* Reset the ARM-specific per-function variables. */
14057 after_arm_reorg = 0;
14061 /* Generate and emit an insn that we will recognize as a push_multi.
14062 Unfortunately, since this insn does not reflect very well the actual
14063 semantics of the operation, we need to annotate the insn for the benefit
14064 of DWARF2 frame unwind information. */
14065 static rtx
14066 emit_multi_reg_push (unsigned long mask)
14068 int num_regs = 0;
14069 int num_dwarf_regs;
14070 int i, j;
14071 rtx par;
14072 rtx dwarf;
14073 int dwarf_par_index;
14074 rtx tmp, reg;
14076 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14077 if (mask & (1 << i))
14078 num_regs++;
14080 gcc_assert (num_regs && num_regs <= 16);
14082 /* We don't record the PC in the dwarf frame information. */
14083 num_dwarf_regs = num_regs;
14084 if (mask & (1 << PC_REGNUM))
14085 num_dwarf_regs--;
14087 /* For the body of the insn we are going to generate an UNSPEC in
14088 parallel with several USEs. This allows the insn to be recognized
14089 by the push_multi pattern in the arm.md file.
14091 The body of the insn looks something like this:
14093 (parallel [
14094 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14095 (const_int:SI <num>)))
14096 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14097 (use (reg:SI XX))
14098 (use (reg:SI YY))
14102 For the frame note however, we try to be more explicit and actually
14103 show each register being stored into the stack frame, plus a (single)
14104 decrement of the stack pointer. We do it this way in order to be
14105 friendly to the stack unwinding code, which only wants to see a single
14106 stack decrement per instruction. The RTL we generate for the note looks
14107 something like this:
14109 (sequence [
14110 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14111 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14112 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14113 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14117 FIXME:: In an ideal world the PRE_MODIFY would not exist and
14118 instead we'd have a parallel expression detailing all
14119 the stores to the various memory addresses so that debug
14120 information is more up-to-date. Remember however while writing
14121 this to take care of the constraints with the push instruction.
14123 Note also that this has to be taken care of for the VFP registers.
14125 For more see PR43399. */
14127 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14128 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14129 dwarf_par_index = 1;
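/* The first register found in MASK supplies the UNSPEC_PUSH_MULT SET
   that heads the PARALLEL; the remaining registers are added as plain
   USEs by the second loop below, matching the push_multi pattern in
   arm.md.  */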
14131 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14133 if (mask & (1 << i))
14135 reg = gen_rtx_REG (SImode, i);
14137 XVECEXP (par, 0, 0)
14138 = gen_rtx_SET (VOIDmode,
14139 gen_frame_mem
14140 (BLKmode,
14141 gen_rtx_PRE_MODIFY (Pmode,
14142 stack_pointer_rtx,
14143 plus_constant
14144 (stack_pointer_rtx,
14145 -4 * num_regs))
14147 gen_rtx_UNSPEC (BLKmode,
14148 gen_rtvec (1, reg),
14149 UNSPEC_PUSH_MULT));
14151 if (i != PC_REGNUM)
14153 tmp = gen_rtx_SET (VOIDmode,
14154 gen_frame_mem (SImode, stack_pointer_rtx),
14155 reg);
14156 RTX_FRAME_RELATED_P (tmp) = 1;
14157 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14158 dwarf_par_index++;
14161 break;
14165 for (j = 1, i++; j < num_regs; i++)
14167 if (mask & (1 << i))
14169 reg = gen_rtx_REG (SImode, i);
14171 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14173 if (i != PC_REGNUM)
14176 tmp = gen_rtx_SET (VOIDmode,
14177 gen_frame_mem
14178 (SImode,
14179 plus_constant (stack_pointer_rtx,
14180 4 * j)),
14181 reg);
14182 RTX_FRAME_RELATED_P (tmp) = 1;
14183 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14186 j++;
14190 par = emit_insn (par);
14192 tmp = gen_rtx_SET (VOIDmode,
14193 stack_pointer_rtx,
14194 plus_constant (stack_pointer_rtx, -4 * num_regs));
14195 RTX_FRAME_RELATED_P (tmp) = 1;
14196 XVECEXP (dwarf, 0, 0) = tmp;
14198 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14200 return par;
14203 /* Calculate the size of the return value that is passed in registers. */
14204 static unsigned
14205 arm_size_return_regs (void)
14207 enum machine_mode mode;
14209 if (crtl->return_rtx != 0)
14210 mode = GET_MODE (crtl->return_rtx);
14211 else
14212 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14214 return GET_MODE_SIZE (mode);
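/* Emit an insn that stores COUNT consecutive FPA registers, starting at
   BASE_REG, below the stack pointer (an SFM store-multiple with
   writeback).  As with emit_multi_reg_push above, the insn carries a
   REG_FRAME_RELATED_EXPR note spelling out the individual stores and
   the single stack decrement for the DWARF unwinder.  */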
14217 static rtx
14218 emit_sfm (int base_reg, int count)
14220 rtx par;
14221 rtx dwarf;
14222 rtx tmp, reg;
14223 int i;
14225 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14226 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14228 reg = gen_rtx_REG (XFmode, base_reg++);
14230 XVECEXP (par, 0, 0)
14231 = gen_rtx_SET (VOIDmode,
14232 gen_frame_mem
14233 (BLKmode,
14234 gen_rtx_PRE_MODIFY (Pmode,
14235 stack_pointer_rtx,
14236 plus_constant
14237 (stack_pointer_rtx,
14238 -12 * count))
14240 gen_rtx_UNSPEC (BLKmode,
14241 gen_rtvec (1, reg),
14242 UNSPEC_PUSH_MULT));
14243 tmp = gen_rtx_SET (VOIDmode,
14244 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14245 RTX_FRAME_RELATED_P (tmp) = 1;
14246 XVECEXP (dwarf, 0, 1) = tmp;
14248 for (i = 1; i < count; i++)
14250 reg = gen_rtx_REG (XFmode, base_reg++);
14251 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14253 tmp = gen_rtx_SET (VOIDmode,
14254 gen_frame_mem (XFmode,
14255 plus_constant (stack_pointer_rtx,
14256 i * 12)),
14257 reg);
14258 RTX_FRAME_RELATED_P (tmp) = 1;
14259 XVECEXP (dwarf, 0, i + 1) = tmp;
14262 tmp = gen_rtx_SET (VOIDmode,
14263 stack_pointer_rtx,
14264 plus_constant (stack_pointer_rtx, -12 * count));
14266 RTX_FRAME_RELATED_P (tmp) = 1;
14267 XVECEXP (dwarf, 0, 0) = tmp;
14269 par = emit_insn (par);
14270 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14272 return par;
14276 /* Return true if the current function needs to save/restore LR. */
14278 static bool
14279 thumb_force_lr_save (void)
14281 return !cfun->machine->lr_save_eliminated
14282 && (!leaf_function_p ()
14283 || thumb_far_jump_used_p ()
14284 || df_regs_ever_live_p (LR_REGNUM));
14288 /* Compute the distance from register FROM to register TO.
14289 These can be the arg pointer (26), the soft frame pointer (25),
14290 the stack pointer (13) or the hard frame pointer (11).
14291 In thumb mode r7 is used as the soft frame pointer, if needed.
14292 Typical stack layout looks like this:
14294 old stack pointer -> | |
14295 ----
14296 | | \
14297 | | saved arguments for
14298 | | vararg functions
14299 | | /
14301 hard FP & arg pointer -> | | \
14302 | | stack
14303 | | frame
14304 | | /
14306 | | \
14307 | | call saved
14308 | | registers
14309 soft frame pointer -> | | /
14311 | | \
14312 | | local
14313 | | variables
14314 locals base pointer -> | | /
14316 | | \
14317 | | outgoing
14318 | | arguments
14319 current stack pointer -> | | /
14322 For a given function some or all of these stack components
14323 may not be needed, giving rise to the possibility of
14324 eliminating some of the registers.
14326 The values returned by this function must reflect the behavior
14327 of arm_expand_prologue() and arm_compute_save_reg_mask().
14329 The sign of the number returned reflects the direction of stack
14330 growth, so the values are positive for all eliminations except
14331 from the soft frame pointer to the hard frame pointer.
14333 SFP may point just inside the local variables block to ensure correct
14334 alignment. */
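/* Each boundary shown above is recorded in the arm_stack_offsets
   structure filled in below as a byte offset from the stack pointer on
   entry, positive in the direction of stack growth: frame corresponds
   to the ARM hard frame pointer, soft_frame to the soft frame pointer,
   locals_base to the Thumb hard frame pointer and outgoing_args to the
   final stack pointer, while saved_args marks the bottom of the
   pretend-args area.  See arm_compute_initial_elimination_offset below
   for the exact register eliminations.  */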
14337 /* Calculate stack offsets. These are used to calculate register elimination
14338 offsets and in prologue/epilogue code. Also calculates which registers
14339 should be saved. */
14341 static arm_stack_offsets *
14342 arm_get_frame_offsets (void)
14344 struct arm_stack_offsets *offsets;
14345 unsigned long func_type;
14346 int leaf;
14347 int saved;
14348 int core_saved;
14349 HOST_WIDE_INT frame_size;
14350 int i;
14352 offsets = &cfun->machine->stack_offsets;
14354 /* We need to know if we are a leaf function. Unfortunately, it
14355 is possible to be called after start_sequence has been called,
14356 which causes get_insns to return the insns for the sequence,
14357 not the function, which will cause leaf_function_p to return
14358 the incorrect result.  However, we only need
14360 to know about leaf functions once reload has completed, and the
14361 frame size cannot be changed after that time, so we can safely
14362 use the cached value. */
14364 if (reload_completed)
14365 return offsets;
14367 /* Initially this is the size of the local variables. It will be translated
14368 into an offset once we have determined the size of preceding data. */
14369 frame_size = ROUND_UP_WORD (get_frame_size ());
14371 leaf = leaf_function_p ();
14373 /* Space for variadic functions. */
14374 offsets->saved_args = crtl->args.pretend_args_size;
14376 /* In Thumb mode this is incorrect, but never used. */
14377 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
14378 arm_compute_static_chain_stack_bytes();
14380 if (TARGET_32BIT)
14382 unsigned int regno;
14384 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
14385 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14386 saved = core_saved;
14388 /* We know that SP will be doubleword aligned on entry, and we must
14389 preserve that condition at any subroutine call. We also require the
14390 soft frame pointer to be doubleword aligned. */
14392 if (TARGET_REALLY_IWMMXT)
14394 /* Check for the call-saved iWMMXt registers. */
14395 for (regno = FIRST_IWMMXT_REGNUM;
14396 regno <= LAST_IWMMXT_REGNUM;
14397 regno++)
14398 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14399 saved += 8;
14402 func_type = arm_current_func_type ();
14403 if (! IS_VOLATILE (func_type))
14405 /* Space for saved FPA registers. */
14406 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
14407 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14408 saved += 12;
14410 /* Space for saved VFP registers. */
14411 if (TARGET_HARD_FLOAT && TARGET_VFP)
14412 saved += arm_get_vfp_saved_size ();
14415 else /* TARGET_THUMB1 */
14417 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14418 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14419 saved = core_saved;
14420 if (TARGET_BACKTRACE)
14421 saved += 16;
14424 /* Saved registers include the stack frame. */
14425 offsets->saved_regs = offsets->saved_args + saved +
14426 arm_compute_static_chain_stack_bytes();
14427 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
14428 /* A leaf function does not need any stack alignment if it has nothing
14429 on the stack. */
14430 if (leaf && frame_size == 0)
14432 offsets->outgoing_args = offsets->soft_frame;
14433 offsets->locals_base = offsets->soft_frame;
14434 return offsets;
14437 /* Ensure SFP has the correct alignment. */
14438 if (ARM_DOUBLEWORD_ALIGN
14439 && (offsets->soft_frame & 7))
14441 offsets->soft_frame += 4;
14442 /* Try to align stack by pushing an extra reg. Don't bother doing this
14443 when there is a stack frame as the alignment will be rolled into
14444 the normal stack adjustment. */
14445 if (frame_size + crtl->outgoing_args_size == 0)
14447 int reg = -1;
14449 /* If it is safe to use r3, then do so. This sometimes
14450 generates better code on Thumb-2 by avoiding the need to
14451 use 32-bit push/pop instructions. */
14452 if (!crtl->tail_call_emit
14453 && arm_size_return_regs () <= 12)
14455 reg = 3;
14457 else
14458 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14460 if ((offsets->saved_regs_mask & (1 << i)) == 0)
14462 reg = i;
14463 break;
14467 if (reg != -1)
14469 offsets->saved_regs += 4;
14470 offsets->saved_regs_mask |= (1 << reg);
14475 offsets->locals_base = offsets->soft_frame + frame_size;
14476 offsets->outgoing_args = (offsets->locals_base
14477 + crtl->outgoing_args_size);
14479 if (ARM_DOUBLEWORD_ALIGN)
14481 /* Ensure SP remains doubleword aligned. */
14482 if (offsets->outgoing_args & 7)
14483 offsets->outgoing_args += 4;
14484 gcc_assert (!(offsets->outgoing_args & 7));
14487 return offsets;
14491 /* Calculate the relative offsets for the different stack pointers. Positive
14492 offsets are in the direction of stack growth. */
14494 HOST_WIDE_INT
14495 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
14497 arm_stack_offsets *offsets;
14499 offsets = arm_get_frame_offsets ();
14501 /* OK, now we have enough information to compute the distances.
14502 There must be an entry in these switch tables for each pair
14503 of registers in ELIMINABLE_REGS, even if some of the entries
14504 seem to be redundant or useless. */
14505 switch (from)
14507 case ARG_POINTER_REGNUM:
14508 switch (to)
14510 case THUMB_HARD_FRAME_POINTER_REGNUM:
14511 return 0;
14513 case FRAME_POINTER_REGNUM:
14514 /* This is the reverse of the soft frame pointer
14515 to hard frame pointer elimination below. */
14516 return offsets->soft_frame - offsets->saved_args;
14518 case ARM_HARD_FRAME_POINTER_REGNUM:
14519 /* This is only non-zero in the case where the static chain register
14520 is stored above the frame. */
14521 return offsets->frame - offsets->saved_args - 4;
14523 case STACK_POINTER_REGNUM:
14524 /* If nothing has been pushed on the stack at all
14525 then this will return -4. This *is* correct! */
14526 return offsets->outgoing_args - (offsets->saved_args + 4);
14528 default:
14529 gcc_unreachable ();
14531 gcc_unreachable ();
14533 case FRAME_POINTER_REGNUM:
14534 switch (to)
14536 case THUMB_HARD_FRAME_POINTER_REGNUM:
14537 return 0;
14539 case ARM_HARD_FRAME_POINTER_REGNUM:
14540 /* The hard frame pointer points to the top entry in the
14541 stack frame. The soft frame pointer to the bottom entry
14542 in the stack frame. If there is no stack frame at all,
14543 then they are identical. */
14545 return offsets->frame - offsets->soft_frame;
14547 case STACK_POINTER_REGNUM:
14548 return offsets->outgoing_args - offsets->soft_frame;
14550 default:
14551 gcc_unreachable ();
14553 gcc_unreachable ();
14555 default:
14556 /* You cannot eliminate from the stack pointer.
14557 In theory you could eliminate from the hard frame
14558 pointer to the stack pointer, but this will never
14559 happen, since if a stack frame is not needed the
14560 hard frame pointer will never be used. */
14561 gcc_unreachable ();
14565 /* Given FROM and TO register numbers, say whether this elimination is
14566 allowed. Frame pointer elimination is automatically handled.
14568 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14569 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14570 pointer, we must eliminate FRAME_POINTER_REGNUM into
14571 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14572 ARG_POINTER_REGNUM. */
14574 bool
14575 arm_can_eliminate (const int from, const int to)
14577 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14578 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14579 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
14580 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
14581 true);
14584 /* Emit RTL to save coprocessor registers on function entry. Returns the
14585 number of bytes pushed. */
14587 static int
14588 arm_save_coproc_regs(void)
14590 int saved_size = 0;
14591 unsigned reg;
14592 unsigned start_reg;
14593 rtx insn;
14595 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14596 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14598 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14599 insn = gen_rtx_MEM (V2SImode, insn);
14600 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14601 RTX_FRAME_RELATED_P (insn) = 1;
14602 saved_size += 8;
14605 /* Save any floating point call-saved registers used by this
14606 function. */
14607 if (TARGET_FPA_EMU2)
14609 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14610 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14612 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14613 insn = gen_rtx_MEM (XFmode, insn);
14614 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14615 RTX_FRAME_RELATED_P (insn) = 1;
14616 saved_size += 12;
14619 else
14621 start_reg = LAST_FPA_REGNUM;
14623 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14625 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14627 if (start_reg - reg == 3)
14629 insn = emit_sfm (reg, 4);
14630 RTX_FRAME_RELATED_P (insn) = 1;
14631 saved_size += 48;
14632 start_reg = reg - 1;
14635 else
14637 if (start_reg != reg)
14639 insn = emit_sfm (reg + 1, start_reg - reg);
14640 RTX_FRAME_RELATED_P (insn) = 1;
14641 saved_size += (start_reg - reg) * 12;
14643 start_reg = reg - 1;
14647 if (start_reg != reg)
14649 insn = emit_sfm (reg + 1, start_reg - reg);
14650 saved_size += (start_reg - reg) * 12;
14651 RTX_FRAME_RELATED_P (insn) = 1;
14654 if (TARGET_HARD_FLOAT && TARGET_VFP)
14656 start_reg = FIRST_VFP_REGNUM;
14658 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14660 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14661 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14663 if (start_reg != reg)
14664 saved_size += vfp_emit_fstmd (start_reg,
14665 (reg - start_reg) / 2);
14666 start_reg = reg + 2;
14669 if (start_reg != reg)
14670 saved_size += vfp_emit_fstmd (start_reg,
14671 (reg - start_reg) / 2);
14673 return saved_size;
14677 /* Set the Thumb frame pointer from the stack pointer. */
14679 static void
14680 thumb_set_frame_pointer (arm_stack_offsets *offsets)
14682 HOST_WIDE_INT amount;
14683 rtx insn, dwarf;
14685 amount = offsets->outgoing_args - offsets->locals_base;
14686 if (amount < 1024)
14687 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14688 stack_pointer_rtx, GEN_INT (amount)));
14689 else
14691 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
14692 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
14693 expects the first two operands to be the same. */
14694 if (TARGET_THUMB2)
14696 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14697 stack_pointer_rtx,
14698 hard_frame_pointer_rtx));
14700 else
14702 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14703 hard_frame_pointer_rtx,
14704 stack_pointer_rtx));
14706 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14707 plus_constant (stack_pointer_rtx, amount));
14708 RTX_FRAME_RELATED_P (dwarf) = 1;
14709 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14712 RTX_FRAME_RELATED_P (insn) = 1;
14715 /* Generate the prologue instructions for entry into an ARM or Thumb-2
14716 function. */
14717 void
14718 arm_expand_prologue (void)
14720 rtx amount;
14721 rtx insn;
14722 rtx ip_rtx;
14723 unsigned long live_regs_mask;
14724 unsigned long func_type;
14725 int fp_offset = 0;
14726 int saved_pretend_args = 0;
14727 int saved_regs = 0;
14728 unsigned HOST_WIDE_INT args_to_push;
14729 arm_stack_offsets *offsets;
14731 func_type = arm_current_func_type ();
14733 /* Naked functions don't have prologues. */
14734 if (IS_NAKED (func_type))
14735 return;
14737 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
14738 args_to_push = crtl->args.pretend_args_size;
14740 /* Compute which register we will have to save onto the stack. */
14741 offsets = arm_get_frame_offsets ();
14742 live_regs_mask = offsets->saved_regs_mask;
14744 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
14746 if (IS_STACKALIGN (func_type))
14748 rtx dwarf;
14749 rtx r0;
14750 rtx r1;
14751 /* Handle a word-aligned stack pointer. We generate the following:
14753 mov r0, sp
14754 bic r1, r0, #7
14755 mov sp, r1
14756 <save and restore r0 in normal prologue/epilogue>
14757 mov sp, r0
14758 bx lr
14760 The unwinder doesn't need to know about the stack realignment.
14761 Just tell it we saved SP in r0. */
14762 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
14764 r0 = gen_rtx_REG (SImode, 0);
14765 r1 = gen_rtx_REG (SImode, 1);
14766 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
14767 compiler won't choke. */
14768 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
14769 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
14770 insn = gen_movsi (r0, stack_pointer_rtx);
14771 RTX_FRAME_RELATED_P (insn) = 1;
14772 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14773 emit_insn (insn);
14774 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
14775 emit_insn (gen_movsi (stack_pointer_rtx, r1));
14778 /* For APCS frames, if IP register is clobbered
14779 when creating frame, save that register in a special
14780 way. */
14781 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14783 if (IS_INTERRUPT (func_type))
14785 /* Interrupt functions must not corrupt any registers.
14786 Creating a frame pointer however, corrupts the IP
14787 register, so we must push it first. */
14788 insn = emit_multi_reg_push (1 << IP_REGNUM);
14790 /* Do not set RTX_FRAME_RELATED_P on this insn.
14791 The dwarf stack unwinding code only wants to see one
14792 stack decrement per function, and this is not it. If
14793 this instruction is labeled as being part of the frame
14794 creation sequence then dwarf2out_frame_debug_expr will
14795 die when it encounters the assignment of IP to FP
14796 later on, since the use of SP here establishes SP as
14797 the CFA register and not IP.
14799 Anyway this instruction is not really part of the stack
14800 frame creation although it is part of the prologue. */
14802 else if (IS_NESTED (func_type))
14804 /* The static chain register is the same as the IP register,
14805 which is used as a scratch register during stack frame creation.
14806 To get around this, we need to find somewhere to store IP
14807 whilst the frame is being created. We try the following
14808 places in order:
14810 1. The last argument register.
14811 2. A slot on the stack above the frame. (This only
14812 works if the function is not a varargs function).
14813 3. Register r3, after pushing the argument registers
14814 onto the stack.
14816 Note - we only need to tell the dwarf2 backend about the SP
14817 adjustment in the second variant; the static chain register
14818 doesn't need to be unwound, as it doesn't contain a value
14819 inherited from the caller. */
14821 if (df_regs_ever_live_p (3) == false)
14822 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14823 else if (args_to_push == 0)
14825 rtx dwarf;
14827 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
14828 saved_regs += 4;
14830 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
14831 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
14832 fp_offset = 4;
14834 /* Just tell the dwarf backend that we adjusted SP. */
14835 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14836 plus_constant (stack_pointer_rtx,
14837 -fp_offset));
14838 RTX_FRAME_RELATED_P (insn) = 1;
14839 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14841 else
14843 /* Store the args on the stack. */
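/* The mask (0xf0 >> (args_to_push / 4)) & 0xf selects the highest
   args_to_push / 4 of the argument registers r0-r3, i.e. the ones
   holding the anonymous arguments that must live on the stack.  */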
14844 if (cfun->machine->uses_anonymous_args)
14845 insn = emit_multi_reg_push
14846 ((0xf0 >> (args_to_push / 4)) & 0xf);
14847 else
14848 insn = emit_insn
14849 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14850 GEN_INT (- args_to_push)));
14852 RTX_FRAME_RELATED_P (insn) = 1;
14854 saved_pretend_args = 1;
14855 fp_offset = args_to_push;
14856 args_to_push = 0;
14858 /* Now reuse r3 to preserve IP. */
14859 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14863 insn = emit_set_insn (ip_rtx,
14864 plus_constant (stack_pointer_rtx, fp_offset));
14865 RTX_FRAME_RELATED_P (insn) = 1;
14868 if (args_to_push)
14870 /* Push the argument registers, or reserve space for them. */
14871 if (cfun->machine->uses_anonymous_args)
14872 insn = emit_multi_reg_push
14873 ((0xf0 >> (args_to_push / 4)) & 0xf);
14874 else
14875 insn = emit_insn
14876 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14877 GEN_INT (- args_to_push)));
14878 RTX_FRAME_RELATED_P (insn) = 1;
14881 /* If this is an interrupt service routine, and the link register
14882 is going to be pushed, and we're not generating the extra
14883 push of IP (needed when a frame pointer is required and the frame layout is APCS),
14884 then subtracting four from LR now will mean that the function return
14885 can be done with a single instruction. */
14886 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
14887 && (live_regs_mask & (1 << LR_REGNUM)) != 0
14888 && !(frame_pointer_needed && TARGET_APCS_FRAME)
14889 && TARGET_ARM)
14891 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
14893 emit_set_insn (lr, plus_constant (lr, -4));
14896 if (live_regs_mask)
14898 saved_regs += bit_count (live_regs_mask) * 4;
14899 if (optimize_size && !frame_pointer_needed
14900 && saved_regs == offsets->saved_regs - offsets->saved_args)
14902 /* If no coprocessor registers are being pushed and we don't have
14903 to worry about a frame pointer then push extra registers to
14904 create the stack frame. This is done in a way that does not
14905 alter the frame layout, so it is independent of the epilogue. */
14906 int n;
14907 int frame;
14908 n = 0;
14909 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
14910 n++;
14911 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
14912 if (frame && n * 4 >= frame)
14914 n = frame / 4;
14915 live_regs_mask |= (1 << n) - 1;
14916 saved_regs += frame;
14919 insn = emit_multi_reg_push (live_regs_mask);
14920 RTX_FRAME_RELATED_P (insn) = 1;
14923 if (! IS_VOLATILE (func_type))
14924 saved_regs += arm_save_coproc_regs ();
14926 if (frame_pointer_needed && TARGET_ARM)
14928 /* Create the new frame pointer. */
14929 if (TARGET_APCS_FRAME)
14931 insn = GEN_INT (-(4 + args_to_push + fp_offset));
14932 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
14933 RTX_FRAME_RELATED_P (insn) = 1;
14935 if (IS_NESTED (func_type))
14937 /* Recover the static chain register. */
14938 if (!df_regs_ever_live_p (3)
14939 || saved_pretend_args)
14940 insn = gen_rtx_REG (SImode, 3);
14941 else /* if (crtl->args.pretend_args_size == 0) */
14943 insn = plus_constant (hard_frame_pointer_rtx, 4);
14944 insn = gen_frame_mem (SImode, insn);
14946 emit_set_insn (ip_rtx, insn);
14947 /* Add a USE to stop propagate_one_insn() from barfing. */
14948 emit_insn (gen_prologue_use (ip_rtx));
14951 else
14953 insn = GEN_INT (saved_regs - 4);
14954 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14955 stack_pointer_rtx, insn));
14956 RTX_FRAME_RELATED_P (insn) = 1;
14960 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
14962 /* This add can produce multiple insns for a large constant, so we
14963 need to get tricky. */
14964 rtx last = get_last_insn ();
14966 amount = GEN_INT (offsets->saved_args + saved_regs
14967 - offsets->outgoing_args);
14969 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14970 amount));
14972 do
14973 last = last ? NEXT_INSN (last) : get_insns ();
14974 RTX_FRAME_RELATED_P (last) = 1;
14976 while (last != insn);
14978 /* If the frame pointer is needed, emit a special barrier that
14979 will prevent the scheduler from moving stores to the frame
14980 before the stack adjustment. */
14981 if (frame_pointer_needed)
14982 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
14983 hard_frame_pointer_rtx));
14987 if (frame_pointer_needed && TARGET_THUMB2)
14988 thumb_set_frame_pointer (offsets);
14990 if (flag_pic && arm_pic_register != INVALID_REGNUM)
14992 unsigned long mask;
14994 mask = live_regs_mask;
14995 mask &= THUMB2_WORK_REGS;
14996 if (!IS_NESTED (func_type))
14997 mask |= (1 << IP_REGNUM);
14998 arm_load_pic_register (mask);
15001 /* If we are profiling, make sure no instructions are scheduled before
15002 the call to mcount. Similarly if the user has requested no
15003 scheduling in the prologue. Similarly if we want non-call exceptions
15004 using the EABI unwinder, to prevent faulting instructions from being
15005 swapped with a stack adjustment. */
15006 if (crtl->profile || !TARGET_SCHED_PROLOG
15007 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
15008 emit_insn (gen_blockage ());
15010 /* If the link register is being kept alive, with the return address in it,
15011 then make sure that it does not get reused by the ce2 pass. */
15012 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15013 cfun->machine->lr_save_eliminated = 1;
15016 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15017 static void
15018 arm_print_condition (FILE *stream)
15020 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15022 /* Branch conversion is not implemented for Thumb-2. */
15023 if (TARGET_THUMB)
15025 output_operand_lossage ("predicated Thumb instruction");
15026 return;
15028 if (current_insn_predicate != NULL)
15030 output_operand_lossage
15031 ("predicated instruction in conditional sequence");
15032 return;
15035 fputs (arm_condition_codes[arm_current_cc], stream);
15037 else if (current_insn_predicate)
15039 enum arm_cond_code code;
15041 if (TARGET_THUMB1)
15043 output_operand_lossage ("predicated Thumb instruction");
15044 return;
15047 code = get_arm_condition_code (current_insn_predicate);
15048 fputs (arm_condition_codes[code], stream);
15053 /* If CODE is 'd', then the X is a condition operand and the instruction
15054 should only be executed if the condition is true.
15055 if CODE is 'D', then the X is a condition operand and the instruction
15056 should only be executed if the condition is false: however, if the mode
15057 of the comparison is CCFPEmode, then always execute the instruction -- we
15058 do this because in these circumstances !GE does not necessarily imply LT;
15059 in these cases the instruction pattern will take care to make sure that
15060 an instruction containing %d will follow, thereby undoing the effects of
15061 doing this instruction unconditionally.
15062 If CODE is 'N' then X is a floating point operand that must be negated
15063 before output.
15064 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15065 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15066 void
15067 arm_print_operand (FILE *stream, rtx x, int code)
15069 switch (code)
15071 case '@':
15072 fputs (ASM_COMMENT_START, stream);
15073 return;
15075 case '_':
15076 fputs (user_label_prefix, stream);
15077 return;
15079 case '|':
15080 fputs (REGISTER_PREFIX, stream);
15081 return;
15083 case '?':
15084 arm_print_condition (stream);
15085 return;
15087 case '(':
15088 /* Nothing in unified syntax, otherwise the current condition code. */
15089 if (!TARGET_UNIFIED_ASM)
15090 arm_print_condition (stream);
15091 break;
15093 case ')':
15094 /* The current condition code in unified syntax, otherwise nothing. */
15095 if (TARGET_UNIFIED_ASM)
15096 arm_print_condition (stream);
15097 break;
15099 case '.':
15100 /* The current condition code for a condition code setting instruction.
15101 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15102 if (TARGET_UNIFIED_ASM)
15104 fputc('s', stream);
15105 arm_print_condition (stream);
15107 else
15109 arm_print_condition (stream);
15110 fputc('s', stream);
15112 return;
15114 case '!':
15115 /* If the instruction is conditionally executed then print
15116 the current condition code, otherwise print 's'. */
15117 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15118 if (current_insn_predicate)
15119 arm_print_condition (stream);
15120 else
15121 fputc('s', stream);
15122 break;
15124 /* %# is a "break" sequence. It doesn't output anything, but is used to
15125 separate e.g. operand numbers from following text, if that text consists
15126 of further digits which we don't want to be part of the operand
15127 number. */
15128 case '#':
15129 return;
15131 case 'N':
15133 REAL_VALUE_TYPE r;
15134 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15135 r = REAL_VALUE_NEGATE (r);
15136 fprintf (stream, "%s", fp_const_from_val (&r));
15138 return;
15140 /* An integer or symbol address without a preceding # sign. */
15141 case 'c':
15142 switch (GET_CODE (x))
15144 case CONST_INT:
15145 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15146 break;
15148 case SYMBOL_REF:
15149 output_addr_const (stream, x);
15150 break;
15152 default:
15153 gcc_unreachable ();
15155 return;
15157 case 'B':
15158 if (GET_CODE (x) == CONST_INT)
15160 HOST_WIDE_INT val;
15161 val = ARM_SIGN_EXTEND (~INTVAL (x));
15162 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15164 else
15166 putc ('~', stream);
15167 output_addr_const (stream, x);
15169 return;
15171 case 'L':
15172 /* The low 16 bits of an immediate constant. */
15173 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
15174 return;
15176 case 'i':
15177 fprintf (stream, "%s", arithmetic_instr (x, 1));
15178 return;
15180 /* Truncate Cirrus shift counts. */
15181 case 's':
15182 if (GET_CODE (x) == CONST_INT)
15184 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15185 return;
15187 arm_print_operand (stream, x, 0);
15188 return;
15190 case 'I':
15191 fprintf (stream, "%s", arithmetic_instr (x, 0));
15192 return;
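/* Output the shift part of a shifted operand, e.g. ", lsl #3" when the
   shift amount is a constant or ", lsl r2" when it is in a register.  */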
15194 case 'S':
15196 HOST_WIDE_INT val;
15197 const char *shift;
15199 if (!shift_operator (x, SImode))
15201 output_operand_lossage ("invalid shift operand");
15202 break;
15205 shift = shift_op (x, &val);
15207 if (shift)
15209 fprintf (stream, ", %s ", shift);
15210 if (val == -1)
15211 arm_print_operand (stream, XEXP (x, 1), 0);
15212 else
15213 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15216 return;
15218 /* An explanation of the 'Q', 'R' and 'H' register operands:
15220 In a pair of registers containing a DI or DF value the 'Q'
15221 operand returns the register number of the register containing
15222 the least significant part of the value. The 'R' operand returns
15223 the register number of the register containing the most
15224 significant part of the value.
15226 The 'H' operand returns the higher of the two register numbers.
15227 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15228 same as the 'Q' operand, since the most significant part of the
15229 value is held in the lower number register. The reverse is true
15230 on systems where WORDS_BIG_ENDIAN is false.
15232 The purpose of these operands is to distinguish between cases
15233 where the endian-ness of the values is important (for example
15234 when they are added together), and cases where the endian-ness
15235 is irrelevant, but the order of register operations is important.
15236 For example when loading a value from memory into a register
15237 pair, the endian-ness does not matter. Provided that the value
15238 from the lower memory address is put into the lower numbered
15239 register, and the value from the higher address is put into the
15240 higher numbered register, the load will work regardless of whether
15241 the value being loaded is big-wordian or little-wordian. The
15242 order of the two register loads can matter however, if the address
15243 of the memory location is actually held in one of the registers
15244 being overwritten by the load. */
15245 case 'Q':
15246 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15248 output_operand_lossage ("invalid operand for code '%c'", code);
15249 return;
15252 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15253 return;
15255 case 'R':
15256 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15258 output_operand_lossage ("invalid operand for code '%c'", code);
15259 return;
15262 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
15263 return;
15265 case 'H':
15266 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15268 output_operand_lossage ("invalid operand for code '%c'", code);
15269 return;
15272 asm_fprintf (stream, "%r", REGNO (x) + 1);
15273 return;
15275 case 'J':
15276 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15278 output_operand_lossage ("invalid operand for code '%c'", code);
15279 return;
15282 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15283 return;
15285 case 'K':
15286 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15288 output_operand_lossage ("invalid operand for code '%c'", code);
15289 return;
15292 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
15293 return;
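/* Print the base register of a memory operand, looking through any
   pre/post increment or decrement addressing.  */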
15295 case 'm':
15296 asm_fprintf (stream, "%r",
15297 GET_CODE (XEXP (x, 0)) == REG
15298 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
15299 return;
15301 case 'M':
15302 asm_fprintf (stream, "{%r-%r}",
15303 REGNO (x),
15304 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
15305 return;
15307 /* Like 'M', but writing doubleword vector registers, for use by Neon
15308 insns. */
15309 case 'h':
15311 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
15312 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
15313 if (numregs == 1)
15314 asm_fprintf (stream, "{d%d}", regno);
15315 else
15316 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
15318 return;
15320 case 'd':
15321 /* CONST_TRUE_RTX means always -- that's the default. */
15322 if (x == const_true_rtx)
15323 return;
15325 if (!COMPARISON_P (x))
15327 output_operand_lossage ("invalid operand for code '%c'", code);
15328 return;
15331 fputs (arm_condition_codes[get_arm_condition_code (x)],
15332 stream);
15333 return;
15335 case 'D':
15336 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
15337 want to do that. */
15338 if (x == const_true_rtx)
15340 output_operand_lossage ("instruction never executed");
15341 return;
15343 if (!COMPARISON_P (x))
15345 output_operand_lossage ("invalid operand for code '%c'", code);
15346 return;
15349 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
15350 (get_arm_condition_code (x))],
15351 stream);
15352 return;
15354 /* Cirrus registers can be accessed in a variety of ways:
15355 single floating point (f)
15356 double floating point (d)
15357 32bit integer (fx)
15358 64bit integer (dx). */
15359 case 'W': /* Cirrus register in F mode. */
15360 case 'X': /* Cirrus register in D mode. */
15361 case 'Y': /* Cirrus register in FX mode. */
15362 case 'Z': /* Cirrus register in DX mode. */
15363 gcc_assert (GET_CODE (x) == REG
15364 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
15366 fprintf (stream, "mv%s%s",
15367 code == 'W' ? "f"
15368 : code == 'X' ? "d"
15369 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
15371 return;
15373 /* Print cirrus register in the mode specified by the register's mode. */
15374 case 'V':
15376 int mode = GET_MODE (x);
15378 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
15380 output_operand_lossage ("invalid operand for code '%c'", code);
15381 return;
15384 fprintf (stream, "mv%s%s",
15385 mode == DFmode ? "d"
15386 : mode == SImode ? "fx"
15387 : mode == DImode ? "dx"
15388 : "f", reg_names[REGNO (x)] + 2);
15390 return;
15393 case 'U':
15394 if (GET_CODE (x) != REG
15395 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
15396 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
15397 /* Bad value for wCG register number. */
15399 output_operand_lossage ("invalid operand for code '%c'", code);
15400 return;
15403 else
15404 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15405 return;
15407 /* Print an iWMMXt control register name. */
15408 case 'w':
15409 if (GET_CODE (x) != CONST_INT
15410 || INTVAL (x) < 0
15411 || INTVAL (x) >= 16)
15412 /* Bad value for wC register number. */
15414 output_operand_lossage ("invalid operand for code '%c'", code);
15415 return;
15418 else
15420 static const char * wc_reg_names [16] =
15422 "wCID", "wCon", "wCSSF", "wCASF",
15423 "wC4", "wC5", "wC6", "wC7",
15424 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15425 "wC12", "wC13", "wC14", "wC15"
15428 fputs (wc_reg_names [INTVAL (x)], stream);
15430 return;
15432 /* Print the high single-precision register of a VFP double-precision
15433 register. */
15434 case 'p':
15436 int mode = GET_MODE (x);
15437 int regno;
15439 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
15441 output_operand_lossage ("invalid operand for code '%c'", code);
15442 return;
15445 regno = REGNO (x);
15446 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
15448 output_operand_lossage ("invalid operand for code '%c'", code);
15449 return;
15452 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
15454 return;
15456 /* Print a VFP/Neon double precision or quad precision register name. */
15457 case 'P':
15458 case 'q':
15460 int mode = GET_MODE (x);
15461 int is_quad = (code == 'q');
15462 int regno;
15464 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15466 output_operand_lossage ("invalid operand for code '%c'", code);
15467 return;
15470 if (GET_CODE (x) != REG
15471 || !IS_VFP_REGNUM (REGNO (x)))
15473 output_operand_lossage ("invalid operand for code '%c'", code);
15474 return;
15477 regno = REGNO (x);
15478 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15479 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15481 output_operand_lossage ("invalid operand for code '%c'", code);
15482 return;
15485 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15486 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15488 return;
15490 /* These two codes print the low/high doubleword register of a Neon quad
15491 register, respectively. For pair-structure types, can also print
15492 low/high quadword registers. */
15493 case 'e':
15494 case 'f':
15496 int mode = GET_MODE (x);
15497 int regno;
15499 if ((GET_MODE_SIZE (mode) != 16
15500 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15502 output_operand_lossage ("invalid operand for code '%c'", code);
15503 return;
15506 regno = REGNO (x);
15507 if (!NEON_REGNO_OK_FOR_QUAD (regno))
15509 output_operand_lossage ("invalid operand for code '%c'", code);
15510 return;
15513 if (GET_MODE_SIZE (mode) == 16)
15514 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15515 + (code == 'f' ? 1 : 0));
15516 else
15517 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15518 + (code == 'f' ? 1 : 0));
15520 return;
15522 /* Print a VFPv3 floating-point constant, represented as an integer
15523 index. */
15524 case 'G':
15526 int index = vfp3_const_double_index (x);
15527 gcc_assert (index != -1);
15528 fprintf (stream, "%d", index);
15530 return;
15532 /* Print bits representing opcode features for Neon.
15534 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15535 and polynomials as unsigned.
15537 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15539 Bit 2 is 1 for rounding functions, 0 otherwise. */
15541 /* Identify the type as 's', 'u', 'p' or 'f'. */
15542 case 'T':
15544 HOST_WIDE_INT bits = INTVAL (x);
15545 fputc ("uspf"[bits & 3], stream);
15547 return;
15549 /* Likewise, but signed and unsigned integers are both 'i'. */
15550 case 'F':
15552 HOST_WIDE_INT bits = INTVAL (x);
15553 fputc ("iipf"[bits & 3], stream);
15555 return;
15557 /* As for 'T', but emit 'u' instead of 'p'. */
15558 case 't':
15560 HOST_WIDE_INT bits = INTVAL (x);
15561 fputc ("usuf"[bits & 3], stream);
15563 return;
15565 /* Bit 2: rounding (vs none). */
15566 case 'O':
15568 HOST_WIDE_INT bits = INTVAL (x);
15569 fputs ((bits & 4) != 0 ? "r" : "", stream);
15571 return;
15573 /* Memory operand for vld1/vst1 instruction. */
15574 case 'A':
15576 rtx addr;
15577 bool postinc = false;
15578 gcc_assert (GET_CODE (x) == MEM);
15579 addr = XEXP (x, 0);
15580 if (GET_CODE (addr) == POST_INC)
15582 postinc = true;
15583 addr = XEXP (addr, 0);
15585 asm_fprintf (stream, "[%r]", REGNO (addr));
15586 if (postinc)
15587 fputs("!", stream);
15589 return;
15591 /* Translate an S register number into a D register number and element index. */
15592 case 'y':
15594 int mode = GET_MODE (x);
15595 int regno;
15597 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
15599 output_operand_lossage ("invalid operand for code '%c'", code);
15600 return;
15603 regno = REGNO (x);
15604 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15606 output_operand_lossage ("invalid operand for code '%c'", code);
15607 return;
15610 regno = regno - FIRST_VFP_REGNUM;
15611 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
15613 return;
15615 /* Register specifier for vld1.16/vst1.16. Translate the S register
15616 number into a D register number and element index. */
15617 case 'z':
15619 int mode = GET_MODE (x);
15620 int regno;
15622 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15624 output_operand_lossage ("invalid operand for code '%c'", code);
15625 return;
15628 regno = REGNO (x);
15629 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15631 output_operand_lossage ("invalid operand for code '%c'", code);
15632 return;
15635 regno = regno - FIRST_VFP_REGNUM;
15636 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
15638 return;
15640 default:
15641 if (x == 0)
15643 output_operand_lossage ("missing operand");
15644 return;
15647 switch (GET_CODE (x))
15649 case REG:
15650 asm_fprintf (stream, "%r", REGNO (x));
15651 break;
15653 case MEM:
15654 output_memory_reference_mode = GET_MODE (x);
15655 output_address (XEXP (x, 0));
15656 break;
15658 case CONST_DOUBLE:
15659 if (TARGET_NEON)
15661 char fpstr[20];
15662 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
15663 sizeof (fpstr), 0, 1);
15664 fprintf (stream, "#%s", fpstr);
15666 else
15667 fprintf (stream, "#%s", fp_immediate_constant (x));
15668 break;
15670 default:
15671 gcc_assert (GET_CODE (x) != NEG);
15672 fputc ('#', stream);
15673 if (GET_CODE (x) == HIGH)
15675 fputs (":lower16:", stream);
15676 x = XEXP (x, 0);
15679 output_addr_const (stream, x);
15680 break;
15685 /* Target hook for assembling integer objects. The ARM version needs to
15686 handle word-sized values specially. */
15687 static bool
15688 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
15690 enum machine_mode mode;
15692 if (size == UNITS_PER_WORD && aligned_p)
15694 fputs ("\t.word\t", asm_out_file);
15695 output_addr_const (asm_out_file, x);
15697 /* Mark symbols as position independent. We only do this in the
15698 .text segment, not in the .data segment. */
15699 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
15700 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
15702 /* See legitimize_pic_address for an explanation of the
15703 TARGET_VXWORKS_RTP check. */
15704 if (TARGET_VXWORKS_RTP
15705 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
15706 fputs ("(GOT)", asm_out_file);
15707 else
15708 fputs ("(GOTOFF)", asm_out_file);
15710 fputc ('\n', asm_out_file);
15711 return true;
15714 mode = GET_MODE (x);
15716 if (arm_vector_mode_supported_p (mode))
15718 int i, units;
15720 gcc_assert (GET_CODE (x) == CONST_VECTOR);
15722 units = CONST_VECTOR_NUNITS (x);
15723 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
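/* Output the vector one element at a time; the first element gets the
   full alignment of the containing vector, the rest are packed at the
   element size.  */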
15725 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15726 for (i = 0; i < units; i++)
15728 rtx elt = CONST_VECTOR_ELT (x, i);
15729 assemble_integer
15730 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
15732 else
15733 for (i = 0; i < units; i++)
15735 rtx elt = CONST_VECTOR_ELT (x, i);
15736 REAL_VALUE_TYPE rval;
15738 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
15740 assemble_real
15741 (rval, GET_MODE_INNER (mode),
15742 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
15745 return true;
15748 return default_assemble_integer (x, size, aligned_p);
15751 static void
15752 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
15754 section *s;
15756 if (!TARGET_AAPCS_BASED)
15758 (is_ctor ?
15759 default_named_section_asm_out_constructor
15760 : default_named_section_asm_out_destructor) (symbol, priority);
15761 return;
15764 /* Put these in the .init_array section, using a special relocation. */
15765 if (priority != DEFAULT_INIT_PRIORITY)
15767 char buf[18];
15768 sprintf (buf, "%s.%.5u",
15769 is_ctor ? ".init_array" : ".fini_array",
15770 priority);
15771 s = get_section (buf, SECTION_WRITE, NULL_TREE);
15773 else if (is_ctor)
15774 s = ctors_section;
15775 else
15776 s = dtors_section;
15778 switch_to_section (s);
15779 assemble_align (POINTER_SIZE);
15780 fputs ("\t.word\t", asm_out_file);
15781 output_addr_const (asm_out_file, symbol);
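/* The "(target1)" suffix emits an R_ARM_TARGET1 relocation, which the
   platform may define as either absolute or PC-relative for
   .init_array / .fini_array entries.  */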
15782 fputs ("(target1)\n", asm_out_file);
15785 /* Add a function to the list of static constructors. */
15787 static void
15788 arm_elf_asm_constructor (rtx symbol, int priority)
15790 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
15793 /* Add a function to the list of static destructors. */
15795 static void
15796 arm_elf_asm_destructor (rtx symbol, int priority)
15798 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
15801 /* A finite state machine takes care of noticing whether or not instructions
15802 can be conditionally executed, and thus decrease execution time and code
15803 size by deleting branch instructions. The fsm is controlled by
15804 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
15806 /* The state of the fsm controlling condition codes are:
15807 0: normal, do nothing special
15808 1: make ASM_OUTPUT_OPCODE not output this instruction
15809 2: make ASM_OUTPUT_OPCODE not output this instruction
15810 3: make instructions conditional
15811 4: make instructions conditional
15813 State transitions (state->state by whom under condition):
15814 0 -> 1 final_prescan_insn if the `target' is a label
15815 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
15816 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
15817 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
15818 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
15819 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
15820 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
15821 (the target insn is arm_target_insn).
15823 If the jump clobbers the conditions then we use states 2 and 4.
15825 A similar thing can be done with conditional return insns.
15827 XXX In case the `target' is an unconditional branch, this conditionalising
15828 of the instructions always reduces code size, but not always execution
15829 time. But then, I want to reduce the code size to somewhere near what
15830 /bin/cc produces. */
15832 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
15833 instructions. When a COND_EXEC instruction is seen the subsequent
15834 instructions are scanned so that multiple conditional instructions can be
15835 combined into a single IT block. arm_condexec_count and arm_condexec_mask
15836 specify the length and true/false mask for the IT block. These will be
15837 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
15839 /* Returns the index of the ARM condition code string in
15840 `arm_condition_codes'. COMPARISON should be an rtx like
15841 `(eq (...) (...))'. */
15842 static enum arm_cond_code
15843 get_arm_condition_code (rtx comparison)
15845 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
15846 enum arm_cond_code code;
15847 enum rtx_code comp_code = GET_CODE (comparison);
15849 if (GET_MODE_CLASS (mode) != MODE_CC)
15850 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
15851 XEXP (comparison, 1));
15853 switch (mode)
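/* The CC_D... modes describe a pair of comparisons combined so that one
   condition dominates the other; the mode encodes that dominant
   condition, which the flags deliver when tested with NE and whose
   inverse they deliver when tested with EQ.  */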
15855 case CC_DNEmode: code = ARM_NE; goto dominance;
15856 case CC_DEQmode: code = ARM_EQ; goto dominance;
15857 case CC_DGEmode: code = ARM_GE; goto dominance;
15858 case CC_DGTmode: code = ARM_GT; goto dominance;
15859 case CC_DLEmode: code = ARM_LE; goto dominance;
15860 case CC_DLTmode: code = ARM_LT; goto dominance;
15861 case CC_DGEUmode: code = ARM_CS; goto dominance;
15862 case CC_DGTUmode: code = ARM_HI; goto dominance;
15863 case CC_DLEUmode: code = ARM_LS; goto dominance;
15864 case CC_DLTUmode: code = ARM_CC;
15866 dominance:
15867 gcc_assert (comp_code == EQ || comp_code == NE);
15869 if (comp_code == EQ)
15870 return ARM_INVERSE_CONDITION_CODE (code);
15871 return code;
15873 case CC_NOOVmode:
15874 switch (comp_code)
15876 case NE: return ARM_NE;
15877 case EQ: return ARM_EQ;
15878 case GE: return ARM_PL;
15879 case LT: return ARM_MI;
15880 default: gcc_unreachable ();
15883 case CC_Zmode:
15884 switch (comp_code)
15886 case NE: return ARM_NE;
15887 case EQ: return ARM_EQ;
15888 default: gcc_unreachable ();
15891 case CC_Nmode:
15892 switch (comp_code)
15894 case NE: return ARM_MI;
15895 case EQ: return ARM_PL;
15896 default: gcc_unreachable ();
15899 case CCFPEmode:
15900 case CCFPmode:
15901 /* These encodings assume that AC=1 in the FPA system control
15902 byte. This allows us to handle all cases except UNEQ and
15903 LTGT. */
15904 switch (comp_code)
15906 case GE: return ARM_GE;
15907 case GT: return ARM_GT;
15908 case LE: return ARM_LS;
15909 case LT: return ARM_MI;
15910 case NE: return ARM_NE;
15911 case EQ: return ARM_EQ;
15912 case ORDERED: return ARM_VC;
15913 case UNORDERED: return ARM_VS;
15914 case UNLT: return ARM_LT;
15915 case UNLE: return ARM_LE;
15916 case UNGT: return ARM_HI;
15917 case UNGE: return ARM_PL;
15918 /* UNEQ and LTGT do not have a representation. */
15919 case UNEQ: /* Fall through. */
15920 case LTGT: /* Fall through. */
15921 default: gcc_unreachable ();
15924 case CC_SWPmode:
15925 switch (comp_code)
15927 case NE: return ARM_NE;
15928 case EQ: return ARM_EQ;
15929 case GE: return ARM_LE;
15930 case GT: return ARM_LT;
15931 case LE: return ARM_GE;
15932 case LT: return ARM_GT;
15933 case GEU: return ARM_LS;
15934 case GTU: return ARM_CC;
15935 case LEU: return ARM_CS;
15936 case LTU: return ARM_HI;
15937 default: gcc_unreachable ();
15940 case CC_Cmode:
15941 switch (comp_code)
15943 case LTU: return ARM_CS;
15944 case GEU: return ARM_CC;
15945 default: gcc_unreachable ();
15948 case CCmode:
15949 switch (comp_code)
15951 case NE: return ARM_NE;
15952 case EQ: return ARM_EQ;
15953 case GE: return ARM_GE;
15954 case GT: return ARM_GT;
15955 case LE: return ARM_LE;
15956 case LT: return ARM_LT;
15957 case GEU: return ARM_CS;
15958 case GTU: return ARM_HI;
15959 case LEU: return ARM_LS;
15960 case LTU: return ARM_CC;
15961 default: gcc_unreachable ();
15964 default: gcc_unreachable ();
15968 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
15969 instructions. */
15970 void
15971 thumb2_final_prescan_insn (rtx insn)
15973 rtx first_insn = insn;
15974 rtx body = PATTERN (insn);
15975 rtx predicate;
15976 enum arm_cond_code code;
15977 int n;
15978 int mask;
15980 /* Remove the previous insn from the count of insns to be output. */
15981 if (arm_condexec_count)
15982 arm_condexec_count--;
15984 /* Nothing to do if we are already inside a conditional block. */
15985 if (arm_condexec_count)
15986 return;
15988 if (GET_CODE (body) != COND_EXEC)
15989 return;
15991 /* Conditional jumps are implemented directly. */
15992 if (GET_CODE (insn) == JUMP_INSN)
15993 return;
15995 predicate = COND_EXEC_TEST (body);
15996 arm_current_cc = get_arm_condition_code (predicate);
15998 n = get_attr_ce_count (insn);
15999 arm_condexec_count = 1;
16000 arm_condexec_mask = (1 << n) - 1;
16001 arm_condexec_masklen = n;
16002 /* See if subsequent instructions can be combined into the same block. */
16003 for (;;)
16005 insn = next_nonnote_insn (insn);
16007 /* Jumping into the middle of an IT block is illegal, so a label or
16008 barrier terminates the block. */
16009 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
16010 break;
16012 body = PATTERN (insn);
16013 /* USE and CLOBBER aren't really insns, so just skip them. */
16014 if (GET_CODE (body) == USE
16015 || GET_CODE (body) == CLOBBER)
16016 continue;
16018 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
16019 if (GET_CODE (body) != COND_EXEC)
16020 break;
16021 /* Allow up to 4 conditionally executed instructions in a block. */
16022 n = get_attr_ce_count (insn);
16023 if (arm_condexec_masklen + n > 4)
16024 break;
16026 predicate = COND_EXEC_TEST (body);
16027 code = get_arm_condition_code (predicate);
16028 mask = (1 << n) - 1;
16029 if (arm_current_cc == code)
16030 arm_condexec_mask |= (mask << arm_condexec_masklen);
16031 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
16032 break;
16034 arm_condexec_count++;
16035 arm_condexec_masklen += n;
16037 /* A jump must be the last instruction in a conditional block. */
16038 if (GET_CODE (insn) == JUMP_INSN)
16039 break;
16041 /* Restore recog_data (getting the attributes of other insns can
16042 destroy this array, but final.c assumes that it remains intact
16043 across this call). */
16044 extract_constrain_insn_cached (first_insn);
16047 void
16048 arm_final_prescan_insn (rtx insn)
16050 /* BODY will hold the body of INSN. */
16051 rtx body = PATTERN (insn);
16053 /* This will be 1 if trying to repeat the trick, and things need to be
16054 reversed if it appears to fail. */
16055 int reverse = 0;
16057 /* If we start with a return insn, we only succeed if we find another one. */
16058 int seeking_return = 0;
16060 /* START_INSN will hold the insn from where we start looking. This is the
16061 first insn after the following code_label if REVERSE is true. */
16062 rtx start_insn = insn;
16064 /* If in state 4, check if the target branch is reached, in order to
16065 change back to state 0. */
16066 if (arm_ccfsm_state == 4)
16068 if (insn == arm_target_insn)
16070 arm_target_insn = NULL;
16071 arm_ccfsm_state = 0;
16073 return;
16076 /* If in state 3, it is possible to repeat the trick, if this insn is an
16077 unconditional branch to a label, and immediately following this branch
16078 is the previous target label which is only used once, and the label this
16079 branch jumps to is not too far off. */
16080 if (arm_ccfsm_state == 3)
16082 if (simplejump_p (insn))
16084 start_insn = next_nonnote_insn (start_insn);
16085 if (GET_CODE (start_insn) == BARRIER)
16087 /* XXX Isn't this always a barrier? */
16088 start_insn = next_nonnote_insn (start_insn);
16090 if (GET_CODE (start_insn) == CODE_LABEL
16091 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16092 && LABEL_NUSES (start_insn) == 1)
16093 reverse = TRUE;
16094 else
16095 return;
16097 else if (GET_CODE (body) == RETURN)
16099 start_insn = next_nonnote_insn (start_insn);
16100 if (GET_CODE (start_insn) == BARRIER)
16101 start_insn = next_nonnote_insn (start_insn);
16102 if (GET_CODE (start_insn) == CODE_LABEL
16103 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16104 && LABEL_NUSES (start_insn) == 1)
16106 reverse = TRUE;
16107 seeking_return = 1;
16109 else
16110 return;
16112 else
16113 return;
16116 gcc_assert (!arm_ccfsm_state || reverse);
16117 if (GET_CODE (insn) != JUMP_INSN)
16118 return;
16120 /* This jump might be paralleled with a clobber of the condition codes;
16121 the jump should always come first. */
16122 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
16123 body = XVECEXP (body, 0, 0);
16125 if (reverse
16126 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
16127 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
16129 int insns_skipped;
16130 int fail = FALSE, succeed = FALSE;
16131 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
16132 int then_not_else = TRUE;
16133 rtx this_insn = start_insn, label = 0;
16135 /* Register the insn jumped to. */
16136 if (reverse)
16138 if (!seeking_return)
16139 label = XEXP (SET_SRC (body), 0);
16141 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
16142 label = XEXP (XEXP (SET_SRC (body), 1), 0);
16143 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
16145 label = XEXP (XEXP (SET_SRC (body), 2), 0);
16146 then_not_else = FALSE;
16148 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
16149 seeking_return = 1;
16150 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
16152 seeking_return = 1;
16153 then_not_else = FALSE;
16155 else
16156 gcc_unreachable ();
16158 /* See how many insns this branch skips, and what kind of insns. If all
16159 insns are okay, and the label or unconditional branch to the same
16160 label is not too far away, succeed. */
16161 for (insns_skipped = 0;
16162 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
16164 rtx scanbody;
16166 this_insn = next_nonnote_insn (this_insn);
16167 if (!this_insn)
16168 break;
16170 switch (GET_CODE (this_insn))
16172 case CODE_LABEL:
16173 /* Succeed if it is the target label, otherwise fail since
16174 control falls in from somewhere else. */
16175 if (this_insn == label)
16177 arm_ccfsm_state = 1;
16178 succeed = TRUE;
16180 else
16181 fail = TRUE;
16182 break;
16184 case BARRIER:
16185 /* Succeed if the following insn is the target label.
16186 Otherwise fail.
16187 If return insns are used then the last insn in a function
16188 will be a barrier. */
16189 this_insn = next_nonnote_insn (this_insn);
16190 if (this_insn && this_insn == label)
16192 arm_ccfsm_state = 1;
16193 succeed = TRUE;
16195 else
16196 fail = TRUE;
16197 break;
16199 case CALL_INSN:
16200 /* The AAPCS says that conditional calls should not be
16201 used since they make interworking inefficient (the
16202 linker can't transform BL<cond> into BLX). That's
16203 only a problem if the machine has BLX. */
16204 if (arm_arch5)
16206 fail = TRUE;
16207 break;
16210 /* Succeed if the following insn is the target label, or
16211 if the following two insns are a barrier and the
16212 target label. */
16213 this_insn = next_nonnote_insn (this_insn);
16214 if (this_insn && GET_CODE (this_insn) == BARRIER)
16215 this_insn = next_nonnote_insn (this_insn);
16217 if (this_insn && this_insn == label
16218 && insns_skipped < max_insns_skipped)
16220 arm_ccfsm_state = 1;
16221 succeed = TRUE;
16223 else
16224 fail = TRUE;
16225 break;
16227 case JUMP_INSN:
16228 /* If this is an unconditional branch to the same label, succeed.
16229 If it is to another label, do nothing. If it is conditional,
16230 fail. */
16231 /* XXX Probably, the tests for SET and the PC are
16232 unnecessary. */
16234 scanbody = PATTERN (this_insn);
16235 if (GET_CODE (scanbody) == SET
16236 && GET_CODE (SET_DEST (scanbody)) == PC)
16238 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
16239 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
16241 arm_ccfsm_state = 2;
16242 succeed = TRUE;
16244 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
16245 fail = TRUE;
16247 /* Fail if a conditional return is undesirable (e.g. on a
16248 StrongARM), but still allow this if optimizing for size. */
16249 else if (GET_CODE (scanbody) == RETURN
16250 && !use_return_insn (TRUE, NULL)
16251 && !optimize_size)
16252 fail = TRUE;
16253 else if (GET_CODE (scanbody) == RETURN
16254 && seeking_return)
16256 arm_ccfsm_state = 2;
16257 succeed = TRUE;
16259 else if (GET_CODE (scanbody) == PARALLEL)
16261 switch (get_attr_conds (this_insn))
16263 case CONDS_NOCOND:
16264 break;
16265 default:
16266 fail = TRUE;
16267 break;
16270 else
16271 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
16273 break;
16275 case INSN:
16276 /* Instructions using or affecting the condition codes make it
16277 fail. */
16278 scanbody = PATTERN (this_insn);
16279 if (!(GET_CODE (scanbody) == SET
16280 || GET_CODE (scanbody) == PARALLEL)
16281 || get_attr_conds (this_insn) != CONDS_NOCOND)
16282 fail = TRUE;
16284 /* A conditional Cirrus instruction must be followed by
16285 a non-Cirrus instruction. However, since we
16286 conditionalize instructions in this function, and since
16287 by the time we get here we cannot add instructions
16288 (nops) because shorten_branches () has already been
16289 called, we disable conditionalizing Cirrus
16290 instructions to be safe. */
16291 if (GET_CODE (scanbody) != USE
16292 && GET_CODE (scanbody) != CLOBBER
16293 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
16294 fail = TRUE;
16295 break;
16297 default:
16298 break;
16301 if (succeed)
16303 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
16304 arm_target_label = CODE_LABEL_NUMBER (label);
16305 else
16307 gcc_assert (seeking_return || arm_ccfsm_state == 2);
16309 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
16311 this_insn = next_nonnote_insn (this_insn);
16312 gcc_assert (!this_insn
16313 || (GET_CODE (this_insn) != BARRIER
16314 && GET_CODE (this_insn) != CODE_LABEL));
16316 if (!this_insn)
16318 /* Oh dear!  We ran off the end; give up. */
16319 extract_constrain_insn_cached (insn);
16320 arm_ccfsm_state = 0;
16321 arm_target_insn = NULL;
16322 return;
16324 arm_target_insn = this_insn;
16327 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
16328 what it was. */
16329 if (!reverse)
16330 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
16332 if (reverse || then_not_else)
16333 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
16336 /* Restore recog_data (getting the attributes of other insns can
16337 destroy this array, but final.c assumes that it remains intact
16338 across this call). */
16339 extract_constrain_insn_cached (insn);
16343 /* Output IT instructions. */
16344 void
16345 thumb2_asm_output_opcode (FILE * stream)
16347 char buff[5];
16348 int n;
16350 if (arm_condexec_mask)
16352 for (n = 0; n < arm_condexec_masklen; n++)
16353 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
16354 buff[n] = 0;
16355 asm_fprintf (stream, "i%s\t%s\n\t", buff,
16356 arm_condition_codes[arm_current_cc]);
16357 arm_condexec_mask = 0;
16361 /* Returns true if REGNO is a valid register
16362 for holding a quantity of type MODE. */
16364 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
16366 if (GET_MODE_CLASS (mode) == MODE_CC)
16367 return (regno == CC_REGNUM
16368 || (TARGET_HARD_FLOAT && TARGET_VFP
16369 && regno == VFPCC_REGNUM));
16371 if (TARGET_THUMB1)
16372 /* For the Thumb we only allow values bigger than SImode in
16373 registers 0 - 6, so that there is always a second low
16374 register available to hold the upper part of the value.
16375 We probably ought to ensure that the register is the
16376 start of an even numbered register pair. */
16377 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
16379 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
16380 && IS_CIRRUS_REGNUM (regno))
16381 /* We have outlawed SI values in Cirrus registers because they
16382 reside in the lower 32 bits, but SF values reside in the
16383 upper 32 bits. This causes gcc all sorts of grief. We can't
16384 even split the registers into pairs because Cirrus SI values
16385 get sign-extended to 64 bits -- aldyh. */
16386 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
16388 if (TARGET_HARD_FLOAT && TARGET_VFP
16389 && IS_VFP_REGNUM (regno))
16391 if (mode == SFmode || mode == SImode)
16392 return VFP_REGNO_OK_FOR_SINGLE (regno);
16394 if (mode == DFmode)
16395 return VFP_REGNO_OK_FOR_DOUBLE (regno);
16397 /* VFP registers can hold HFmode values, but there is no point in
16398 putting them there unless we have hardware conversion insns. */
16399 if (mode == HFmode)
16400 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
16402 if (TARGET_NEON)
16403 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
16404 || (VALID_NEON_QREG_MODE (mode)
16405 && NEON_REGNO_OK_FOR_QUAD (regno))
16406 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
16407 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
16408 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
16409 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
16410 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
16412 return FALSE;
16415 if (TARGET_REALLY_IWMMXT)
16417 if (IS_IWMMXT_GR_REGNUM (regno))
16418 return mode == SImode;
16420 if (IS_IWMMXT_REGNUM (regno))
16421 return VALID_IWMMXT_REG_MODE (mode);
16424 /* We allow almost any value to be stored in the general registers.
16425 Restrict doubleword quantities to even register pairs so that we can
16426 use ldrd. Do not allow very large Neon structure opaque modes in
16427 general registers; they would use too many. */
16428 if (regno <= LAST_ARM_REGNUM)
16429 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
16430 && ARM_NUM_REGS (mode) <= 4;
16432 if (regno == FRAME_POINTER_REGNUM
16433 || regno == ARG_POINTER_REGNUM)
16434 /* We only allow integers in the fake hard registers. */
16435 return GET_MODE_CLASS (mode) == MODE_INT;
16437 /* The only registers left are the FPA registers
16438 which we only allow to hold FP values. */
16439 return (TARGET_HARD_FLOAT && TARGET_FPA
16440 && GET_MODE_CLASS (mode) == MODE_FLOAT
16441 && regno >= FIRST_FPA_REGNUM
16442 && regno <= LAST_FPA_REGNUM);
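/* A worked illustration (assumed target flags, not exhaustive): with
   TARGET_LDRD set, arm_hard_regno_mode_ok above accepts DImode in r0 or
   r2 but rejects it in r1, since doubleword values are restricted to
   even/odd register pairs so that ldrd/strd can be used.  */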
16445 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
16446 not used in arm mode. */
16448 enum reg_class
16449 arm_regno_class (int regno)
16451 if (TARGET_THUMB1)
16453 if (regno == STACK_POINTER_REGNUM)
16454 return STACK_REG;
16455 if (regno == CC_REGNUM)
16456 return CC_REG;
16457 if (regno < 8)
16458 return LO_REGS;
16459 return HI_REGS;
16462 if (TARGET_THUMB2 && regno < 8)
16463 return LO_REGS;
16465 if ( regno <= LAST_ARM_REGNUM
16466 || regno == FRAME_POINTER_REGNUM
16467 || regno == ARG_POINTER_REGNUM)
16468 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16470 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16471 return TARGET_THUMB2 ? CC_REG : NO_REGS;
16473 if (IS_CIRRUS_REGNUM (regno))
16474 return CIRRUS_REGS;
16476 if (IS_VFP_REGNUM (regno))
16478 if (regno <= D7_VFP_REGNUM)
16479 return VFP_D0_D7_REGS;
16480 else if (regno <= LAST_LO_VFP_REGNUM)
16481 return VFP_LO_REGS;
16482 else
16483 return VFP_HI_REGS;
16486 if (IS_IWMMXT_REGNUM (regno))
16487 return IWMMXT_REGS;
16489 if (IS_IWMMXT_GR_REGNUM (regno))
16490 return IWMMXT_GR_REGS;
16492 return FPA_REGS;
16495 /* Handle a special case when computing the offset
16496 of an argument from the frame pointer. */
16498 arm_debugger_arg_offset (int value, rtx addr)
16500 rtx insn;
16502 /* We are only interested in cases where dbxout_parms () failed to compute the offset. */
16503 if (value != 0)
16504 return 0;
16506 /* We can only cope with the case where the address is held in a register. */
16507 if (GET_CODE (addr) != REG)
16508 return 0;
16510 /* If we are using the frame pointer to point at the argument, then
16511 an offset of 0 is correct. */
16512 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
16513 return 0;
16515 /* If we are using the stack pointer to point at the
16516 argument, then an offset of 0 is correct. */
16517 /* ??? Check this is consistent with thumb2 frame layout. */
16518 if ((TARGET_THUMB || !frame_pointer_needed)
16519 && REGNO (addr) == SP_REGNUM)
16520 return 0;
16522 /* Oh dear. The argument is pointed to by a register rather
16523 than being held in a register, or being stored at a known
16524 offset from the frame pointer. Since GDB only understands
16525 those two kinds of argument we must translate the address
16526 held in the register into an offset from the frame pointer.
16527 We do this by searching through the insns for the function
16528 looking to see where this register gets its value. If the
16529 register is initialized from the frame pointer plus an offset
16530 then we are in luck and we can continue, otherwise we give up.
16532 This code is exercised by producing debugging information
16533 for a function with arguments like this:
16535 double func (double a, double b, int c, double d) {return d;}
16537 Without this code the stab for parameter 'd' will be set to
16538 an offset of 0 from the frame pointer, rather than 8. */
16540 /* The if() statement says:
16542 If the insn is a normal instruction
16543 and if the insn is setting the value in a register
16544 and if the register being set is the register holding the address of the argument
16545 and if the address is computed by an addition
16546 that involves adding to a register
16547 which is the frame pointer
16548 a constant integer
16550 then... */
16552 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16554 if ( GET_CODE (insn) == INSN
16555 && GET_CODE (PATTERN (insn)) == SET
16556 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
16557 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
16558 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
16559 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
16560 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
16563 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
16565 break;
16569 if (value == 0)
16571 debug_rtx (addr);
16572 warning (0, "unable to compute real location of stacked parameter");
16573 value = 8; /* XXX magic hack */
16576 return value;
16579 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
16580 do \
16582 if ((MASK) & insn_flags) \
16583 add_builtin_function ((NAME), (TYPE), (CODE), \
16584 BUILT_IN_MD, NULL, NULL_TREE); \
16586 while (0)
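/* Illustrative use, mirroring the calls further down: for example

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
		   ARM_BUILTIN_WZERO);

   registers __builtin_arm_wzero only when the FL_IWMMXT bit is present
   in insn_flags, i.e. only for targets with iWMMXt support.  */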
16588 struct builtin_description
16590 const unsigned int mask;
16591 const enum insn_code icode;
16592 const char * const name;
16593 const enum arm_builtins code;
16594 const enum rtx_code comparison;
16595 const unsigned int flag;
16598 static const struct builtin_description bdesc_2arg[] =
16600 #define IWMMXT_BUILTIN(code, string, builtin) \
16601 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
16602 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16604 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
16605 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
16606 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
16607 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
16608 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
16609 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
16610 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
16611 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
16612 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
16613 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
16614 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
16615 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
16616 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
16617 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
16618 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
16619 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
16620 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
16621 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
16622 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
16623 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
16624 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
16625 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
16626 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
16627 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
16628 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
16629 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
16630 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
16631 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
16632 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
16633 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
16634 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
16635 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
16636 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
16637 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
16638 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
16639 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
16640 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
16641 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
16642 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
16643 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
16644 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
16645 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
16646 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
16647 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
16648 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
16649 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
16650 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
16651 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
16652 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
16653 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
16654 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
16655 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
16656 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
16657 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
16658 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
16659 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
16660 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
16661 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
16663 #define IWMMXT_BUILTIN2(code, builtin) \
16664 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16666 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
16667 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
16668 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
16669 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
16670 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
16671 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
16672 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
16673 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
16674 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
16675 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
16676 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
16677 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
16678 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
16679 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
16680 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
16681 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
16682 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
16683 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
16684 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
16685 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
16686 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
16687 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
16688 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
16689 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
16690 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
16691 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
16692 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
16693 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
16694 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
16695 IWMMXT_BUILTIN2 (rordi3, WRORDI)
16696 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
16697 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
16700 static const struct builtin_description bdesc_1arg[] =
16702 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
16703 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
16704 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
16705 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
16706 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
16707 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
16708 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
16709 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
16710 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
16711 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
16712 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
16713 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
16714 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
16715 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
16716 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
16717 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
16718 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
16719 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
16722 /* Set up all the iWMMXt builtins. This is
16723 not called if TARGET_IWMMXT is zero. */
16725 static void
16726 arm_init_iwmmxt_builtins (void)
16728 const struct builtin_description * d;
16729 size_t i;
16730 tree endlink = void_list_node;
16732 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16733 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16734 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
16736 tree int_ftype_int
16737 = build_function_type (integer_type_node,
16738 tree_cons (NULL_TREE, integer_type_node, endlink));
16739 tree v8qi_ftype_v8qi_v8qi_int
16740 = build_function_type (V8QI_type_node,
16741 tree_cons (NULL_TREE, V8QI_type_node,
16742 tree_cons (NULL_TREE, V8QI_type_node,
16743 tree_cons (NULL_TREE,
16744 integer_type_node,
16745 endlink))));
16746 tree v4hi_ftype_v4hi_int
16747 = build_function_type (V4HI_type_node,
16748 tree_cons (NULL_TREE, V4HI_type_node,
16749 tree_cons (NULL_TREE, integer_type_node,
16750 endlink)));
16751 tree v2si_ftype_v2si_int
16752 = build_function_type (V2SI_type_node,
16753 tree_cons (NULL_TREE, V2SI_type_node,
16754 tree_cons (NULL_TREE, integer_type_node,
16755 endlink)));
16756 tree v2si_ftype_di_di
16757 = build_function_type (V2SI_type_node,
16758 tree_cons (NULL_TREE, long_long_integer_type_node,
16759 tree_cons (NULL_TREE, long_long_integer_type_node,
16760 endlink)));
16761 tree di_ftype_di_int
16762 = build_function_type (long_long_integer_type_node,
16763 tree_cons (NULL_TREE, long_long_integer_type_node,
16764 tree_cons (NULL_TREE, integer_type_node,
16765 endlink)));
16766 tree di_ftype_di_int_int
16767 = build_function_type (long_long_integer_type_node,
16768 tree_cons (NULL_TREE, long_long_integer_type_node,
16769 tree_cons (NULL_TREE, integer_type_node,
16770 tree_cons (NULL_TREE,
16771 integer_type_node,
16772 endlink))));
16773 tree int_ftype_v8qi
16774 = build_function_type (integer_type_node,
16775 tree_cons (NULL_TREE, V8QI_type_node,
16776 endlink));
16777 tree int_ftype_v4hi
16778 = build_function_type (integer_type_node,
16779 tree_cons (NULL_TREE, V4HI_type_node,
16780 endlink));
16781 tree int_ftype_v2si
16782 = build_function_type (integer_type_node,
16783 tree_cons (NULL_TREE, V2SI_type_node,
16784 endlink));
16785 tree int_ftype_v8qi_int
16786 = build_function_type (integer_type_node,
16787 tree_cons (NULL_TREE, V8QI_type_node,
16788 tree_cons (NULL_TREE, integer_type_node,
16789 endlink)));
16790 tree int_ftype_v4hi_int
16791 = build_function_type (integer_type_node,
16792 tree_cons (NULL_TREE, V4HI_type_node,
16793 tree_cons (NULL_TREE, integer_type_node,
16794 endlink)));
16795 tree int_ftype_v2si_int
16796 = build_function_type (integer_type_node,
16797 tree_cons (NULL_TREE, V2SI_type_node,
16798 tree_cons (NULL_TREE, integer_type_node,
16799 endlink)));
16800 tree v8qi_ftype_v8qi_int_int
16801 = build_function_type (V8QI_type_node,
16802 tree_cons (NULL_TREE, V8QI_type_node,
16803 tree_cons (NULL_TREE, integer_type_node,
16804 tree_cons (NULL_TREE,
16805 integer_type_node,
16806 endlink))));
16807 tree v4hi_ftype_v4hi_int_int
16808 = build_function_type (V4HI_type_node,
16809 tree_cons (NULL_TREE, V4HI_type_node,
16810 tree_cons (NULL_TREE, integer_type_node,
16811 tree_cons (NULL_TREE,
16812 integer_type_node,
16813 endlink))));
16814 tree v2si_ftype_v2si_int_int
16815 = build_function_type (V2SI_type_node,
16816 tree_cons (NULL_TREE, V2SI_type_node,
16817 tree_cons (NULL_TREE, integer_type_node,
16818 tree_cons (NULL_TREE,
16819 integer_type_node,
16820 endlink))));
16821 /* Miscellaneous. */
16822 tree v8qi_ftype_v4hi_v4hi
16823 = build_function_type (V8QI_type_node,
16824 tree_cons (NULL_TREE, V4HI_type_node,
16825 tree_cons (NULL_TREE, V4HI_type_node,
16826 endlink)));
16827 tree v4hi_ftype_v2si_v2si
16828 = build_function_type (V4HI_type_node,
16829 tree_cons (NULL_TREE, V2SI_type_node,
16830 tree_cons (NULL_TREE, V2SI_type_node,
16831 endlink)));
16832 tree v2si_ftype_v4hi_v4hi
16833 = build_function_type (V2SI_type_node,
16834 tree_cons (NULL_TREE, V4HI_type_node,
16835 tree_cons (NULL_TREE, V4HI_type_node,
16836 endlink)));
16837 tree v2si_ftype_v8qi_v8qi
16838 = build_function_type (V2SI_type_node,
16839 tree_cons (NULL_TREE, V8QI_type_node,
16840 tree_cons (NULL_TREE, V8QI_type_node,
16841 endlink)));
16842 tree v4hi_ftype_v4hi_di
16843 = build_function_type (V4HI_type_node,
16844 tree_cons (NULL_TREE, V4HI_type_node,
16845 tree_cons (NULL_TREE,
16846 long_long_integer_type_node,
16847 endlink)));
16848 tree v2si_ftype_v2si_di
16849 = build_function_type (V2SI_type_node,
16850 tree_cons (NULL_TREE, V2SI_type_node,
16851 tree_cons (NULL_TREE,
16852 long_long_integer_type_node,
16853 endlink)));
16854 tree void_ftype_int_int
16855 = build_function_type (void_type_node,
16856 tree_cons (NULL_TREE, integer_type_node,
16857 tree_cons (NULL_TREE, integer_type_node,
16858 endlink)));
16859 tree di_ftype_void
16860 = build_function_type (long_long_unsigned_type_node, endlink);
16861 tree di_ftype_v8qi
16862 = build_function_type (long_long_integer_type_node,
16863 tree_cons (NULL_TREE, V8QI_type_node,
16864 endlink));
16865 tree di_ftype_v4hi
16866 = build_function_type (long_long_integer_type_node,
16867 tree_cons (NULL_TREE, V4HI_type_node,
16868 endlink));
16869 tree di_ftype_v2si
16870 = build_function_type (long_long_integer_type_node,
16871 tree_cons (NULL_TREE, V2SI_type_node,
16872 endlink));
16873 tree v2si_ftype_v4hi
16874 = build_function_type (V2SI_type_node,
16875 tree_cons (NULL_TREE, V4HI_type_node,
16876 endlink));
16877 tree v4hi_ftype_v8qi
16878 = build_function_type (V4HI_type_node,
16879 tree_cons (NULL_TREE, V8QI_type_node,
16880 endlink));
16882 tree di_ftype_di_v4hi_v4hi
16883 = build_function_type (long_long_unsigned_type_node,
16884 tree_cons (NULL_TREE,
16885 long_long_unsigned_type_node,
16886 tree_cons (NULL_TREE, V4HI_type_node,
16887 tree_cons (NULL_TREE,
16888 V4HI_type_node,
16889 endlink))));
16891 tree di_ftype_v4hi_v4hi
16892 = build_function_type (long_long_unsigned_type_node,
16893 tree_cons (NULL_TREE, V4HI_type_node,
16894 tree_cons (NULL_TREE, V4HI_type_node,
16895 endlink)));
16897 /* Normal vector binops. */
16898 tree v8qi_ftype_v8qi_v8qi
16899 = build_function_type (V8QI_type_node,
16900 tree_cons (NULL_TREE, V8QI_type_node,
16901 tree_cons (NULL_TREE, V8QI_type_node,
16902 endlink)));
16903 tree v4hi_ftype_v4hi_v4hi
16904 = build_function_type (V4HI_type_node,
16905 tree_cons (NULL_TREE, V4HI_type_node,
16906 tree_cons (NULL_TREE, V4HI_type_node,
16907 endlink)));
16908 tree v2si_ftype_v2si_v2si
16909 = build_function_type (V2SI_type_node,
16910 tree_cons (NULL_TREE, V2SI_type_node,
16911 tree_cons (NULL_TREE, V2SI_type_node,
16912 endlink)));
16913 tree di_ftype_di_di
16914 = build_function_type (long_long_unsigned_type_node,
16915 tree_cons (NULL_TREE, long_long_unsigned_type_node,
16916 tree_cons (NULL_TREE,
16917 long_long_unsigned_type_node,
16918 endlink)));
16920 /* Add all builtins that are more or less simple operations on two
16921 operands. */
16922 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16924 /* Use one of the operands; the target can have a different mode for
16925 mask-generating compares. */
16926 enum machine_mode mode;
16927 tree type;
16929 if (d->name == 0)
16930 continue;
16932 mode = insn_data[d->icode].operand[1].mode;
16934 switch (mode)
16936 case V8QImode:
16937 type = v8qi_ftype_v8qi_v8qi;
16938 break;
16939 case V4HImode:
16940 type = v4hi_ftype_v4hi_v4hi;
16941 break;
16942 case V2SImode:
16943 type = v2si_ftype_v2si_v2si;
16944 break;
16945 case DImode:
16946 type = di_ftype_di_di;
16947 break;
16949 default:
16950 gcc_unreachable ();
16953 def_mbuiltin (d->mask, d->name, type, d->code);
16956 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
16957 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
16958 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
16959 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
16961 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
16962 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
16963 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
16964 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
16965 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
16966 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
16968 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
16969 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
16970 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
16971 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
16972 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
16973 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
16975 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
16976 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
16977 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
16978 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
16979 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
16980 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
16982 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
16983 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
16984 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
16985 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
16986 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
16987 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
16989 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
16991 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
16992 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
16993 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
16994 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
16996 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
16997 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
16998 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
16999 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
17000 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
17001 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
17002 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
17003 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
17004 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
17006 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
17007 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
17008 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
17010 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
17011 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
17012 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
17014 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
17015 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
17016 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
17017 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
17018 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
17019 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
17021 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
17022 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
17023 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
17024 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
17025 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
17026 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
17027 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
17028 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
17029 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
17030 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
17031 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
17032 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
17034 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
17035 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
17036 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
17037 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
17039 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
17040 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
17041 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
17042 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
17043 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
17044 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
17045 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
17048 static void
17049 arm_init_tls_builtins (void)
17051 tree ftype, decl;
17053 ftype = build_function_type (ptr_type_node, void_list_node);
17054 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
17055 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
17056 NULL, NULL_TREE);
17057 TREE_NOTHROW (decl) = 1;
17058 TREE_READONLY (decl) = 1;
17061 enum neon_builtin_type_bits {
17062 T_V8QI = 0x0001,
17063 T_V4HI = 0x0002,
17064 T_V2SI = 0x0004,
17065 T_V2SF = 0x0008,
17066 T_DI = 0x0010,
17067 T_V16QI = 0x0020,
17068 T_V8HI = 0x0040,
17069 T_V4SI = 0x0080,
17070 T_V4SF = 0x0100,
17071 T_V2DI = 0x0200,
17072 T_TI = 0x0400,
17073 T_EI = 0x0800,
17074 T_OI = 0x1000
17077 #define v8qi_UP T_V8QI
17078 #define v4hi_UP T_V4HI
17079 #define v2si_UP T_V2SI
17080 #define v2sf_UP T_V2SF
17081 #define di_UP T_DI
17082 #define v16qi_UP T_V16QI
17083 #define v8hi_UP T_V8HI
17084 #define v4si_UP T_V4SI
17085 #define v4sf_UP T_V4SF
17086 #define v2di_UP T_V2DI
17087 #define ti_UP T_TI
17088 #define ei_UP T_EI
17089 #define oi_UP T_OI
17091 #define UP(X) X##_UP
17093 #define T_MAX 13
17095 typedef enum {
17096 NEON_BINOP,
17097 NEON_TERNOP,
17098 NEON_UNOP,
17099 NEON_GETLANE,
17100 NEON_SETLANE,
17101 NEON_CREATE,
17102 NEON_DUP,
17103 NEON_DUPLANE,
17104 NEON_COMBINE,
17105 NEON_SPLIT,
17106 NEON_LANEMUL,
17107 NEON_LANEMULL,
17108 NEON_LANEMULH,
17109 NEON_LANEMAC,
17110 NEON_SCALARMUL,
17111 NEON_SCALARMULL,
17112 NEON_SCALARMULH,
17113 NEON_SCALARMAC,
17114 NEON_CONVERT,
17115 NEON_FIXCONV,
17116 NEON_SELECT,
17117 NEON_RESULTPAIR,
17118 NEON_REINTERP,
17119 NEON_VTBL,
17120 NEON_VTBX,
17121 NEON_LOAD1,
17122 NEON_LOAD1LANE,
17123 NEON_STORE1,
17124 NEON_STORE1LANE,
17125 NEON_LOADSTRUCT,
17126 NEON_LOADSTRUCTLANE,
17127 NEON_STORESTRUCT,
17128 NEON_STORESTRUCTLANE,
17129 NEON_LOGICBINOP,
17130 NEON_SHIFTINSERT,
17131 NEON_SHIFTIMM,
17132 NEON_SHIFTACC
17133 } neon_itype;
17135 typedef struct {
17136 const char *name;
17137 const neon_itype itype;
17138 const int bits;
17139 const enum insn_code codes[T_MAX];
17140 const unsigned int num_vars;
17141 unsigned int base_fcode;
17142 } neon_builtin_datum;
17144 #define CF(N,X) CODE_FOR_neon_##N##X
17146 #define VAR1(T, N, A) \
17147 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
17148 #define VAR2(T, N, A, B) \
17149 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
17150 #define VAR3(T, N, A, B, C) \
17151 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
17152 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
17153 #define VAR4(T, N, A, B, C, D) \
17154 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
17155 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
17156 #define VAR5(T, N, A, B, C, D, E) \
17157 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
17158 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
17159 #define VAR6(T, N, A, B, C, D, E, F) \
17160 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
17161 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
17162 #define VAR7(T, N, A, B, C, D, E, F, G) \
17163 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
17164 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17165 CF (N, G) }, 7, 0
17166 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17167 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17168 | UP (H), \
17169 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17170 CF (N, G), CF (N, H) }, 8, 0
17171 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17172 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17173 | UP (H) | UP (I), \
17174 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17175 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
17176 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17177 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17178 | UP (H) | UP (I) | UP (J), \
17179 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17180 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
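/* A sketch of the expansion (derived from the VAR macros above): an entry
   written as

     { VAR2 (BINOP, vrecps, v2sf, v4sf) }

   becomes a neon_builtin_datum of the form

     { "vrecps", NEON_BINOP, T_V2SF | T_V4SF,
       { CODE_FOR_neon_vrecpsv2sf, CODE_FOR_neon_vrecpsv4sf }, 2, 0 }

   i.e. one builtin name covering two instruction variants, selected by
   the type bits defined earlier.  */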
17182 /* The mode entries in the following table correspond to the "key" type of the
17183 instruction variant, i.e. equivalent to that which would be specified after
17184 the assembler mnemonic, which usually refers to the last vector operand.
17185 (Signed/unsigned/polynomial types are not differentiated, though; they are
17186 all mapped onto the same mode for a given element size.) The modes
17187 listed per instruction should be the same as those defined for that
17188 instruction's pattern in neon.md.
17189 WARNING: Variants should be listed in the same increasing order as
17190 neon_builtin_type_bits. */
17192 static neon_builtin_datum neon_builtin_data[] =
17194 { VAR10 (BINOP, vadd,
17195 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17196 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
17197 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
17198 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17199 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17200 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
17201 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17202 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17203 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
17204 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17205 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
17206 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
17207 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
17208 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
17209 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
17210 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
17211 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
17212 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
17213 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
17214 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
17215 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
17216 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
17217 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17218 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17219 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17220 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
17221 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
17222 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
17223 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17224 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17225 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17226 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
17227 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17228 { VAR10 (BINOP, vsub,
17229 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17230 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
17231 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
17232 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17233 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17234 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
17235 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17236 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17237 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17238 { VAR2 (BINOP, vcage, v2sf, v4sf) },
17239 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
17240 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17241 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17242 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
17243 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17244 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
17245 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17246 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17247 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
17248 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17249 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17250 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
17251 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
17252 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
17253 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
17254 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17255 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17256 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17257 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17258 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17259 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17260 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17261 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17262 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
17263 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
17264 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
17265 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17266 /* FIXME: vget_lane supports more variants than this! */
17267 { VAR10 (GETLANE, vget_lane,
17268 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17269 { VAR10 (SETLANE, vset_lane,
17270 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17271 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
17272 { VAR10 (DUP, vdup_n,
17273 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17274 { VAR10 (DUPLANE, vdup_lane,
17275 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17276 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
17277 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
17278 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
17279 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
17280 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
17281 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
17282 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
17283 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17284 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17285 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
17286 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
17287 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17288 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
17289 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
17290 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17291 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17292 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
17293 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
17294 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17295 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
17296 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
17297 { VAR10 (BINOP, vext,
17298 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17299 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17300 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
17301 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
17302 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
17303 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
17304 { VAR10 (SELECT, vbsl,
17305 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17306 { VAR1 (VTBL, vtbl1, v8qi) },
17307 { VAR1 (VTBL, vtbl2, v8qi) },
17308 { VAR1 (VTBL, vtbl3, v8qi) },
17309 { VAR1 (VTBL, vtbl4, v8qi) },
17310 { VAR1 (VTBX, vtbx1, v8qi) },
17311 { VAR1 (VTBX, vtbx2, v8qi) },
17312 { VAR1 (VTBX, vtbx3, v8qi) },
17313 { VAR1 (VTBX, vtbx4, v8qi) },
17314 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17315 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17316 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17317 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
17318 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
17319 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
17320 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
17321 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
17322 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
17323 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
17324 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
17325 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
17326 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
17327 { VAR10 (LOAD1, vld1,
17328 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17329 { VAR10 (LOAD1LANE, vld1_lane,
17330 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17331 { VAR10 (LOAD1, vld1_dup,
17332 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17333 { VAR10 (STORE1, vst1,
17334 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17335 { VAR10 (STORE1LANE, vst1_lane,
17336 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17337 { VAR9 (LOADSTRUCT,
17338 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17339 { VAR7 (LOADSTRUCTLANE, vld2_lane,
17340 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17341 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
17342 { VAR9 (STORESTRUCT, vst2,
17343 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17344 { VAR7 (STORESTRUCTLANE, vst2_lane,
17345 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17346 { VAR9 (LOADSTRUCT,
17347 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17348 { VAR7 (LOADSTRUCTLANE, vld3_lane,
17349 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17350 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
17351 { VAR9 (STORESTRUCT, vst3,
17352 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17353 { VAR7 (STORESTRUCTLANE, vst3_lane,
17354 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17355 { VAR9 (LOADSTRUCT, vld4,
17356 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17357 { VAR7 (LOADSTRUCTLANE, vld4_lane,
17358 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17359 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
17360 { VAR9 (STORESTRUCT, vst4,
17361 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17362 { VAR7 (STORESTRUCTLANE, vst4_lane,
17363 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17364 { VAR10 (LOGICBINOP, vand,
17365 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17366 { VAR10 (LOGICBINOP, vorr,
17367 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17368 { VAR10 (BINOP, veor,
17369 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17370 { VAR10 (LOGICBINOP, vbic,
17371 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17372 { VAR10 (LOGICBINOP, vorn,
17373 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
17376 #undef CF
17377 #undef VAR1
17378 #undef VAR2
17379 #undef VAR3
17380 #undef VAR4
17381 #undef VAR5
17382 #undef VAR6
17383 #undef VAR7
17384 #undef VAR8
17385 #undef VAR9
17386 #undef VAR10
17388 static void
17389 arm_init_neon_builtins (void)
17391 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
17393 tree neon_intQI_type_node;
17394 tree neon_intHI_type_node;
17395 tree neon_polyQI_type_node;
17396 tree neon_polyHI_type_node;
17397 tree neon_intSI_type_node;
17398 tree neon_intDI_type_node;
17399 tree neon_float_type_node;
17401 tree intQI_pointer_node;
17402 tree intHI_pointer_node;
17403 tree intSI_pointer_node;
17404 tree intDI_pointer_node;
17405 tree float_pointer_node;
17407 tree const_intQI_node;
17408 tree const_intHI_node;
17409 tree const_intSI_node;
17410 tree const_intDI_node;
17411 tree const_float_node;
17413 tree const_intQI_pointer_node;
17414 tree const_intHI_pointer_node;
17415 tree const_intSI_pointer_node;
17416 tree const_intDI_pointer_node;
17417 tree const_float_pointer_node;
17419 tree V8QI_type_node;
17420 tree V4HI_type_node;
17421 tree V2SI_type_node;
17422 tree V2SF_type_node;
17423 tree V16QI_type_node;
17424 tree V8HI_type_node;
17425 tree V4SI_type_node;
17426 tree V4SF_type_node;
17427 tree V2DI_type_node;
17429 tree intUQI_type_node;
17430 tree intUHI_type_node;
17431 tree intUSI_type_node;
17432 tree intUDI_type_node;
17434 tree intEI_type_node;
17435 tree intOI_type_node;
17436 tree intCI_type_node;
17437 tree intXI_type_node;
17439 tree V8QI_pointer_node;
17440 tree V4HI_pointer_node;
17441 tree V2SI_pointer_node;
17442 tree V2SF_pointer_node;
17443 tree V16QI_pointer_node;
17444 tree V8HI_pointer_node;
17445 tree V4SI_pointer_node;
17446 tree V4SF_pointer_node;
17447 tree V2DI_pointer_node;
17449 tree void_ftype_pv8qi_v8qi_v8qi;
17450 tree void_ftype_pv4hi_v4hi_v4hi;
17451 tree void_ftype_pv2si_v2si_v2si;
17452 tree void_ftype_pv2sf_v2sf_v2sf;
17453 tree void_ftype_pdi_di_di;
17454 tree void_ftype_pv16qi_v16qi_v16qi;
17455 tree void_ftype_pv8hi_v8hi_v8hi;
17456 tree void_ftype_pv4si_v4si_v4si;
17457 tree void_ftype_pv4sf_v4sf_v4sf;
17458 tree void_ftype_pv2di_v2di_v2di;
17460 tree reinterp_ftype_dreg[5][5];
17461 tree reinterp_ftype_qreg[5][5];
17462 tree dreg_types[5], qreg_types[5];
17464 /* Create distinguished type nodes for NEON vector element types,
17465 and pointers to values of such types, so we can detect them later. */
17466 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17467 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17468 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17469 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17470 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
17471 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
17472 neon_float_type_node = make_node (REAL_TYPE);
17473 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
17474 layout_type (neon_float_type_node);
17476 /* Define typedefs which exactly correspond to the modes we are basing vector
17477 types on. If you change these names you'll need to change
17478 the table used by arm_mangle_type too. */
17479 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
17480 "__builtin_neon_qi");
17481 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
17482 "__builtin_neon_hi");
17483 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
17484 "__builtin_neon_si");
17485 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
17486 "__builtin_neon_sf");
17487 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
17488 "__builtin_neon_di");
17489 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
17490 "__builtin_neon_poly8");
17491 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
17492 "__builtin_neon_poly16");
17494 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
17495 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
17496 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
17497 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
17498 float_pointer_node = build_pointer_type (neon_float_type_node);
17500 /* Next create constant-qualified versions of the above types. */
17501 const_intQI_node = build_qualified_type (neon_intQI_type_node,
17502 TYPE_QUAL_CONST);
17503 const_intHI_node = build_qualified_type (neon_intHI_type_node,
17504 TYPE_QUAL_CONST);
17505 const_intSI_node = build_qualified_type (neon_intSI_type_node,
17506 TYPE_QUAL_CONST);
17507 const_intDI_node = build_qualified_type (neon_intDI_type_node,
17508 TYPE_QUAL_CONST);
17509 const_float_node = build_qualified_type (neon_float_type_node,
17510 TYPE_QUAL_CONST);
17512 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
17513 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
17514 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
17515 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
17516 const_float_pointer_node = build_pointer_type (const_float_node);
17518 /* Now create vector types based on our NEON element types. */
17519 /* 64-bit vectors. */
17520 V8QI_type_node =
17521 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
17522 V4HI_type_node =
17523 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
17524 V2SI_type_node =
17525 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
17526 V2SF_type_node =
17527 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
17528 /* 128-bit vectors. */
17529 V16QI_type_node =
17530 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
17531 V8HI_type_node =
17532 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
17533 V4SI_type_node =
17534 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
17535 V4SF_type_node =
17536 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
17537 V2DI_type_node =
17538 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
17540 /* Unsigned integer types for various mode sizes. */
17541 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
17542 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
17543 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
17544 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
17546 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
17547 "__builtin_neon_uqi");
17548 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
17549 "__builtin_neon_uhi");
17550 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
17551 "__builtin_neon_usi");
17552 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
17553 "__builtin_neon_udi");
17555 /* Opaque integer types for structures of vectors. */
17556 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
17557 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
17558 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
17559 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
17561 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
17562 "__builtin_neon_ti");
17563 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
17564 "__builtin_neon_ei");
17565 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
17566 "__builtin_neon_oi");
17567 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
17568 "__builtin_neon_ci");
17569 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
17570 "__builtin_neon_xi");
17572 /* Pointers to vector types. */
17573 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
17574 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
17575 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
17576 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
17577 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
17578 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
17579 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
17580 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
17581 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
17583 /* Operations which return results as pairs. */
17584 void_ftype_pv8qi_v8qi_v8qi =
17585 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
17586 V8QI_type_node, NULL);
17587 void_ftype_pv4hi_v4hi_v4hi =
17588 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
17589 V4HI_type_node, NULL);
17590 void_ftype_pv2si_v2si_v2si =
17591 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
17592 V2SI_type_node, NULL);
17593 void_ftype_pv2sf_v2sf_v2sf =
17594 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
17595 V2SF_type_node, NULL);
17596 void_ftype_pdi_di_di =
17597 build_function_type_list (void_type_node, intDI_pointer_node,
17598 neon_intDI_type_node, neon_intDI_type_node, NULL);
17599 void_ftype_pv16qi_v16qi_v16qi =
17600 build_function_type_list (void_type_node, V16QI_pointer_node,
17601 V16QI_type_node, V16QI_type_node, NULL);
17602 void_ftype_pv8hi_v8hi_v8hi =
17603 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
17604 V8HI_type_node, NULL);
17605 void_ftype_pv4si_v4si_v4si =
17606 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
17607 V4SI_type_node, NULL);
17608 void_ftype_pv4sf_v4sf_v4sf =
17609 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
17610 V4SF_type_node, NULL);
17611 void_ftype_pv2di_v2di_v2di =
17612 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
17613 V2DI_type_node, NULL);
17615 dreg_types[0] = V8QI_type_node;
17616 dreg_types[1] = V4HI_type_node;
17617 dreg_types[2] = V2SI_type_node;
17618 dreg_types[3] = V2SF_type_node;
17619 dreg_types[4] = neon_intDI_type_node;
17621 qreg_types[0] = V16QI_type_node;
17622 qreg_types[1] = V8HI_type_node;
17623 qreg_types[2] = V4SI_type_node;
17624 qreg_types[3] = V4SF_type_node;
17625 qreg_types[4] = V2DI_type_node;
17627 for (i = 0; i < 5; i++)
17629 int j;
17630 for (j = 0; j < 5; j++)
17632 reinterp_ftype_dreg[i][j]
17633 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
17634 reinterp_ftype_qreg[i][j]
17635 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
17639 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
17641 neon_builtin_datum *d = &neon_builtin_data[i];
17642 unsigned int j, codeidx = 0;
17644 d->base_fcode = fcode;
17646 for (j = 0; j < T_MAX; j++)
17648 const char* const modenames[] = {
17649 "v8qi", "v4hi", "v2si", "v2sf", "di",
17650 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
17652 char namebuf[60];
17653 tree ftype = NULL;
17654 enum insn_code icode;
17655 int is_load = 0, is_store = 0;
17657 if ((d->bits & (1 << j)) == 0)
17658 continue;
17660 icode = d->codes[codeidx++];
17662 switch (d->itype)
17664 case NEON_LOAD1:
17665 case NEON_LOAD1LANE:
17666 case NEON_LOADSTRUCT:
17667 case NEON_LOADSTRUCTLANE:
17668 is_load = 1;
17669 /* Fall through. */
17670 case NEON_STORE1:
17671 case NEON_STORE1LANE:
17672 case NEON_STORESTRUCT:
17673 case NEON_STORESTRUCTLANE:
17674 if (!is_load)
17675 is_store = 1;
17676 /* Fall through. */
17677 case NEON_UNOP:
17678 case NEON_BINOP:
17679 case NEON_LOGICBINOP:
17680 case NEON_SHIFTINSERT:
17681 case NEON_TERNOP:
17682 case NEON_GETLANE:
17683 case NEON_SETLANE:
17684 case NEON_CREATE:
17685 case NEON_DUP:
17686 case NEON_DUPLANE:
17687 case NEON_SHIFTIMM:
17688 case NEON_SHIFTACC:
17689 case NEON_COMBINE:
17690 case NEON_SPLIT:
17691 case NEON_CONVERT:
17692 case NEON_FIXCONV:
17693 case NEON_LANEMUL:
17694 case NEON_LANEMULL:
17695 case NEON_LANEMULH:
17696 case NEON_LANEMAC:
17697 case NEON_SCALARMUL:
17698 case NEON_SCALARMULL:
17699 case NEON_SCALARMULH:
17700 case NEON_SCALARMAC:
17701 case NEON_SELECT:
17702 case NEON_VTBL:
17703 case NEON_VTBX:
17705 int k;
17706 tree return_type = void_type_node, args = void_list_node;
17708 /* Build a function type directly from the insn_data for this
17709 builtin. The build_function_type() function takes care of
17710 removing duplicates for us. */
17711 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
17713 tree eltype;
17715 if (is_load && k == 1)
17717 /* Neon load patterns always have the memory operand
17718 (a SImode pointer) in the operand 1 position. We
17719 want a const pointer to the element type in that
17720 position. */
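/* Editorial illustration (assumed names, not from the original source): for
   the T_V8QI variant of a LOAD1 builtin, the type built here is effectively

     V8QI __builtin_neon_vld1v8qi (const __builtin_neon_qi *);

   i.e. the SImode pointer operand is exposed to the front end as a
   const-qualified pointer to the element type selected below.  */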
17721 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17723 switch (1 << j)
17725 case T_V8QI:
17726 case T_V16QI:
17727 eltype = const_intQI_pointer_node;
17728 break;
17730 case T_V4HI:
17731 case T_V8HI:
17732 eltype = const_intHI_pointer_node;
17733 break;
17735 case T_V2SI:
17736 case T_V4SI:
17737 eltype = const_intSI_pointer_node;
17738 break;
17740 case T_V2SF:
17741 case T_V4SF:
17742 eltype = const_float_pointer_node;
17743 break;
17745 case T_DI:
17746 case T_V2DI:
17747 eltype = const_intDI_pointer_node;
17748 break;
17750 default: gcc_unreachable ();
17753 else if (is_store && k == 0)
17755 /* Similarly, Neon store patterns use operand 0 as
17756 the memory location to store to (a SImode pointer).
17757 Use a pointer to the element type of the store in
17758 that position. */
17759 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17761 switch (1 << j)
17763 case T_V8QI:
17764 case T_V16QI:
17765 eltype = intQI_pointer_node;
17766 break;
17768 case T_V4HI:
17769 case T_V8HI:
17770 eltype = intHI_pointer_node;
17771 break;
17773 case T_V2SI:
17774 case T_V4SI:
17775 eltype = intSI_pointer_node;
17776 break;
17778 case T_V2SF:
17779 case T_V4SF:
17780 eltype = float_pointer_node;
17781 break;
17783 case T_DI:
17784 case T_V2DI:
17785 eltype = intDI_pointer_node;
17786 break;
17788 default: gcc_unreachable ();
17791 else
17793 switch (insn_data[icode].operand[k].mode)
17795 case VOIDmode: eltype = void_type_node; break;
17796 /* Scalars. */
17797 case QImode: eltype = neon_intQI_type_node; break;
17798 case HImode: eltype = neon_intHI_type_node; break;
17799 case SImode: eltype = neon_intSI_type_node; break;
17800 case SFmode: eltype = neon_float_type_node; break;
17801 case DImode: eltype = neon_intDI_type_node; break;
17802 case TImode: eltype = intTI_type_node; break;
17803 case EImode: eltype = intEI_type_node; break;
17804 case OImode: eltype = intOI_type_node; break;
17805 case CImode: eltype = intCI_type_node; break;
17806 case XImode: eltype = intXI_type_node; break;
17807 /* 64-bit vectors. */
17808 case V8QImode: eltype = V8QI_type_node; break;
17809 case V4HImode: eltype = V4HI_type_node; break;
17810 case V2SImode: eltype = V2SI_type_node; break;
17811 case V2SFmode: eltype = V2SF_type_node; break;
17812 /* 128-bit vectors. */
17813 case V16QImode: eltype = V16QI_type_node; break;
17814 case V8HImode: eltype = V8HI_type_node; break;
17815 case V4SImode: eltype = V4SI_type_node; break;
17816 case V4SFmode: eltype = V4SF_type_node; break;
17817 case V2DImode: eltype = V2DI_type_node; break;
17818 default: gcc_unreachable ();
17822 if (k == 0 && !is_store)
17823 return_type = eltype;
17824 else
17825 args = tree_cons (NULL_TREE, eltype, args);
17828 ftype = build_function_type (return_type, args);
17830 break;
17832 case NEON_RESULTPAIR:
17834 switch (insn_data[icode].operand[1].mode)
17836 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
17837 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
17838 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
17839 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
17840 case DImode: ftype = void_ftype_pdi_di_di; break;
17841 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
17842 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
17843 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
17844 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
17845 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
17846 default: gcc_unreachable ();
17849 break;
17851 case NEON_REINTERP:
17853 /* We iterate over 5 doubleword types, then 5 quadword
17854 types. */
17855 int rhs = j % 5;
17856 switch (insn_data[icode].operand[0].mode)
17858 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
17859 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
17860 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
17861 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
17862 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
17863 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
17864 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
17865 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
17866 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
17867 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
17868 default: gcc_unreachable ();
17871 break;
17873 default:
17874 gcc_unreachable ();
17877 gcc_assert (ftype != NULL);
17879 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
17881 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
17882 NULL_TREE);
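/* Editorial note: the builtin name is the table entry's stem with the mode
   suffix appended, so for example the vld1 entry in V8QImode is registered
   as "__builtin_neon_vld1v8qi" and the vreinterpretv2si entry in V4HImode
   as "__builtin_neon_vreinterpretv2siv4hi".  */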
17887 static void
17888 arm_init_fp16_builtins (void)
17890 tree fp16_type = make_node (REAL_TYPE);
17891 TYPE_PRECISION (fp16_type) = 16;
17892 layout_type (fp16_type);
17893 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
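/* Editorial illustration: once a half-precision format is selected (for
   example with -mfp16-format=ieee), the registration above lets C sources
   write

     __fp16 h = 1.0f;

   with "__fp16" resolving to the 16-bit REAL_TYPE node built here.  */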
17896 static void
17897 arm_init_builtins (void)
17899 arm_init_tls_builtins ();
17901 if (TARGET_REALLY_IWMMXT)
17902 arm_init_iwmmxt_builtins ();
17904 if (TARGET_NEON)
17905 arm_init_neon_builtins ();
17907 if (arm_fp16_format)
17908 arm_init_fp16_builtins ();
17911 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
17913 static const char *
17914 arm_invalid_parameter_type (const_tree t)
17916 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17917 return N_("function parameters cannot have __fp16 type");
17918 return NULL;
17921 /* Implement TARGET_INVALID_RETURN_TYPE. */
17923 static const char *
17924 arm_invalid_return_type (const_tree t)
17926 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17927 return N_("functions cannot return __fp16 type");
17928 return NULL;
17931 /* Implement TARGET_PROMOTED_TYPE. */
17933 static tree
17934 arm_promoted_type (const_tree t)
17936 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17937 return float_type_node;
17938 return NULL_TREE;
17941 /* Implement TARGET_CONVERT_TO_TYPE.
17942 Specifically, this hook implements the peculiarity of the ARM
17943 half-precision floating-point C semantics that requires conversions between
17944 __fp16 and double to go through an intermediate conversion to float. */
17946 static tree
17947 arm_convert_to_type (tree type, tree expr)
17949 tree fromtype = TREE_TYPE (expr);
17950 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
17951 return NULL_TREE;
17952 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
17953 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
17954 return convert (type, convert (float_type_node, expr));
17955 return NULL_TREE;
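/* Editorial illustration of the hook above: given "__fp16 h; double d = h;",
   the widening conversion is rewritten as d = (double) (float) h, and the
   narrowing direction (double to __fp16) is handled symmetrically.  */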
17958 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
17959 This simply adds HFmode as a supported mode; even though we don't
17960 implement arithmetic on this type directly, it's supported by
17961 optabs conversions, much the way the double-word arithmetic is
17962 special-cased in the default hook. */
17964 static bool
17965 arm_scalar_mode_supported_p (enum machine_mode mode)
17967 if (mode == HFmode)
17968 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
17969 else
17970 return default_scalar_mode_supported_p (mode);
17973 /* Errors in the source file can cause expand_expr to return const0_rtx
17974 where we expect a vector. To avoid crashing, use one of the vector
17975 clear instructions. */
17977 static rtx
17978 safe_vector_operand (rtx x, enum machine_mode mode)
17980 if (x != const0_rtx)
17981 return x;
17982 x = gen_reg_rtx (mode);
17984 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
17985 : gen_rtx_SUBREG (DImode, x, 0)));
17986 return x;
17989 /* Subroutine of arm_expand_builtin to take care of binop insns. */
17991 static rtx
17992 arm_expand_binop_builtin (enum insn_code icode,
17993 tree exp, rtx target)
17995 rtx pat;
17996 tree arg0 = CALL_EXPR_ARG (exp, 0);
17997 tree arg1 = CALL_EXPR_ARG (exp, 1);
17998 rtx op0 = expand_normal (arg0);
17999 rtx op1 = expand_normal (arg1);
18000 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18001 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18002 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18004 if (VECTOR_MODE_P (mode0))
18005 op0 = safe_vector_operand (op0, mode0);
18006 if (VECTOR_MODE_P (mode1))
18007 op1 = safe_vector_operand (op1, mode1);
18009 if (! target
18010 || GET_MODE (target) != tmode
18011 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18012 target = gen_reg_rtx (tmode);
18014 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
18016 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18017 op0 = copy_to_mode_reg (mode0, op0);
18018 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18019 op1 = copy_to_mode_reg (mode1, op1);
18021 pat = GEN_FCN (icode) (target, op0, op1);
18022 if (! pat)
18023 return 0;
18024 emit_insn (pat);
18025 return target;
18028 /* Subroutine of arm_expand_builtin to take care of unop insns. */
18030 static rtx
18031 arm_expand_unop_builtin (enum insn_code icode,
18032 tree exp, rtx target, int do_load)
18034 rtx pat;
18035 tree arg0 = CALL_EXPR_ARG (exp, 0);
18036 rtx op0 = expand_normal (arg0);
18037 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18038 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18040 if (! target
18041 || GET_MODE (target) != tmode
18042 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18043 target = gen_reg_rtx (tmode);
18044 if (do_load)
18045 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18046 else
18048 if (VECTOR_MODE_P (mode0))
18049 op0 = safe_vector_operand (op0, mode0);
18051 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18052 op0 = copy_to_mode_reg (mode0, op0);
18055 pat = GEN_FCN (icode) (target, op0);
18056 if (! pat)
18057 return 0;
18058 emit_insn (pat);
18059 return target;
18062 static int
18063 neon_builtin_compare (const void *a, const void *b)
18065 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
18066 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
18067 unsigned int soughtcode = key->base_fcode;
18069 if (soughtcode >= memb->base_fcode
18070 && soughtcode < memb->base_fcode + memb->num_vars)
18071 return 0;
18072 else if (soughtcode < memb->base_fcode)
18073 return -1;
18074 else
18075 return 1;
18078 static enum insn_code
18079 locate_neon_builtin_icode (int fcode, neon_itype *itype)
18081 neon_builtin_datum key, *found;
18082 int idx;
18084 key.base_fcode = fcode;
18085 found = (neon_builtin_datum *)
18086 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
18087 sizeof (neon_builtin_data[0]), neon_builtin_compare);
18088 gcc_assert (found);
18089 idx = fcode - (int) found->base_fcode;
18090 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
18092 if (itype)
18093 *itype = found->itype;
18095 return found->codes[idx];
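/* Editorial note: each table entry covers a contiguous range of function
   codes, so a datum with base_fcode B and num_vars N answers fcodes
   B .. B+N-1, and fcode - B indexes its codes[] array, as used above.  */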
18098 typedef enum {
18099 NEON_ARG_COPY_TO_REG,
18100 NEON_ARG_CONSTANT,
18101 NEON_ARG_STOP
18102 } builtin_arg;
18104 #define NEON_MAX_BUILTIN_ARGS 5
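/* Editorial note: arm_expand_neon_args below is driven by a
   NEON_ARG_STOP-terminated list describing each operand.  A typical call,
   as made for the NEON_BINOP-style builtins further down, looks like

     arm_expand_neon_args (target, icode, 1, exp,
			   NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
			   NEON_ARG_CONSTANT, NEON_ARG_STOP);

   where each NEON_ARG_CONSTANT entry marks an operand that must satisfy the
   instruction's constant predicate.  */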
18106 /* Expand a Neon builtin. */
18107 static rtx
18108 arm_expand_neon_args (rtx target, int icode, int have_retval,
18109 tree exp, ...)
18111 va_list ap;
18112 rtx pat;
18113 tree arg[NEON_MAX_BUILTIN_ARGS];
18114 rtx op[NEON_MAX_BUILTIN_ARGS];
18115 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18116 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
18117 int argc = 0;
18119 if (have_retval
18120 && (!target
18121 || GET_MODE (target) != tmode
18122 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
18123 target = gen_reg_rtx (tmode);
18125 va_start (ap, exp);
18127 for (;;)
18129 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
18131 if (thisarg == NEON_ARG_STOP)
18132 break;
18133 else
18135 arg[argc] = CALL_EXPR_ARG (exp, argc);
18136 op[argc] = expand_normal (arg[argc]);
18137 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
18139 switch (thisarg)
18141 case NEON_ARG_COPY_TO_REG:
18142 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
18143 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18144 (op[argc], mode[argc]))
18145 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
18146 break;
18148 case NEON_ARG_CONSTANT:
18149 /* FIXME: This error message is somewhat unhelpful. */
18150 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18151 (op[argc], mode[argc]))
18152 error ("argument must be a constant");
18153 break;
18155 case NEON_ARG_STOP:
18156 gcc_unreachable ();
18159 argc++;
18163 va_end (ap);
18165 if (have_retval)
18166 switch (argc)
18168 case 1:
18169 pat = GEN_FCN (icode) (target, op[0]);
18170 break;
18172 case 2:
18173 pat = GEN_FCN (icode) (target, op[0], op[1]);
18174 break;
18176 case 3:
18177 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
18178 break;
18180 case 4:
18181 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
18182 break;
18184 case 5:
18185 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
18186 break;
18188 default:
18189 gcc_unreachable ();
18191 else
18192 switch (argc)
18194 case 1:
18195 pat = GEN_FCN (icode) (op[0]);
18196 break;
18198 case 2:
18199 pat = GEN_FCN (icode) (op[0], op[1]);
18200 break;
18202 case 3:
18203 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
18204 break;
18206 case 4:
18207 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
18208 break;
18210 case 5:
18211 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
18212 break;
18214 default:
18215 gcc_unreachable ();
18218 if (!pat)
18219 return 0;
18221 emit_insn (pat);
18223 return target;
18226 /* Expand a Neon builtin. These are "special" because they don't have symbolic
18227 constants defined per-instruction or per instruction-variant. Instead, the
18228 required info is looked up in the table neon_builtin_data. */
18229 static rtx
18230 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
18232 neon_itype itype;
18233 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
18235 switch (itype)
18237 case NEON_UNOP:
18238 case NEON_CONVERT:
18239 case NEON_DUPLANE:
18240 return arm_expand_neon_args (target, icode, 1, exp,
18241 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18243 case NEON_BINOP:
18244 case NEON_SETLANE:
18245 case NEON_SCALARMUL:
18246 case NEON_SCALARMULL:
18247 case NEON_SCALARMULH:
18248 case NEON_SHIFTINSERT:
18249 case NEON_LOGICBINOP:
18250 return arm_expand_neon_args (target, icode, 1, exp,
18251 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18252 NEON_ARG_STOP);
18254 case NEON_TERNOP:
18255 return arm_expand_neon_args (target, icode, 1, exp,
18256 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18257 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18259 case NEON_GETLANE:
18260 case NEON_FIXCONV:
18261 case NEON_SHIFTIMM:
18262 return arm_expand_neon_args (target, icode, 1, exp,
18263 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
18264 NEON_ARG_STOP);
18266 case NEON_CREATE:
18267 return arm_expand_neon_args (target, icode, 1, exp,
18268 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18270 case NEON_DUP:
18271 case NEON_SPLIT:
18272 case NEON_REINTERP:
18273 return arm_expand_neon_args (target, icode, 1, exp,
18274 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18276 case NEON_COMBINE:
18277 case NEON_VTBL:
18278 return arm_expand_neon_args (target, icode, 1, exp,
18279 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18281 case NEON_RESULTPAIR:
18282 return arm_expand_neon_args (target, icode, 0, exp,
18283 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18284 NEON_ARG_STOP);
18286 case NEON_LANEMUL:
18287 case NEON_LANEMULL:
18288 case NEON_LANEMULH:
18289 return arm_expand_neon_args (target, icode, 1, exp,
18290 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18291 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18293 case NEON_LANEMAC:
18294 return arm_expand_neon_args (target, icode, 1, exp,
18295 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18296 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18298 case NEON_SHIFTACC:
18299 return arm_expand_neon_args (target, icode, 1, exp,
18300 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18301 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18303 case NEON_SCALARMAC:
18304 return arm_expand_neon_args (target, icode, 1, exp,
18305 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18306 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18308 case NEON_SELECT:
18309 case NEON_VTBX:
18310 return arm_expand_neon_args (target, icode, 1, exp,
18311 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18312 NEON_ARG_STOP);
18314 case NEON_LOAD1:
18315 case NEON_LOADSTRUCT:
18316 return arm_expand_neon_args (target, icode, 1, exp,
18317 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18319 case NEON_LOAD1LANE:
18320 case NEON_LOADSTRUCTLANE:
18321 return arm_expand_neon_args (target, icode, 1, exp,
18322 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18323 NEON_ARG_STOP);
18325 case NEON_STORE1:
18326 case NEON_STORESTRUCT:
18327 return arm_expand_neon_args (target, icode, 0, exp,
18328 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18330 case NEON_STORE1LANE:
18331 case NEON_STORESTRUCTLANE:
18332 return arm_expand_neon_args (target, icode, 0, exp,
18333 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18334 NEON_ARG_STOP);
18337 gcc_unreachable ();
18340 /* Emit code to reinterpret one Neon type as another, without altering bits. */
18341 void
18342 neon_reinterpret (rtx dest, rtx src)
18344 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
18347 /* Emit code to place a Neon pair result in memory locations (with equal
18348 registers). */
18349 void
18350 neon_emit_pair_result_insn (enum machine_mode mode,
18351 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
18352 rtx op1, rtx op2)
18354 rtx mem = gen_rtx_MEM (mode, destaddr);
18355 rtx tmp1 = gen_reg_rtx (mode);
18356 rtx tmp2 = gen_reg_rtx (mode);
18358 emit_insn (intfn (tmp1, op1, tmp2, op2));
18360 emit_move_insn (mem, tmp1);
18361 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
18362 emit_move_insn (mem, tmp2);
18365 /* Set up operands for a register copy from src to dest, taking care not to
18366 clobber registers in the process.
18367 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
18368 be called with a large N, so that should be OK. */
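/* Editorial illustration (assumed register names): for dest = { q1, q2 } and
   src = { q0, q1 }, the first pass skips q1 <- q0 because q1 is still needed
   as a source, emits q2 <- q1, and a second pass then emits q1 <- q0, so no
   live source is clobbered.  */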
18370 void
18371 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
18373 unsigned int copied = 0, opctr = 0;
18374 unsigned int done = (1 << count) - 1;
18375 unsigned int i, j;
18377 while (copied != done)
18379 for (i = 0; i < count; i++)
18381 int good = 1;
18383 for (j = 0; good && j < count; j++)
18384 if (i != j && (copied & (1 << j)) == 0
18385 && reg_overlap_mentioned_p (src[j], dest[i]))
18386 good = 0;
18388 if (good)
18390 operands[opctr++] = dest[i];
18391 operands[opctr++] = src[i];
18392 copied |= 1 << i;
18397 gcc_assert (opctr == count * 2);
18400 /* Expand an expression EXP that calls a built-in function,
18401 with result going to TARGET if that's convenient
18402 (and in mode MODE if that's convenient).
18403 SUBTARGET may be used as the target for computing one of EXP's operands.
18404 IGNORE is nonzero if the value is to be ignored. */
18406 static rtx
18407 arm_expand_builtin (tree exp,
18408 rtx target,
18409 rtx subtarget ATTRIBUTE_UNUSED,
18410 enum machine_mode mode ATTRIBUTE_UNUSED,
18411 int ignore ATTRIBUTE_UNUSED)
18413 const struct builtin_description * d;
18414 enum insn_code icode;
18415 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18416 tree arg0;
18417 tree arg1;
18418 tree arg2;
18419 rtx op0;
18420 rtx op1;
18421 rtx op2;
18422 rtx pat;
18423 int fcode = DECL_FUNCTION_CODE (fndecl);
18424 size_t i;
18425 enum machine_mode tmode;
18426 enum machine_mode mode0;
18427 enum machine_mode mode1;
18428 enum machine_mode mode2;
18430 if (fcode >= ARM_BUILTIN_NEON_BASE)
18431 return arm_expand_neon_builtin (fcode, exp, target);
18433 switch (fcode)
18435 case ARM_BUILTIN_TEXTRMSB:
18436 case ARM_BUILTIN_TEXTRMUB:
18437 case ARM_BUILTIN_TEXTRMSH:
18438 case ARM_BUILTIN_TEXTRMUH:
18439 case ARM_BUILTIN_TEXTRMSW:
18440 case ARM_BUILTIN_TEXTRMUW:
18441 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
18442 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
18443 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
18444 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
18445 : CODE_FOR_iwmmxt_textrmw);
18447 arg0 = CALL_EXPR_ARG (exp, 0);
18448 arg1 = CALL_EXPR_ARG (exp, 1);
18449 op0 = expand_normal (arg0);
18450 op1 = expand_normal (arg1);
18451 tmode = insn_data[icode].operand[0].mode;
18452 mode0 = insn_data[icode].operand[1].mode;
18453 mode1 = insn_data[icode].operand[2].mode;
18455 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18456 op0 = copy_to_mode_reg (mode0, op0);
18457 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18459 /* @@@ better error message */
18460 error ("selector must be an immediate");
18461 return gen_reg_rtx (tmode);
18463 if (target == 0
18464 || GET_MODE (target) != tmode
18465 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18466 target = gen_reg_rtx (tmode);
18467 pat = GEN_FCN (icode) (target, op0, op1);
18468 if (! pat)
18469 return 0;
18470 emit_insn (pat);
18471 return target;
18473 case ARM_BUILTIN_TINSRB:
18474 case ARM_BUILTIN_TINSRH:
18475 case ARM_BUILTIN_TINSRW:
18476 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
18477 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
18478 : CODE_FOR_iwmmxt_tinsrw);
18479 arg0 = CALL_EXPR_ARG (exp, 0);
18480 arg1 = CALL_EXPR_ARG (exp, 1);
18481 arg2 = CALL_EXPR_ARG (exp, 2);
18482 op0 = expand_normal (arg0);
18483 op1 = expand_normal (arg1);
18484 op2 = expand_normal (arg2);
18485 tmode = insn_data[icode].operand[0].mode;
18486 mode0 = insn_data[icode].operand[1].mode;
18487 mode1 = insn_data[icode].operand[2].mode;
18488 mode2 = insn_data[icode].operand[3].mode;
18490 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18491 op0 = copy_to_mode_reg (mode0, op0);
18492 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18493 op1 = copy_to_mode_reg (mode1, op1);
18494 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18496 /* @@@ better error message */
18497 error ("selector must be an immediate");
18498 return const0_rtx;
18500 if (target == 0
18501 || GET_MODE (target) != tmode
18502 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18503 target = gen_reg_rtx (tmode);
18504 pat = GEN_FCN (icode) (target, op0, op1, op2);
18505 if (! pat)
18506 return 0;
18507 emit_insn (pat);
18508 return target;
18510 case ARM_BUILTIN_SETWCX:
18511 arg0 = CALL_EXPR_ARG (exp, 0);
18512 arg1 = CALL_EXPR_ARG (exp, 1);
18513 op0 = force_reg (SImode, expand_normal (arg0));
18514 op1 = expand_normal (arg1);
18515 emit_insn (gen_iwmmxt_tmcr (op1, op0));
18516 return 0;
18518 case ARM_BUILTIN_GETWCX:
18519 arg0 = CALL_EXPR_ARG (exp, 0);
18520 op0 = expand_normal (arg0);
18521 target = gen_reg_rtx (SImode);
18522 emit_insn (gen_iwmmxt_tmrc (target, op0));
18523 return target;
18525 case ARM_BUILTIN_WSHUFH:
18526 icode = CODE_FOR_iwmmxt_wshufh;
18527 arg0 = CALL_EXPR_ARG (exp, 0);
18528 arg1 = CALL_EXPR_ARG (exp, 1);
18529 op0 = expand_normal (arg0);
18530 op1 = expand_normal (arg1);
18531 tmode = insn_data[icode].operand[0].mode;
18532 mode1 = insn_data[icode].operand[1].mode;
18533 mode2 = insn_data[icode].operand[2].mode;
18535 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18536 op0 = copy_to_mode_reg (mode1, op0);
18537 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18539 /* @@@ better error message */
18540 error ("mask must be an immediate");
18541 return const0_rtx;
18543 if (target == 0
18544 || GET_MODE (target) != tmode
18545 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18546 target = gen_reg_rtx (tmode);
18547 pat = GEN_FCN (icode) (target, op0, op1);
18548 if (! pat)
18549 return 0;
18550 emit_insn (pat);
18551 return target;
18553 case ARM_BUILTIN_WSADB:
18554 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
18555 case ARM_BUILTIN_WSADH:
18556 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
18557 case ARM_BUILTIN_WSADBZ:
18558 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
18559 case ARM_BUILTIN_WSADHZ:
18560 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
18562 /* Several three-argument builtins. */
18563 case ARM_BUILTIN_WMACS:
18564 case ARM_BUILTIN_WMACU:
18565 case ARM_BUILTIN_WALIGN:
18566 case ARM_BUILTIN_TMIA:
18567 case ARM_BUILTIN_TMIAPH:
18568 case ARM_BUILTIN_TMIATT:
18569 case ARM_BUILTIN_TMIATB:
18570 case ARM_BUILTIN_TMIABT:
18571 case ARM_BUILTIN_TMIABB:
18572 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
18573 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
18574 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
18575 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
18576 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
18577 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
18578 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
18579 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
18580 : CODE_FOR_iwmmxt_walign);
18581 arg0 = CALL_EXPR_ARG (exp, 0);
18582 arg1 = CALL_EXPR_ARG (exp, 1);
18583 arg2 = CALL_EXPR_ARG (exp, 2);
18584 op0 = expand_normal (arg0);
18585 op1 = expand_normal (arg1);
18586 op2 = expand_normal (arg2);
18587 tmode = insn_data[icode].operand[0].mode;
18588 mode0 = insn_data[icode].operand[1].mode;
18589 mode1 = insn_data[icode].operand[2].mode;
18590 mode2 = insn_data[icode].operand[3].mode;
18592 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18593 op0 = copy_to_mode_reg (mode0, op0);
18594 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18595 op1 = copy_to_mode_reg (mode1, op1);
18596 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18597 op2 = copy_to_mode_reg (mode2, op2);
18598 if (target == 0
18599 || GET_MODE (target) != tmode
18600 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18601 target = gen_reg_rtx (tmode);
18602 pat = GEN_FCN (icode) (target, op0, op1, op2);
18603 if (! pat)
18604 return 0;
18605 emit_insn (pat);
18606 return target;
18608 case ARM_BUILTIN_WZERO:
18609 target = gen_reg_rtx (DImode);
18610 emit_insn (gen_iwmmxt_clrdi (target));
18611 return target;
18613 case ARM_BUILTIN_THREAD_POINTER:
18614 return arm_load_tp (target);
18616 default:
18617 break;
18620 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18621 if (d->code == (const enum arm_builtins) fcode)
18622 return arm_expand_binop_builtin (d->icode, exp, target);
18624 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18625 if (d->code == (const enum arm_builtins) fcode)
18626 return arm_expand_unop_builtin (d->icode, exp, target, 0);
18628 /* @@@ Should really do something sensible here. */
18629 return NULL_RTX;
18632 /* Return the number (counting from 0) of
18633 the least significant set bit in MASK. */
18635 inline static int
18636 number_of_first_bit_set (unsigned mask)
18638 int bit;
18640 for (bit = 0;
18641 (mask & (1 << bit)) == 0;
18642 ++bit)
18643 continue;
18645 return bit;
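/* Editorial example: a MASK of 0x30 (bits 4 and 5 set) yields 4.  Note that
   the loop above does not terminate for MASK == 0, so callers must pass a
   non-zero mask.  */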
18648 /* Emit code to push or pop registers to or from the stack. F is the
18649 assembly file. MASK is the registers to push or pop. PUSH is
18650 nonzero if we should push, and zero if we should pop. For debugging
18651 output, if pushing, adjust CFA_OFFSET by the amount of space added
18652 to the stack. REAL_REGS should have the same number of bits set as
18653 MASK, and will be used instead (in the same order) to describe which
18654 registers were saved - this is used to mark the save slots when we
18655 push high registers after moving them to low registers. */
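/* Editorial illustration: a call with PUSH set and MASK covering r4, r5 and
   LR emits

     push	{r4, r5, lr}

   and, when EABI unwind tables are enabled, a matching ".save {...}"
   directive built from REAL_REGS.  */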
18656 static void
18657 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
18658 unsigned long real_regs)
18660 int regno;
18661 int lo_mask = mask & 0xFF;
18662 int pushed_words = 0;
18664 gcc_assert (mask);
18666 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
18668 /* Special case. Do not generate a POP PC statement here, do it in
18669 thumb_exit() */
18670 thumb_exit (f, -1);
18671 return;
18674 if (ARM_EABI_UNWIND_TABLES && push)
18676 fprintf (f, "\t.save\t{");
18677 for (regno = 0; regno < 15; regno++)
18679 if (real_regs & (1 << regno))
18681 if (real_regs & ((1 << regno) -1))
18682 fprintf (f, ", ");
18683 asm_fprintf (f, "%r", regno);
18686 fprintf (f, "}\n");
18689 fprintf (f, "\t%s\t{", push ? "push" : "pop");
18691 /* Look at the low registers first. */
18692 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
18694 if (lo_mask & 1)
18696 asm_fprintf (f, "%r", regno);
18698 if ((lo_mask & ~1) != 0)
18699 fprintf (f, ", ");
18701 pushed_words++;
18705 if (push && (mask & (1 << LR_REGNUM)))
18707 /* Catch pushing the LR. */
18708 if (mask & 0xFF)
18709 fprintf (f, ", ");
18711 asm_fprintf (f, "%r", LR_REGNUM);
18713 pushed_words++;
18715 else if (!push && (mask & (1 << PC_REGNUM)))
18717 /* Catch popping the PC. */
18718 if (TARGET_INTERWORK || TARGET_BACKTRACE
18719 || crtl->calls_eh_return)
18721 /* The PC is never popped directly; instead
18722 it is popped into r3 and then BX is used. */
18723 fprintf (f, "}\n");
18725 thumb_exit (f, -1);
18727 return;
18729 else
18731 if (mask & 0xFF)
18732 fprintf (f, ", ");
18734 asm_fprintf (f, "%r", PC_REGNUM);
18738 fprintf (f, "}\n");
18740 if (push && pushed_words && dwarf2out_do_frame ())
18742 char *l = dwarf2out_cfi_label (false);
18743 int pushed_mask = real_regs;
18745 *cfa_offset += pushed_words * 4;
18746 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
18748 pushed_words = 0;
18749 pushed_mask = real_regs;
18750 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
18752 if (pushed_mask & 1)
18753 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
18758 /* Generate code to return from a thumb function.
18759 If 'reg_containing_return_addr' is -1, then the return address is
18760 actually on the stack, at the stack pointer. */
18761 static void
18762 thumb_exit (FILE *f, int reg_containing_return_addr)
18764 unsigned regs_available_for_popping;
18765 unsigned regs_to_pop;
18766 int pops_needed;
18767 unsigned available;
18768 unsigned required;
18769 int mode;
18770 int size;
18771 int restore_a4 = FALSE;
18773 /* Compute the registers we need to pop. */
18774 regs_to_pop = 0;
18775 pops_needed = 0;
18777 if (reg_containing_return_addr == -1)
18779 regs_to_pop |= 1 << LR_REGNUM;
18780 ++pops_needed;
18783 if (TARGET_BACKTRACE)
18785 /* Restore the (ARM) frame pointer and stack pointer. */
18786 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
18787 pops_needed += 2;
18790 /* If there is nothing to pop then just emit the BX instruction and
18791 return. */
18792 if (pops_needed == 0)
18794 if (crtl->calls_eh_return)
18795 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18797 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18798 return;
18800 /* Otherwise if we are not supporting interworking and we have not created
18801 a backtrace structure and the function was not entered in ARM mode then
18802 just pop the return address straight into the PC. */
18803 else if (!TARGET_INTERWORK
18804 && !TARGET_BACKTRACE
18805 && !is_called_in_ARM_mode (current_function_decl)
18806 && !crtl->calls_eh_return)
18808 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
18809 return;
18812 /* Find out how many of the (return) argument registers we can corrupt. */
18813 regs_available_for_popping = 0;
18815 /* If returning via __builtin_eh_return, the bottom three registers
18816 all contain information needed for the return. */
18817 if (crtl->calls_eh_return)
18818 size = 12;
18819 else
18821 /* We can deduce the registers used from the function's
18822 return value. This is more reliable than examining
18823 df_regs_ever_live_p () because that will be set if the register is
18824 ever used in the function, not just if the register is used
18825 to hold a return value. */
18827 if (crtl->return_rtx != 0)
18828 mode = GET_MODE (crtl->return_rtx);
18829 else
18830 mode = DECL_MODE (DECL_RESULT (current_function_decl));
18832 size = GET_MODE_SIZE (mode);
18834 if (size == 0)
18836 /* In a void function we can use any argument register.
18837 In a function that returns a structure on the stack
18838 we can use the second and third argument registers. */
18839 if (mode == VOIDmode)
18840 regs_available_for_popping =
18841 (1 << ARG_REGISTER (1))
18842 | (1 << ARG_REGISTER (2))
18843 | (1 << ARG_REGISTER (3));
18844 else
18845 regs_available_for_popping =
18846 (1 << ARG_REGISTER (2))
18847 | (1 << ARG_REGISTER (3));
18849 else if (size <= 4)
18850 regs_available_for_popping =
18851 (1 << ARG_REGISTER (2))
18852 | (1 << ARG_REGISTER (3));
18853 else if (size <= 8)
18854 regs_available_for_popping =
18855 (1 << ARG_REGISTER (3));
18858 /* Match registers to be popped with registers into which we pop them. */
18859 for (available = regs_available_for_popping,
18860 required = regs_to_pop;
18861 required != 0 && available != 0;
18862 available &= ~(available & - available),
18863 required &= ~(required & - required))
18864 -- pops_needed;
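/* Editorial note on the loop above: "x & -x" isolates the lowest set bit, so
   each iteration pairs off one register to pop with one register it can be
   popped into; POPS_NEEDED is left holding the number of pops that could not
   be paired.  */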
18866 /* If we have any popping registers left over, remove them. */
18867 if (available > 0)
18868 regs_available_for_popping &= ~available;
18870 /* Otherwise if we need another popping register we can use
18871 the fourth argument register. */
18872 else if (pops_needed)
18874 /* If we have not found any free argument registers and
18875 reg a4 contains the return address, we must move it. */
18876 if (regs_available_for_popping == 0
18877 && reg_containing_return_addr == LAST_ARG_REGNUM)
18879 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18880 reg_containing_return_addr = LR_REGNUM;
18882 else if (size > 12)
18884 /* Register a4 is being used to hold part of the return value,
18885 but we have dire need of a free, low register. */
18886 restore_a4 = TRUE;
18888 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
18891 if (reg_containing_return_addr != LAST_ARG_REGNUM)
18893 /* The fourth argument register is available. */
18894 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
18896 --pops_needed;
18900 /* Pop as many registers as we can. */
18901 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18902 regs_available_for_popping);
18904 /* Process the registers we popped. */
18905 if (reg_containing_return_addr == -1)
18907 /* The return address was popped into the lowest numbered register. */
18908 regs_to_pop &= ~(1 << LR_REGNUM);
18910 reg_containing_return_addr =
18911 number_of_first_bit_set (regs_available_for_popping);
18913 /* Remove this register from the mask of available registers, so that
18914 the return address will not be corrupted by further pops. */
18915 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
18918 /* If we popped other registers then handle them here. */
18919 if (regs_available_for_popping)
18921 int frame_pointer;
18923 /* Work out which register currently contains the frame pointer. */
18924 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
18926 /* Move it into the correct place. */
18927 asm_fprintf (f, "\tmov\t%r, %r\n",
18928 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
18930 /* (Temporarily) remove it from the mask of popped registers. */
18931 regs_available_for_popping &= ~(1 << frame_pointer);
18932 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
18934 if (regs_available_for_popping)
18936 int stack_pointer;
18938 /* We popped the stack pointer as well,
18939 find the register that contains it. */
18940 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
18942 /* Move it into the stack register. */
18943 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
18945 /* At this point we have popped all necessary registers, so
18946 do not worry about restoring regs_available_for_popping
18947 to its correct value:
18949 assert (pops_needed == 0)
18950 assert (regs_available_for_popping == (1 << frame_pointer))
18951 assert (regs_to_pop == (1 << STACK_POINTER)) */
18953 else
18955 /* Since we have just moved the popped value into the frame
18956 pointer, the popping register is available for reuse, and
18957 we know that we still have the stack pointer left to pop. */
18958 regs_available_for_popping |= (1 << frame_pointer);
18962 /* If we still have registers left on the stack, but we no longer have
18963 any registers into which we can pop them, then we must move the return
18964 address into the link register and make available the register that
18965 contained it. */
18966 if (regs_available_for_popping == 0 && pops_needed > 0)
18968 regs_available_for_popping |= 1 << reg_containing_return_addr;
18970 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
18971 reg_containing_return_addr);
18973 reg_containing_return_addr = LR_REGNUM;
18976 /* If we have registers left on the stack then pop some more.
18977 We know that at most we will want to pop FP and SP. */
18978 if (pops_needed > 0)
18980 int popped_into;
18981 int move_to;
18983 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18984 regs_available_for_popping);
18986 /* We have popped either FP or SP.
18987 Move whichever one it is into the correct register. */
18988 popped_into = number_of_first_bit_set (regs_available_for_popping);
18989 move_to = number_of_first_bit_set (regs_to_pop);
18991 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
18993 regs_to_pop &= ~(1 << move_to);
18995 --pops_needed;
18998 /* If we still have not popped everything then we must have only
18999 had one register available to us and we are now popping the SP. */
19000 if (pops_needed > 0)
19002 int popped_into;
19004 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19005 regs_available_for_popping);
19007 popped_into = number_of_first_bit_set (regs_available_for_popping);
19009 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
19011 assert (regs_to_pop == (1 << STACK_POINTER))
19012 assert (pops_needed == 1)
19016 /* If necessary restore the a4 register. */
19017 if (restore_a4)
19019 if (reg_containing_return_addr != LR_REGNUM)
19021 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19022 reg_containing_return_addr = LR_REGNUM;
19025 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
19028 if (crtl->calls_eh_return)
19029 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19031 /* Return to caller. */
19032 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19036 void
19037 thumb1_final_prescan_insn (rtx insn)
19039 if (flag_print_asm_name)
19040 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
19041 INSN_ADDRESSES (INSN_UID (insn)));
19045 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
19047 unsigned HOST_WIDE_INT mask = 0xff;
19048 int i;
19050 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
19051 if (val == 0) /* XXX */
19052 return 0;
19054 for (i = 0; i < 25; i++)
19055 if ((val & (mask << i)) == val)
19056 return 1;
19058 return 0;
19061 /* Returns nonzero if the current function contains,
19062 or might contain a far jump. */
19063 static int
19064 thumb_far_jump_used_p (void)
19066 rtx insn;
19068 /* This test is only important for leaf functions. */
19069 /* assert (!leaf_function_p ()); */
19071 /* If we have already decided that far jumps may be used,
19072 do not bother checking again, and always return true even if
19073 it turns out that they are not being used. Once we have made
19074 the decision that far jumps are present (and that hence the link
19075 register will be pushed onto the stack) we cannot go back on it. */
19076 if (cfun->machine->far_jump_used)
19077 return 1;
19079 /* If this function is not being called from the prologue/epilogue
19080 generation code then it must be being called from the
19081 INITIAL_ELIMINATION_OFFSET macro. */
19082 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
19084 /* In this case we know that we are being asked about the elimination
19085 of the arg pointer register. If that register is not being used,
19086 then there are no arguments on the stack, and we do not have to
19087 worry that a far jump might force the prologue to push the link
19088 register, changing the stack offsets. In this case we can just
19089 return false, since the presence of far jumps in the function will
19090 not affect stack offsets.
19092 If the arg pointer is live (or if it was live, but has now been
19093 eliminated and so set to dead) then we do have to test to see if
19094 the function might contain a far jump. This test can lead to some
19095 false positives, since before reload is completed, the length of
19096 branch instructions is not known, so gcc defaults to returning their
19097 longest length, which in turn sets the far jump attribute to true.
19099 A false positive will not result in bad code being generated, but it
19100 will result in a needless push and pop of the link register. We
19101 hope that this does not occur too often.
19103 If we need doubleword stack alignment this could affect the other
19104 elimination offsets so we can't risk getting it wrong. */
19105 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
19106 cfun->machine->arg_pointer_live = 1;
19107 else if (!cfun->machine->arg_pointer_live)
19108 return 0;
19111 /* Check to see if the function contains a branch
19112 insn with the far jump attribute set. */
19113 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19115 if (GET_CODE (insn) == JUMP_INSN
19116 /* Ignore tablejump patterns. */
19117 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19118 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
19119 && get_attr_far_jump (insn) == FAR_JUMP_YES
19122 /* Record the fact that we have decided that
19123 the function does use far jumps. */
19124 cfun->machine->far_jump_used = 1;
19125 return 1;
19129 return 0;
19132 /* Return nonzero if FUNC must be entered in ARM mode. */
19134 is_called_in_ARM_mode (tree func)
19136 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
19138 /* Ignore the problem about functions whose address is taken. */
19139 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
19140 return TRUE;
19142 #ifdef ARM_PE
19143 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
19144 #else
19145 return FALSE;
19146 #endif
19149 /* The bits which aren't usefully expanded as rtl. */
19150 const char *
19151 thumb_unexpanded_epilogue (void)
19153 arm_stack_offsets *offsets;
19154 int regno;
19155 unsigned long live_regs_mask = 0;
19156 int high_regs_pushed = 0;
19157 int had_to_push_lr;
19158 int size;
19160 if (cfun->machine->return_used_this_function != 0)
19161 return "";
19163 if (IS_NAKED (arm_current_func_type ()))
19164 return "";
19166 offsets = arm_get_frame_offsets ();
19167 live_regs_mask = offsets->saved_regs_mask;
19168 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19170 /* We can deduce the registers used from the function's return value.
19171 This is more reliable than examining df_regs_ever_live_p () because that
19172 will be set if the register is ever used in the function, not just if
19173 the register is used to hold a return value. */
19174 size = arm_size_return_regs ();
19176 /* The prolog may have pushed some high registers to use as
19177 work registers. e.g. the testsuite file:
19178 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
19179 compiles to produce:
19180 push {r4, r5, r6, r7, lr}
19181 mov r7, r9
19182 mov r6, r8
19183 push {r6, r7}
19184 as part of the prolog. We have to undo that pushing here. */
19186 if (high_regs_pushed)
19188 unsigned long mask = live_regs_mask & 0xff;
19189 int next_hi_reg;
19191 /* The available low registers depend on the size of the value we are
19192 returning. */
19193 if (size <= 12)
19194 mask |= 1 << 3;
19195 if (size <= 8)
19196 mask |= 1 << 2;
19198 if (mask == 0)
19199 /* Oh dear! We have no low registers into which we can pop
19200 high registers! */
19201 internal_error
19202 ("no low registers available for popping high registers");
19204 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
19205 if (live_regs_mask & (1 << next_hi_reg))
19206 break;
19208 while (high_regs_pushed)
19210 /* Find lo register(s) into which the high register(s) can
19211 be popped. */
19212 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19214 if (mask & (1 << regno))
19215 high_regs_pushed--;
19216 if (high_regs_pushed == 0)
19217 break;
19220 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
19222 /* Pop the values into the low register(s). */
19223 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
19225 /* Move the value(s) into the high registers. */
19226 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19228 if (mask & (1 << regno))
19230 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
19231 regno);
19233 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
19234 if (live_regs_mask & (1 << next_hi_reg))
19235 break;
19239 live_regs_mask &= ~0x0f00;
19242 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
19243 live_regs_mask &= 0xff;
19245 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
19247 /* Pop the return address into the PC. */
19248 if (had_to_push_lr)
19249 live_regs_mask |= 1 << PC_REGNUM;
19251 /* Either no argument registers were pushed or a backtrace
19252 structure was created which includes an adjusted stack
19253 pointer, so just pop everything. */
19254 if (live_regs_mask)
19255 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19256 live_regs_mask);
19258 /* We have either just popped the return address into the
19259 PC or it was kept in LR for the entire function. */
19260 if (!had_to_push_lr)
19261 thumb_exit (asm_out_file, LR_REGNUM);
19263 else
19265 /* Pop everything but the return address. */
19266 if (live_regs_mask)
19267 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19268 live_regs_mask);
19270 if (had_to_push_lr)
19272 if (size > 12)
19274 /* We have no free low regs, so save one. */
19275 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
19276 LAST_ARG_REGNUM);
19279 /* Get the return address into a temporary register. */
19280 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
19281 1 << LAST_ARG_REGNUM);
19283 if (size > 12)
19285 /* Move the return address to lr. */
19286 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
19287 LAST_ARG_REGNUM);
19288 /* Restore the low register. */
19289 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
19290 IP_REGNUM);
19291 regno = LR_REGNUM;
19293 else
19294 regno = LAST_ARG_REGNUM;
19296 else
19297 regno = LR_REGNUM;
19299 /* Remove the argument registers that were pushed onto the stack. */
19300 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
19301 SP_REGNUM, SP_REGNUM,
19302 crtl->args.pretend_args_size);
19304 thumb_exit (asm_out_file, regno);
19307 return "";
19310 /* Functions to save and restore machine-specific function data. */
19311 static struct machine_function *
19312 arm_init_machine_status (void)
19314 struct machine_function *machine;
19315 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
19317 #if ARM_FT_UNKNOWN != 0
19318 machine->func_type = ARM_FT_UNKNOWN;
19319 #endif
19320 return machine;
19323 /* Return an RTX indicating where the return address to the
19324 calling function can be found. */
19326 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
19328 if (count != 0)
19329 return NULL_RTX;
19331 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
19334 /* Do anything needed before RTL is emitted for each function. */
19335 void
19336 arm_init_expanders (void)
19338 /* Arrange to initialize and mark the machine per-function status. */
19339 init_machine_status = arm_init_machine_status;
19341 /* This is to stop the combine pass optimizing away the alignment
19342 adjustment of va_arg. */
19343 /* ??? It is claimed that this should not be necessary. */
19344 if (cfun)
19345 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
19349 /* Like arm_compute_initial_elimination_offset. Simpler because there
19350 isn't an ABI specified frame pointer for Thumb. Instead, we set it
19351 to point at the base of the local variables after static stack
19352 space for a function has been allocated. */
19354 HOST_WIDE_INT
19355 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19357 arm_stack_offsets *offsets;
19359 offsets = arm_get_frame_offsets ();
19361 switch (from)
19363 case ARG_POINTER_REGNUM:
19364 switch (to)
19366 case STACK_POINTER_REGNUM:
19367 return offsets->outgoing_args - offsets->saved_args;
19369 case FRAME_POINTER_REGNUM:
19370 return offsets->soft_frame - offsets->saved_args;
19372 case ARM_HARD_FRAME_POINTER_REGNUM:
19373 return offsets->saved_regs - offsets->saved_args;
19375 case THUMB_HARD_FRAME_POINTER_REGNUM:
19376 return offsets->locals_base - offsets->saved_args;
19378 default:
19379 gcc_unreachable ();
19381 break;
19383 case FRAME_POINTER_REGNUM:
19384 switch (to)
19386 case STACK_POINTER_REGNUM:
19387 return offsets->outgoing_args - offsets->soft_frame;
19389 case ARM_HARD_FRAME_POINTER_REGNUM:
19390 return offsets->saved_regs - offsets->soft_frame;
19392 case THUMB_HARD_FRAME_POINTER_REGNUM:
19393 return offsets->locals_base - offsets->soft_frame;
19395 default:
19396 gcc_unreachable ();
19398 break;
19400 default:
19401 gcc_unreachable ();
19405 /* Generate the rest of a function's prologue. */
19406 void
19407 thumb1_expand_prologue (void)
19409 rtx insn, dwarf;
19411 HOST_WIDE_INT amount;
19412 arm_stack_offsets *offsets;
19413 unsigned long func_type;
19414 int regno;
19415 unsigned long live_regs_mask;
19417 func_type = arm_current_func_type ();
19419 /* Naked functions don't have prologues. */
19420 if (IS_NAKED (func_type))
19421 return;
19423 if (IS_INTERRUPT (func_type))
19425 error ("interrupt Service Routines cannot be coded in Thumb mode");
19426 return;
19429 offsets = arm_get_frame_offsets ();
19430 live_regs_mask = offsets->saved_regs_mask;
19431 /* Load the pic register before setting the frame pointer,
19432 so we can use r7 as a temporary work register. */
19433 if (flag_pic && arm_pic_register != INVALID_REGNUM)
19434 arm_load_pic_register (live_regs_mask);
19436 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19437 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
19438 stack_pointer_rtx);
19440 amount = offsets->outgoing_args - offsets->saved_regs;
19441 if (amount)
19443 if (amount < 512)
19445 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19446 GEN_INT (- amount)));
19447 RTX_FRAME_RELATED_P (insn) = 1;
19449 else
19451 rtx reg;
19453 /* The stack decrement is too big for an immediate value in a single
19454 insn. In theory we could issue multiple subtracts, but after
19455 three of them it becomes more space efficient to place the full
19456 value in the constant pool and load into a register. (Also the
19457 ARM debugger really likes to see only one stack decrement per
19458 function). So instead we look for a scratch register into which
19459 we can load the decrement, and then we subtract this from the
19460 stack pointer. Unfortunately on the thumb the only available
19461 scratch registers are the argument registers, and we cannot use
19462 these as they may hold arguments to the function. Instead we
19463 attempt to locate a call preserved register which is used by this
19464 function. If we can find one, then we know that it will have
19465 been pushed at the start of the prologue and so we can corrupt
19466 it now. */
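 /* Roughly (a sketch, not generated verbatim), for a large frame this
    becomes something like:
	ldr	r4, .Ln		@ .Ln: .word -amount
	add	sp, r4
    where r4 stands for whichever saved call-preserved low register the
    loop below finds.  */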
19467 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
19468 if (live_regs_mask & (1 << regno))
19469 break;
19471 gcc_assert(regno <= LAST_LO_REGNUM);
19473 reg = gen_rtx_REG (SImode, regno);
19475 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
19477 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19478 stack_pointer_rtx, reg));
19479 RTX_FRAME_RELATED_P (insn) = 1;
19480 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19481 plus_constant (stack_pointer_rtx,
19482 -amount));
19483 RTX_FRAME_RELATED_P (dwarf) = 1;
19484 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19488 if (frame_pointer_needed)
19489 thumb_set_frame_pointer (offsets);
19491 /* If we are profiling, make sure no instructions are scheduled before
19492 the call to mcount. Similarly if the user has requested no
19493 scheduling in the prolog. Similarly if we want non-call exceptions
19494 using the EABI unwinder, to prevent faulting instructions from being
19495 swapped with a stack adjustment. */
19496 if (crtl->profile || !TARGET_SCHED_PROLOG
19497 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
19498 emit_insn (gen_blockage ());
19500 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
19501 if (live_regs_mask & 0xff)
19502 cfun->machine->lr_save_eliminated = 0;
19506 void
19507 thumb1_expand_epilogue (void)
19509 HOST_WIDE_INT amount;
19510 arm_stack_offsets *offsets;
19511 int regno;
19513 /* Naked functions don't have epilogues. */
19514 if (IS_NAKED (arm_current_func_type ()))
19515 return;
19517 offsets = arm_get_frame_offsets ();
19518 amount = offsets->outgoing_args - offsets->saved_regs;
19520 if (frame_pointer_needed)
19522 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
19523 amount = offsets->locals_base - offsets->saved_regs;
19526 gcc_assert (amount >= 0);
19527 if (amount)
19529 if (amount < 512)
19530 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19531 GEN_INT (amount)));
19532 else
19534 /* r3 is always free in the epilogue. */
19535 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
19537 emit_insn (gen_movsi (reg, GEN_INT (amount)));
19538 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
19542 /* Emit a USE (stack_pointer_rtx), so that
19543 the stack adjustment will not be deleted. */
19544 emit_insn (gen_prologue_use (stack_pointer_rtx));
19546 if (crtl->profile || !TARGET_SCHED_PROLOG)
19547 emit_insn (gen_blockage ());
19549 /* Emit a clobber for each insn that will be restored in the epilogue,
19550 so that flow2 will get register lifetimes correct. */
19551 for (regno = 0; regno < 13; regno++)
19552 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
19553 emit_clobber (gen_rtx_REG (SImode, regno));
19555 if (! df_regs_ever_live_p (LR_REGNUM))
19556 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
19559 static void
19560 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
19562 arm_stack_offsets *offsets;
19563 unsigned long live_regs_mask = 0;
19564 unsigned long l_mask;
19565 unsigned high_regs_pushed = 0;
19566 int cfa_offset = 0;
19567 int regno;
19569 if (IS_NAKED (arm_current_func_type ()))
19570 return;
19572 if (is_called_in_ARM_mode (current_function_decl))
19574 const char * name;
19576 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
19577 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
19578 == SYMBOL_REF);
19579 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
19581 /* Generate code sequence to switch us into Thumb mode. */
19582 /* The .code 32 directive has already been emitted by
19583 ASM_DECLARE_FUNCTION_NAME. */
19584 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
19585 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
19587 /* Generate a label, so that the debugger will notice the
19588 change in instruction sets. This label is also used by
19589 the assembler to bypass the ARM code when this function
19590 is called from a Thumb encoded function elsewhere in the
19591 same file. Hence the definition of STUB_NAME here must
19592 agree with the definition in gas/config/tc-arm.c. */
19594 #define STUB_NAME ".real_start_of"
19596 fprintf (f, "\t.code\t16\n");
19597 #ifdef ARM_PE
19598 if (arm_dllexport_name_p (name))
19599 name = arm_strip_name_encoding (name);
19600 #endif
19601 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
19602 fprintf (f, "\t.thumb_func\n");
19603 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
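 /* Illustration only -- for a function entered in ARM mode the code above
    emits roughly:
	orr	r12, pc, #1
	bx	r12
	.code	16
	.thumb_func
    followed by the STUB_NAME label, so ARM-mode callers fall through the
    mode switch while Thumb callers in the same file branch straight to
    the stub label.  */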
19606 if (crtl->args.pretend_args_size)
19608 /* Output unwind directive for the stack adjustment. */
19609 if (ARM_EABI_UNWIND_TABLES)
19610 fprintf (f, "\t.pad #%d\n",
19611 crtl->args.pretend_args_size);
19613 if (cfun->machine->uses_anonymous_args)
19615 int num_pushes;
19617 fprintf (f, "\tpush\t{");
19619 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
19621 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
19622 regno <= LAST_ARG_REGNUM;
19623 regno++)
19624 asm_fprintf (f, "%r%s", regno,
19625 regno == LAST_ARG_REGNUM ? "" : ", ");
19627 fprintf (f, "}\n");
19629 else
19630 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
19631 SP_REGNUM, SP_REGNUM,
19632 crtl->args.pretend_args_size);
19634 /* We don't need to record the stores for unwinding (would it
19635 help the debugger any if we did?), but record the change in
19636 the stack pointer. */
19637 if (dwarf2out_do_frame ())
19639 char *l = dwarf2out_cfi_label (false);
19641 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
19642 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19646 /* Get the registers we are going to push. */
19647 offsets = arm_get_frame_offsets ();
19648 live_regs_mask = offsets->saved_regs_mask;
19649 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19650 l_mask = live_regs_mask & 0x40ff;
19651 /* Then count how many other high registers will need to be pushed. */
19652 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19654 if (TARGET_BACKTRACE)
19656 unsigned offset;
19657 unsigned work_register;
19659 /* We have been asked to create a stack backtrace structure.
19660 The code looks like this:
19662 0 .align 2
19663 0 func:
19664 0 sub SP, #16 Reserve space for 4 registers.
19665 2 push {R7} Push low registers.
19666 4 add R7, SP, #20 Get the stack pointer before the push.
19667 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
19668 8 mov R7, PC Get hold of the start of this code plus 12.
19669 10 str R7, [SP, #16] Store it.
19670 12 mov R7, FP Get hold of the current frame pointer.
19671 14 str R7, [SP, #4] Store it.
19672 16 mov R7, LR Get hold of the current return address.
19673 18 str R7, [SP, #12] Store it.
19674 20 add R7, SP, #16 Point at the start of the backtrace structure.
19675 22 mov FP, R7 Put this value into the frame pointer. */
19677 work_register = thumb_find_work_register (live_regs_mask);
19679 if (ARM_EABI_UNWIND_TABLES)
19680 asm_fprintf (f, "\t.pad #16\n");
19682 asm_fprintf
19683 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
19684 SP_REGNUM, SP_REGNUM);
19686 if (dwarf2out_do_frame ())
19688 char *l = dwarf2out_cfi_label (false);
19690 cfa_offset = cfa_offset + 16;
19691 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19694 if (l_mask)
19696 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19697 offset = bit_count (l_mask) * UNITS_PER_WORD;
19699 else
19700 offset = 0;
19702 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19703 offset + 16 + crtl->args.pretend_args_size);
19705 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19706 offset + 4);
19708 /* Make sure that the instruction fetching the PC is in the right place
19709 to calculate "start of backtrace creation code + 12". */
19710 if (l_mask)
19712 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19713 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19714 offset + 12);
19715 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19716 ARM_HARD_FRAME_POINTER_REGNUM);
19717 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19718 offset);
19720 else
19722 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19723 ARM_HARD_FRAME_POINTER_REGNUM);
19724 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19725 offset);
19726 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19727 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19728 offset + 12);
19731 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
19732 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19733 offset + 8);
19734 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19735 offset + 12);
19736 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
19737 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
19739 /* Optimization: If we are not pushing any low registers but we are going
19740 to push some high registers, then delay our first push. This will just
19741 be a push of LR and we can combine it with the push of the first high
19742 register. */
19743 else if ((l_mask & 0xff) != 0
19744 || (high_regs_pushed == 0 && l_mask))
19745 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
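 /* Illustration (a sketch): if only r8 and lr are live, l_mask is just the
    lr bit, so the push is delayed; the high-register loop below then emits
    something like
	mov	r3, r8
	push	{r3, lr}
    using whatever work register thumb_find_work_register selects (r3 is
    purely illustrative), saving r8 and lr with a single push.  */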
19747 if (high_regs_pushed)
19749 unsigned pushable_regs;
19750 unsigned next_hi_reg;
19752 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
19753 if (live_regs_mask & (1 << next_hi_reg))
19754 break;
19756 pushable_regs = l_mask & 0xff;
19758 if (pushable_regs == 0)
19759 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
19761 while (high_regs_pushed > 0)
19763 unsigned long real_regs_mask = 0;
19765 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
19767 if (pushable_regs & (1 << regno))
19769 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
19771 high_regs_pushed --;
19772 real_regs_mask |= (1 << next_hi_reg);
19774 if (high_regs_pushed)
19776 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
19777 next_hi_reg --)
19778 if (live_regs_mask & (1 << next_hi_reg))
19779 break;
19781 else
19783 pushable_regs &= ~((1 << regno) - 1);
19784 break;
19789 /* If we had to find a work register and we have not yet
19790 saved the LR then add it to the list of regs to push. */
19791 if (l_mask == (1 << LR_REGNUM))
19793 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
19794 1, &cfa_offset,
19795 real_regs_mask | (1 << LR_REGNUM));
19796 l_mask = 0;
19798 else
19799 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
19804 /* Handle the case of a double word load into a low register from
19805 a computed memory address. The computed address may involve a
19806 register which is overwritten by the load. */
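 /* For example (a sketch): a doubleword load into r0-r1 from [r2 + r3]
    comes out as
	add	r1, r2, r3
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]
    so the computed address lives in the high half of the destination and
    is consumed before the final load overwrites it.  */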
19807 const char *
19808 thumb_load_double_from_address (rtx *operands)
19810 rtx addr;
19811 rtx base;
19812 rtx offset;
19813 rtx arg1;
19814 rtx arg2;
19816 gcc_assert (GET_CODE (operands[0]) == REG);
19817 gcc_assert (GET_CODE (operands[1]) == MEM);
19819 /* Get the memory address. */
19820 addr = XEXP (operands[1], 0);
19822 /* Work out how the memory address is computed. */
19823 switch (GET_CODE (addr))
19825 case REG:
19826 operands[2] = adjust_address (operands[1], SImode, 4);
19828 if (REGNO (operands[0]) == REGNO (addr))
19830 output_asm_insn ("ldr\t%H0, %2", operands);
19831 output_asm_insn ("ldr\t%0, %1", operands);
19833 else
19835 output_asm_insn ("ldr\t%0, %1", operands);
19836 output_asm_insn ("ldr\t%H0, %2", operands);
19838 break;
19840 case CONST:
19841 /* Compute <address> + 4 for the high order load. */
19842 operands[2] = adjust_address (operands[1], SImode, 4);
19844 output_asm_insn ("ldr\t%0, %1", operands);
19845 output_asm_insn ("ldr\t%H0, %2", operands);
19846 break;
19848 case PLUS:
19849 arg1 = XEXP (addr, 0);
19850 arg2 = XEXP (addr, 1);
19852 if (CONSTANT_P (arg1))
19853 base = arg2, offset = arg1;
19854 else
19855 base = arg1, offset = arg2;
19857 gcc_assert (GET_CODE (base) == REG);
19859 /* Catch the case of <address> = <reg> + <reg> */
19860 if (GET_CODE (offset) == REG)
19862 int reg_offset = REGNO (offset);
19863 int reg_base = REGNO (base);
19864 int reg_dest = REGNO (operands[0]);
19866 /* Add the base and offset registers together into the
19867 higher destination register. */
19868 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
19869 reg_dest + 1, reg_base, reg_offset);
19871 /* Load the lower destination register from the address in
19872 the higher destination register. */
19873 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
19874 reg_dest, reg_dest + 1);
19876 /* Load the higher destination register from its own address
19877 plus 4. */
19878 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
19879 reg_dest + 1, reg_dest + 1);
19881 else
19883 /* Compute <address> + 4 for the high order load. */
19884 operands[2] = adjust_address (operands[1], SImode, 4);
19886 /* If the computed address is held in the low order register
19887 then load the high order register first, otherwise always
19888 load the low order register first. */
19889 if (REGNO (operands[0]) == REGNO (base))
19891 output_asm_insn ("ldr\t%H0, %2", operands);
19892 output_asm_insn ("ldr\t%0, %1", operands);
19894 else
19896 output_asm_insn ("ldr\t%0, %1", operands);
19897 output_asm_insn ("ldr\t%H0, %2", operands);
19900 break;
19902 case LABEL_REF:
19903 /* With no registers to worry about we can just load the value
19904 directly. */
19905 operands[2] = adjust_address (operands[1], SImode, 4);
19907 output_asm_insn ("ldr\t%H0, %2", operands);
19908 output_asm_insn ("ldr\t%0, %1", operands);
19909 break;
19911 default:
19912 gcc_unreachable ();
19915 return "";
19918 const char *
19919 thumb_output_move_mem_multiple (int n, rtx *operands)
19921 rtx tmp;
19923 switch (n)
19925 case 2:
19926 if (REGNO (operands[4]) > REGNO (operands[5]))
19928 tmp = operands[4];
19929 operands[4] = operands[5];
19930 operands[5] = tmp;
19932 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
19933 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
19934 break;
19936 case 3:
19937 if (REGNO (operands[4]) > REGNO (operands[5]))
19939 tmp = operands[4];
19940 operands[4] = operands[5];
19941 operands[5] = tmp;
19943 if (REGNO (operands[5]) > REGNO (operands[6]))
19945 tmp = operands[5];
19946 operands[5] = operands[6];
19947 operands[6] = tmp;
19949 if (REGNO (operands[4]) > REGNO (operands[5]))
19951 tmp = operands[4];
19952 operands[4] = operands[5];
19953 operands[5] = tmp;
19956 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
19957 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
19958 break;
19960 default:
19961 gcc_unreachable ();
19964 return "";
19967 /* Output a call-via instruction for thumb state. */
19968 const char *
19969 thumb_call_via_reg (rtx reg)
19971 int regno = REGNO (reg);
19972 rtx *labelp;
19974 gcc_assert (regno < LR_REGNUM);
19976 /* If we are in the normal text section we can use a single instance
19977 per compilation unit. If we are doing function sections, then we need
19978 an entry per section, since we can't rely on reachability. */
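 /* For illustration: a call through r3 is emitted as something like
	bl	.Ln
    with a single shared veneer
 .Ln:
	bx	r3
    output later, either at the end of the file (see arm_file_end) or at
    the end of the current section; the label spelling is illustrative.  */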
19979 if (in_section == text_section)
19981 thumb_call_reg_needed = 1;
19983 if (thumb_call_via_label[regno] == NULL)
19984 thumb_call_via_label[regno] = gen_label_rtx ();
19985 labelp = thumb_call_via_label + regno;
19987 else
19989 if (cfun->machine->call_via[regno] == NULL)
19990 cfun->machine->call_via[regno] = gen_label_rtx ();
19991 labelp = cfun->machine->call_via + regno;
19994 output_asm_insn ("bl\t%a0", labelp);
19995 return "";
19998 /* Routines for generating rtl. */
19999 void
20000 thumb_expand_movmemqi (rtx *operands)
20002 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
20003 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
20004 HOST_WIDE_INT len = INTVAL (operands[2]);
20005 HOST_WIDE_INT offset = 0;
20007 while (len >= 12)
20009 emit_insn (gen_movmem12b (out, in, out, in));
20010 len -= 12;
20013 if (len >= 8)
20015 emit_insn (gen_movmem8b (out, in, out, in));
20016 len -= 8;
20019 if (len >= 4)
20021 rtx reg = gen_reg_rtx (SImode);
20022 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
20023 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
20024 len -= 4;
20025 offset += 4;
20028 if (len >= 2)
20030 rtx reg = gen_reg_rtx (HImode);
20031 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
20032 plus_constant (in, offset))));
20033 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
20034 reg));
20035 len -= 2;
20036 offset += 2;
20039 if (len)
20041 rtx reg = gen_reg_rtx (QImode);
20042 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
20043 plus_constant (in, offset))));
20044 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
20045 reg));
20049 void
20050 thumb_reload_out_hi (rtx *operands)
20052 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
20055 /* Handle reading a half-word from memory during reload. */
20056 void
20057 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
20059 gcc_unreachable ();
20062 /* Return the length of a function name prefix
20063 that starts with the character 'c'. */
20064 static int
20065 arm_get_strip_length (int c)
20067 switch (c)
20069 ARM_NAME_ENCODING_LENGTHS
20070 default: return 0;
20074 /* Return a pointer to a function's name with any
20075 and all prefix encodings stripped from it. */
20076 const char *
20077 arm_strip_name_encoding (const char *name)
20079 int skip;
20081 while ((skip = arm_get_strip_length (* name)))
20082 name += skip;
20084 return name;
20087 /* If there is a '*' anywhere in the name's prefix, then
20088 emit the stripped name verbatim, otherwise prepend an
20089 underscore if leading underscores are being used. */
20090 void
20091 arm_asm_output_labelref (FILE *stream, const char *name)
20093 int skip;
20094 int verbatim = 0;
20096 while ((skip = arm_get_strip_length (* name)))
20098 verbatim |= (*name == '*');
20099 name += skip;
20102 if (verbatim)
20103 fputs (name, stream);
20104 else
20105 asm_fprintf (stream, "%U%s", name);
20108 static void
20109 arm_file_start (void)
20111 int val;
20113 if (TARGET_UNIFIED_ASM)
20114 asm_fprintf (asm_out_file, "\t.syntax unified\n");
20116 if (TARGET_BPABI)
20118 const char *fpu_name;
20119 if (arm_select[0].string)
20120 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
20121 else if (arm_select[1].string)
20122 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
20123 else
20124 asm_fprintf (asm_out_file, "\t.cpu %s\n",
20125 all_cores[arm_default_cpu].name);
20127 if (TARGET_SOFT_FLOAT)
20129 if (TARGET_VFP)
20130 fpu_name = "softvfp";
20131 else
20132 fpu_name = "softfpa";
20134 else
20136 fpu_name = arm_fpu_desc->name;
20137 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
20139 if (TARGET_HARD_FLOAT)
20140 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
20141 if (TARGET_HARD_FLOAT_ABI)
20142 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
20145 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
20147 /* Some of these attributes only apply when the corresponding features
20148 are used. However we don't have any easy way of figuring this out.
20149 Conservatively record the setting that would have been used. */
20151 /* Tag_ABI_FP_rounding. */
20152 if (flag_rounding_math)
20153 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
20154 if (!flag_unsafe_math_optimizations)
20156 /* Tag_ABI_FP_denormal. */
20157 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
20158 /* Tag_ABI_FP_exceptions. */
20159 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
20161 /* Tag_ABI_FP_user_exceptions. */
20162 if (flag_signaling_nans)
20163 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
20164 /* Tag_ABI_FP_number_model. */
20165 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
20166 flag_finite_math_only ? 1 : 3);
20168 /* Tag_ABI_align8_needed. */
20169 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
20170 /* Tag_ABI_align8_preserved. */
20171 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
20172 /* Tag_ABI_enum_size. */
20173 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
20174 flag_short_enums ? 1 : 2);
20176 /* Tag_ABI_optimization_goals. */
20177 if (optimize_size)
20178 val = 4;
20179 else if (optimize >= 2)
20180 val = 2;
20181 else if (optimize)
20182 val = 1;
20183 else
20184 val = 6;
20185 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
20187 /* Tag_ABI_FP_16bit_format. */
20188 if (arm_fp16_format)
20189 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
20190 (int)arm_fp16_format);
20192 if (arm_lang_output_object_attributes_hook)
20193 arm_lang_output_object_attributes_hook();
20195 default_file_start();
20198 static void
20199 arm_file_end (void)
20201 int regno;
20203 if (NEED_INDICATE_EXEC_STACK)
20204 /* Add .note.GNU-stack. */
20205 file_end_indicate_exec_stack ();
20207 if (! thumb_call_reg_needed)
20208 return;
20210 switch_to_section (text_section);
20211 asm_fprintf (asm_out_file, "\t.code 16\n");
20212 ASM_OUTPUT_ALIGN (asm_out_file, 1);
20214 for (regno = 0; regno < LR_REGNUM; regno++)
20216 rtx label = thumb_call_via_label[regno];
20218 if (label != 0)
20220 targetm.asm_out.internal_label (asm_out_file, "L",
20221 CODE_LABEL_NUMBER (label));
20222 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20227 #ifndef ARM_PE
20228 /* Symbols in the text segment can be accessed without indirecting via the
20229 constant pool; it may take an extra binary operation, but this is still
20230 faster than indirecting via memory. Don't do this when not optimizing,
20231 since we won't be calculating all of the offsets necessary to do this
20232 simplification. */
20234 static void
20235 arm_encode_section_info (tree decl, rtx rtl, int first)
20237 if (optimize > 0 && TREE_CONSTANT (decl))
20238 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
20240 default_encode_section_info (decl, rtl, first);
20242 #endif /* !ARM_PE */
20244 static void
20245 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
20247 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
20248 && !strcmp (prefix, "L"))
20250 arm_ccfsm_state = 0;
20251 arm_target_insn = NULL;
20253 default_internal_label (stream, prefix, labelno);
20256 /* Output code to add DELTA to the first argument, and then jump
20257 to FUNCTION. Used for C++ multiple inheritance. */
20258 static void
20259 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
20260 HOST_WIDE_INT delta,
20261 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
20262 tree function)
20264 static int thunk_label = 0;
20265 char label[256];
20266 char labelpc[256];
20267 int mi_delta = delta;
20268 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
20269 int shift = 0;
20270 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
20271 ? 1 : 0);
20272 if (mi_delta < 0)
20273 mi_delta = - mi_delta;
20275 if (TARGET_THUMB1)
20277 int labelno = thunk_label++;
20278 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
20279 /* Thunks are entered in ARM mode when available. */
20280 if (TARGET_THUMB1_ONLY)
20282 /* push r3 so we can use it as a temporary. */
20283 /* TODO: Omit this save if r3 is not used. */
20284 fputs ("\tpush {r3}\n", file);
20285 fputs ("\tldr\tr3, ", file);
20287 else
20289 fputs ("\tldr\tr12, ", file);
20291 assemble_name (file, label);
20292 fputc ('\n', file);
20293 if (flag_pic)
20295 /* If we are generating PIC, the ldr instruction below loads
20296 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
20297 the address of the add + 8, so we have:
20299 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
20300 = target + 1.
20302 Note that we have "+ 1" because some versions of GNU ld
20303 don't set the low bit of the result for R_ARM_REL32
20304 relocations against thumb function symbols.
20305 On ARMv6M this is +4, not +8. */
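 /* Illustration only -- a sketch of the whole Thumb-1 (non-THUMB1_ONLY)
    PIC thunk, assuming a small positive delta:
	ldr	r12, .LTHUMBFUNCn
 .LTHUNKPCn:
	add	r12, pc, r12
	add	r0, r0, #delta
	bx	r12
	.align	2
 .LTHUMBFUNCn:
	.word	target - 7 - .LTHUNKPCn
    The label names and the delta are illustrative; the exact code depends
    on the target variant and on flag_pic.  */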
20306 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
20307 assemble_name (file, labelpc);
20308 fputs (":\n", file);
20309 if (TARGET_THUMB1_ONLY)
20311 /* This is 2 insns after the start of the thunk, so we know it
20312 is 4-byte aligned. */
20313 fputs ("\tadd\tr3, pc, r3\n", file);
20314 fputs ("\tmov r12, r3\n", file);
20316 else
20317 fputs ("\tadd\tr12, pc, r12\n", file);
20319 else if (TARGET_THUMB1_ONLY)
20320 fputs ("\tmov r12, r3\n", file);
20322 if (TARGET_THUMB1_ONLY)
20324 if (mi_delta > 255)
20326 fputs ("\tldr\tr3, ", file);
20327 assemble_name (file, label);
20328 fputs ("+4\n", file);
20329 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
20330 mi_op, this_regno, this_regno);
20332 else if (mi_delta != 0)
20334 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20335 mi_op, this_regno, this_regno,
20336 mi_delta);
20339 else
20341 /* TODO: Use movw/movt for large constants when available. */
20342 while (mi_delta != 0)
20344 if ((mi_delta & (3 << shift)) == 0)
20345 shift += 2;
20346 else
20348 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20349 mi_op, this_regno, this_regno,
20350 mi_delta & (0xff << shift));
20351 mi_delta &= ~(0xff << shift);
20352 shift += 8;
20356 if (TARGET_THUMB1)
20358 if (TARGET_THUMB1_ONLY)
20359 fputs ("\tpop\t{r3}\n", file);
20361 fprintf (file, "\tbx\tr12\n");
20362 ASM_OUTPUT_ALIGN (file, 2);
20363 assemble_name (file, label);
20364 fputs (":\n", file);
20365 if (flag_pic)
20367 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
20368 rtx tem = XEXP (DECL_RTL (function), 0);
20369 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
20370 tem = gen_rtx_MINUS (GET_MODE (tem),
20371 tem,
20372 gen_rtx_SYMBOL_REF (Pmode,
20373 ggc_strdup (labelpc)));
20374 assemble_integer (tem, 4, BITS_PER_WORD, 1);
20376 else
20377 /* Output ".word .LTHUNKn". */
20378 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
20380 if (TARGET_THUMB1_ONLY && mi_delta > 255)
20381 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
20383 else
20385 fputs ("\tb\t", file);
20386 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
20387 if (NEED_PLT_RELOC)
20388 fputs ("(PLT)", file);
20389 fputc ('\n', file);
20394 arm_emit_vector_const (FILE *file, rtx x)
20396 int i;
20397 const char * pattern;
20399 gcc_assert (GET_CODE (x) == CONST_VECTOR);
20401 switch (GET_MODE (x))
20403 case V2SImode: pattern = "%08x"; break;
20404 case V4HImode: pattern = "%04x"; break;
20405 case V8QImode: pattern = "%02x"; break;
20406 default: gcc_unreachable ();
20409 fprintf (file, "0x");
20410 for (i = CONST_VECTOR_NUNITS (x); i--;)
20412 rtx element;
20414 element = CONST_VECTOR_ELT (x, i);
20415 fprintf (file, pattern, INTVAL (element));
20418 return 1;
20421 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20422 HFmode constant pool entries are actually loaded with ldr. */
20423 void
20424 arm_emit_fp16_const (rtx c)
20426 REAL_VALUE_TYPE r;
20427 long bits;
20429 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
20430 bits = real_to_target (NULL, &r, HFmode);
20431 if (WORDS_BIG_ENDIAN)
20432 assemble_zeros (2);
20433 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
20434 if (!WORDS_BIG_ENDIAN)
20435 assemble_zeros (2);
20438 const char *
20439 arm_output_load_gr (rtx *operands)
20441 rtx reg;
20442 rtx offset;
20443 rtx wcgr;
20444 rtx sum;
20446 if (GET_CODE (operands [1]) != MEM
20447 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
20448 || GET_CODE (reg = XEXP (sum, 0)) != REG
20449 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
20450 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
20451 return "wldrw%?\t%0, %1";
20453 /* Fix up an out-of-range load of a GR register. */
20454 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
20455 wcgr = operands[0];
20456 operands[0] = reg;
20457 output_asm_insn ("ldr%?\t%0, %1", operands);
20459 operands[0] = wcgr;
20460 operands[1] = reg;
20461 output_asm_insn ("tmcr%?\t%0, %1", operands);
20462 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
20464 return "";
20467 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
20469 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
20470 named arg and all anonymous args onto the stack.
20471 XXX I know the prologue shouldn't be pushing registers, but it is faster
20472 that way. */
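/* For example: for a function declared as f (int a, ...), only r0 holds a
   named argument, so nregs is 1 and *pretend_size becomes 3 * UNITS_PER_WORD;
   the prologue then pushes r1-r3 so that the anonymous register arguments
   end up contiguous with any stack-based ones. */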
20474 static void
20475 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
20476 enum machine_mode mode,
20477 tree type,
20478 int *pretend_size,
20479 int second_time ATTRIBUTE_UNUSED)
20481 int nregs;
20483 cfun->machine->uses_anonymous_args = 1;
20484 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
20486 nregs = pcum->aapcs_ncrn;
20487 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
20488 nregs++;
20490 else
20491 nregs = pcum->nregs;
20493 if (nregs < NUM_ARG_REGS)
20494 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
20497 /* Return nonzero if the CONSUMER instruction (a store) does not need
20498 PRODUCER's value to calculate the address. */
20501 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
20503 rtx value = PATTERN (producer);
20504 rtx addr = PATTERN (consumer);
20506 if (GET_CODE (value) == COND_EXEC)
20507 value = COND_EXEC_CODE (value);
20508 if (GET_CODE (value) == PARALLEL)
20509 value = XVECEXP (value, 0, 0);
20510 value = XEXP (value, 0);
20511 if (GET_CODE (addr) == COND_EXEC)
20512 addr = COND_EXEC_CODE (addr);
20513 if (GET_CODE (addr) == PARALLEL)
20514 addr = XVECEXP (addr, 0, 0);
20515 addr = XEXP (addr, 0);
20517 return !reg_overlap_mentioned_p (value, addr);
20520 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20521 have an early register shift value or amount dependency on the
20522 result of PRODUCER. */
20525 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
20527 rtx value = PATTERN (producer);
20528 rtx op = PATTERN (consumer);
20529 rtx early_op;
20531 if (GET_CODE (value) == COND_EXEC)
20532 value = COND_EXEC_CODE (value);
20533 if (GET_CODE (value) == PARALLEL)
20534 value = XVECEXP (value, 0, 0);
20535 value = XEXP (value, 0);
20536 if (GET_CODE (op) == COND_EXEC)
20537 op = COND_EXEC_CODE (op);
20538 if (GET_CODE (op) == PARALLEL)
20539 op = XVECEXP (op, 0, 0);
20540 op = XEXP (op, 1);
20542 early_op = XEXP (op, 0);
20543 /* This is either an actual independent shift, or a shift applied to
20544 the first operand of another operation. We want the whole shift
20545 operation. */
20546 if (GET_CODE (early_op) == REG)
20547 early_op = op;
20549 return !reg_overlap_mentioned_p (value, early_op);
20552 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20553 have an early register shift value dependency on the result of
20554 PRODUCER. */
20557 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
20559 rtx value = PATTERN (producer);
20560 rtx op = PATTERN (consumer);
20561 rtx early_op;
20563 if (GET_CODE (value) == COND_EXEC)
20564 value = COND_EXEC_CODE (value);
20565 if (GET_CODE (value) == PARALLEL)
20566 value = XVECEXP (value, 0, 0);
20567 value = XEXP (value, 0);
20568 if (GET_CODE (op) == COND_EXEC)
20569 op = COND_EXEC_CODE (op);
20570 if (GET_CODE (op) == PARALLEL)
20571 op = XVECEXP (op, 0, 0);
20572 op = XEXP (op, 1);
20574 early_op = XEXP (op, 0);
20576 /* This is either an actual independent shift, or a shift applied to
20577 the first operand of another operation. We want the value being
20578 shifted, in either case. */
20579 if (GET_CODE (early_op) != REG)
20580 early_op = XEXP (early_op, 0);
20582 return !reg_overlap_mentioned_p (value, early_op);
20585 /* Return nonzero if the CONSUMER (a mul or mac op) does not
20586 have an early register mult dependency on the result of
20587 PRODUCER. */
20590 arm_no_early_mul_dep (rtx producer, rtx consumer)
20592 rtx value = PATTERN (producer);
20593 rtx op = PATTERN (consumer);
20595 if (GET_CODE (value) == COND_EXEC)
20596 value = COND_EXEC_CODE (value);
20597 if (GET_CODE (value) == PARALLEL)
20598 value = XVECEXP (value, 0, 0);
20599 value = XEXP (value, 0);
20600 if (GET_CODE (op) == COND_EXEC)
20601 op = COND_EXEC_CODE (op);
20602 if (GET_CODE (op) == PARALLEL)
20603 op = XVECEXP (op, 0, 0);
20604 op = XEXP (op, 1);
20606 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
20608 if (GET_CODE (XEXP (op, 0)) == MULT)
20609 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
20610 else
20611 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
20614 return 0;
20617 /* We can't rely on the caller doing the proper promotion when
20618 using APCS or ATPCS. */
20620 static bool
20621 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
20623 return !TARGET_AAPCS_BASED;
20626 static enum machine_mode
20627 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
20628 enum machine_mode mode,
20629 int *punsignedp ATTRIBUTE_UNUSED,
20630 const_tree fntype ATTRIBUTE_UNUSED,
20631 int for_return ATTRIBUTE_UNUSED)
20633 if (GET_MODE_CLASS (mode) == MODE_INT
20634 && GET_MODE_SIZE (mode) < 4)
20635 return SImode;
20637 return mode;
20640 /* AAPCS based ABIs use short enums by default. */
20642 static bool
20643 arm_default_short_enums (void)
20645 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
20649 /* AAPCS requires that anonymous bitfields affect structure alignment. */
20651 static bool
20652 arm_align_anon_bitfield (void)
20654 return TARGET_AAPCS_BASED;
20658 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
20660 static tree
20661 arm_cxx_guard_type (void)
20663 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
20666 /* Return non-zero if the consumer (a multiply-accumulate instruction)
20667 has an accumulator dependency on the result of the producer (a
20668 multiplication instruction) and no other dependency on that result. */
20670 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
20672 rtx mul = PATTERN (producer);
20673 rtx mac = PATTERN (consumer);
20674 rtx mul_result;
20675 rtx mac_op0, mac_op1, mac_acc;
20677 if (GET_CODE (mul) == COND_EXEC)
20678 mul = COND_EXEC_CODE (mul);
20679 if (GET_CODE (mac) == COND_EXEC)
20680 mac = COND_EXEC_CODE (mac);
20682 /* Check that mul is of the form (set (...) (mult ...))
20683 and mla is of the form (set (...) (plus (mult ...) (...))). */
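 /* For example (a sketch): with
	producer: (set (reg 0) (mult (reg 1) (reg 2)))
	consumer: (set (reg 3) (plus (mult (reg 4) (reg 5)) (reg 0)))
    we return nonzero, since the multiply result (reg 0) feeds only the
    accumulator operand of the multiply-accumulate.  */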
20684 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
20685 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
20686 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
20687 return 0;
20689 mul_result = XEXP (mul, 0);
20690 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
20691 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
20692 mac_acc = XEXP (XEXP (mac, 1), 1);
20694 return (reg_overlap_mentioned_p (mul_result, mac_acc)
20695 && !reg_overlap_mentioned_p (mul_result, mac_op0)
20696 && !reg_overlap_mentioned_p (mul_result, mac_op1));
20700 /* The EABI says test the least significant bit of a guard variable. */
20702 static bool
20703 arm_cxx_guard_mask_bit (void)
20705 return TARGET_AAPCS_BASED;
20709 /* The EABI specifies that all array cookies are 8 bytes long. */
20711 static tree
20712 arm_get_cookie_size (tree type)
20714 tree size;
20716 if (!TARGET_AAPCS_BASED)
20717 return default_cxx_get_cookie_size (type);
20719 size = build_int_cst (sizetype, 8);
20720 return size;
20724 /* The EABI says that array cookies should also contain the element size. */
20726 static bool
20727 arm_cookie_has_size (void)
20729 return TARGET_AAPCS_BASED;
20733 /* The EABI says constructors and destructors should return a pointer to
20734 the object constructed/destroyed. */
20736 static bool
20737 arm_cxx_cdtor_returns_this (void)
20739 return TARGET_AAPCS_BASED;
20742 /* The EABI says that an inline function may never be the key
20743 method. */
20745 static bool
20746 arm_cxx_key_method_may_be_inline (void)
20748 return !TARGET_AAPCS_BASED;
20751 static void
20752 arm_cxx_determine_class_data_visibility (tree decl)
20754 if (!TARGET_AAPCS_BASED
20755 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
20756 return;
20758 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
20759 is exported. However, on systems without dynamic vague linkage,
20760 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
20761 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
20762 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
20763 else
20764 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
20765 DECL_VISIBILITY_SPECIFIED (decl) = 1;
20768 static bool
20769 arm_cxx_class_data_always_comdat (void)
20771 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
20772 vague linkage if the class has no key function. */
20773 return !TARGET_AAPCS_BASED;
20777 /* The EABI says __aeabi_atexit should be used to register static
20778 destructors. */
20780 static bool
20781 arm_cxx_use_aeabi_atexit (void)
20783 return TARGET_AAPCS_BASED;
20787 void
20788 arm_set_return_address (rtx source, rtx scratch)
20790 arm_stack_offsets *offsets;
20791 HOST_WIDE_INT delta;
20792 rtx addr;
20793 unsigned long saved_regs;
20795 offsets = arm_get_frame_offsets ();
20796 saved_regs = offsets->saved_regs_mask;
20798 if ((saved_regs & (1 << LR_REGNUM)) == 0)
20799 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20800 else
20802 if (frame_pointer_needed)
20803 addr = plus_constant(hard_frame_pointer_rtx, -4);
20804 else
20806 /* LR will be the first saved register. */
20807 delta = offsets->outgoing_args - (offsets->frame + 4);
20810 if (delta >= 4096)
20812 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
20813 GEN_INT (delta & ~4095)));
20814 addr = scratch;
20815 delta &= 4095;
20817 else
20818 addr = stack_pointer_rtx;
20820 addr = plus_constant (addr, delta);
20822 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20827 void
20828 thumb_set_return_address (rtx source, rtx scratch)
20830 arm_stack_offsets *offsets;
20831 HOST_WIDE_INT delta;
20832 HOST_WIDE_INT limit;
20833 int reg;
20834 rtx addr;
20835 unsigned long mask;
20837 emit_use (source);
20839 offsets = arm_get_frame_offsets ();
20840 mask = offsets->saved_regs_mask;
20841 if (mask & (1 << LR_REGNUM))
20843 limit = 1024;
20844 /* Find the saved regs. */
20845 if (frame_pointer_needed)
20847 delta = offsets->soft_frame - offsets->saved_args;
20848 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
20849 if (TARGET_THUMB1)
20850 limit = 128;
20852 else
20854 delta = offsets->outgoing_args - offsets->saved_args;
20855 reg = SP_REGNUM;
20857 /* Allow for the stack frame. */
20858 if (TARGET_THUMB1 && TARGET_BACKTRACE)
20859 delta -= 16;
20860 /* The link register is always the first saved register. */
20861 delta -= 4;
20863 /* Construct the address. */
20864 addr = gen_rtx_REG (SImode, reg);
20865 if (delta > limit)
20867 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
20868 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
20869 addr = scratch;
20871 else
20872 addr = plus_constant (addr, delta);
20874 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20876 else
20877 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20880 /* Implements target hook vector_mode_supported_p. */
20881 bool
20882 arm_vector_mode_supported_p (enum machine_mode mode)
20884 /* Neon also supports V2SImode, etc. listed in the clause below. */
20885 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
20886 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
20887 return true;
20889 if ((TARGET_NEON || TARGET_IWMMXT)
20890 && ((mode == V2SImode)
20891 || (mode == V4HImode)
20892 || (mode == V8QImode)))
20893 return true;
20895 return false;
20898 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
20899 ARM insns and therefore guarantee that the shift count is modulo 256.
20900 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
20901 guarantee no particular behavior for out-of-range counts. */
20903 static unsigned HOST_WIDE_INT
20904 arm_shift_truncation_mask (enum machine_mode mode)
20906 return mode == SImode ? 255 : 0;
20910 /* Map internal gcc register numbers to DWARF2 register numbers. */
20912 unsigned int
20913 arm_dbx_register_number (unsigned int regno)
20915 if (regno < 16)
20916 return regno;
20918 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
20919 compatibility. The EABI defines them as registers 96-103. */
20920 if (IS_FPA_REGNUM (regno))
20921 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
20923 if (IS_VFP_REGNUM (regno))
20925 /* See comment in arm_dwarf_register_span. */
20926 if (VFP_REGNO_OK_FOR_SINGLE (regno))
20927 return 64 + regno - FIRST_VFP_REGNUM;
20928 else
20929 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
20932 if (IS_IWMMXT_GR_REGNUM (regno))
20933 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
20935 if (IS_IWMMXT_REGNUM (regno))
20936 return 112 + regno - FIRST_IWMMXT_REGNUM;
20938 gcc_unreachable ();
20941 /* Dwarf models VFPv3 registers as 32 64-bit registers.
20942 GCC models them as 64 32-bit registers, so we need to describe this to
20943 the DWARF generation code. Other registers can use the default. */
20944 static rtx
20945 arm_dwarf_register_span (rtx rtl)
20947 unsigned regno;
20948 int nregs;
20949 int i;
20950 rtx p;
20952 regno = REGNO (rtl);
20953 if (!IS_VFP_REGNUM (regno))
20954 return NULL_RTX;
20956 /* XXX FIXME: The EABI defines two VFP register ranges:
20957 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
20958 256-287: D0-D31
20959 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
20960 corresponding D register. Until GDB supports this, we shall use the
20961 legacy encodings. We also use these encodings for D0-D15 for
20962 compatibility with older debuggers. */
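 /* Illustration: a DImode value in d16 (which has no single-precision
    alias) yields a one-element PARALLEL containing (reg:DI 272), since
    the EABI numbers D0-D31 as 256-287.  */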
20963 if (VFP_REGNO_OK_FOR_SINGLE (regno))
20964 return NULL_RTX;
20966 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
20967 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
20968 regno = (regno - FIRST_VFP_REGNUM) / 2;
20969 for (i = 0; i < nregs; i++)
20970 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
20972 return p;
20975 #ifdef TARGET_UNWIND_INFO
20976 /* Emit unwind directives for a store-multiple instruction or stack pointer
20977 push during alignment.
20978 These should only ever be generated by the function prologue code, so
20979 expect them to have a particular form. */
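/* For illustration: a prologue push such as
	push	{r4, r5, lr}
   is annotated here as
	.save	{r4, r5, lr}
   and a VFP store-multiple of d8-d9 as
	.vsave	{d8, d9}
   (a sketch; FPA saves use the two-argument .save form instead).  */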
20981 static void
20982 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
20984 int i;
20985 HOST_WIDE_INT offset;
20986 HOST_WIDE_INT nregs;
20987 int reg_size;
20988 unsigned reg;
20989 unsigned lastreg;
20990 rtx e;
20992 e = XVECEXP (p, 0, 0);
20993 if (GET_CODE (e) != SET)
20994 abort ();
20996 /* First insn will adjust the stack pointer. */
20997 if (GET_CODE (e) != SET
20998 || GET_CODE (XEXP (e, 0)) != REG
20999 || REGNO (XEXP (e, 0)) != SP_REGNUM
21000 || GET_CODE (XEXP (e, 1)) != PLUS)
21001 abort ();
21003 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
21004 nregs = XVECLEN (p, 0) - 1;
21006 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
21007 if (reg < 16)
21009 /* The function prologue may also push pc, but not annotate it as it is
21010 never restored. We turn this into a stack pointer adjustment. */
21011 if (nregs * 4 == offset - 4)
21013 fprintf (asm_out_file, "\t.pad #4\n");
21014 offset -= 4;
21016 reg_size = 4;
21017 fprintf (asm_out_file, "\t.save {");
21019 else if (IS_VFP_REGNUM (reg))
21021 reg_size = 8;
21022 fprintf (asm_out_file, "\t.vsave {");
21024 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
21026 /* FPA registers are done differently. */
21027 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
21028 return;
21030 else
21031 /* Unknown register type. */
21032 abort ();
21034 /* If the stack increment doesn't match the size of the saved registers,
21035 something has gone horribly wrong. */
21036 if (offset != nregs * reg_size)
21037 abort ();
21039 offset = 0;
21040 lastreg = 0;
21041 /* The remaining insns will describe the stores. */
21042 for (i = 1; i <= nregs; i++)
21044 /* Expect (set (mem <addr>) (reg)).
21045 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
21046 e = XVECEXP (p, 0, i);
21047 if (GET_CODE (e) != SET
21048 || GET_CODE (XEXP (e, 0)) != MEM
21049 || GET_CODE (XEXP (e, 1)) != REG)
21050 abort ();
21052 reg = REGNO (XEXP (e, 1));
21053 if (reg < lastreg)
21054 abort ();
21056 if (i != 1)
21057 fprintf (asm_out_file, ", ");
21058 /* We can't use %r for vfp because we need to use the
21059 double precision register names. */
21060 if (IS_VFP_REGNUM (reg))
21061 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
21062 else
21063 asm_fprintf (asm_out_file, "%r", reg);
21065 #ifdef ENABLE_CHECKING
21066 /* Check that the addresses are consecutive. */
21067 e = XEXP (XEXP (e, 0), 0);
21068 if (GET_CODE (e) == PLUS)
21070 offset += reg_size;
21071 if (GET_CODE (XEXP (e, 0)) != REG
21072 || REGNO (XEXP (e, 0)) != SP_REGNUM
21073 || GET_CODE (XEXP (e, 1)) != CONST_INT
21074 || offset != INTVAL (XEXP (e, 1)))
21075 abort ();
21077 else if (i != 1
21078 || GET_CODE (e) != REG
21079 || REGNO (e) != SP_REGNUM)
21080 abort ();
21081 #endif
21083 fprintf (asm_out_file, "}\n");
21086 /* Emit unwind directives for a SET. */
21088 static void
21089 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
21091 rtx e0;
21092 rtx e1;
21093 unsigned reg;
21095 e0 = XEXP (p, 0);
21096 e1 = XEXP (p, 1);
21097 switch (GET_CODE (e0))
21099 case MEM:
21100 /* Pushing a single register. */
21101 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
21102 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
21103 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
21104 abort ();
21106 asm_fprintf (asm_out_file, "\t.save ");
21107 if (IS_VFP_REGNUM (REGNO (e1)))
21108 asm_fprintf(asm_out_file, "{d%d}\n",
21109 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
21110 else
21111 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
21112 break;
21114 case REG:
21115 if (REGNO (e0) == SP_REGNUM)
21117 /* A stack increment. */
21118 if (GET_CODE (e1) != PLUS
21119 || GET_CODE (XEXP (e1, 0)) != REG
21120 || REGNO (XEXP (e1, 0)) != SP_REGNUM
21121 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21122 abort ();
21124 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
21125 -INTVAL (XEXP (e1, 1)));
21127 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
21129 HOST_WIDE_INT offset;
21131 if (GET_CODE (e1) == PLUS)
21133 if (GET_CODE (XEXP (e1, 0)) != REG
21134 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21135 abort ();
21136 reg = REGNO (XEXP (e1, 0));
21137 offset = INTVAL (XEXP (e1, 1));
21138 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
21139 HARD_FRAME_POINTER_REGNUM, reg,
21140 offset);
21142 else if (GET_CODE (e1) == REG)
21144 reg = REGNO (e1);
21145 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
21146 HARD_FRAME_POINTER_REGNUM, reg);
21148 else
21149 abort ();
21151 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
21153 /* Move from sp to reg. */
21154 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
21156 else if (GET_CODE (e1) == PLUS
21157 && GET_CODE (XEXP (e1, 0)) == REG
21158 && REGNO (XEXP (e1, 0)) == SP_REGNUM
21159 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
21161 /* Set reg to offset from sp. */
21162 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
21163 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
21165 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
21167 /* Stack pointer save before alignment. */
21168 reg = REGNO (e0);
21169 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
21170 reg + 0x90, reg);
21172 else
21173 abort ();
21174 break;
21176 default:
21177 abort ();
21182 /* Emit unwind directives for the given insn. */
21184 static void
21185 arm_unwind_emit (FILE * asm_out_file, rtx insn)
21187 rtx pat;
21189 if (!ARM_EABI_UNWIND_TABLES)
21190 return;
21192 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21193 && (TREE_NOTHROW (current_function_decl)
21194 || crtl->all_throwers_are_sibcalls))
21195 return;
21197 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
21198 return;
21200 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
21201 if (pat)
21202 pat = XEXP (pat, 0);
21203 else
21204 pat = PATTERN (insn);
21206 switch (GET_CODE (pat))
21208 case SET:
21209 arm_unwind_emit_set (asm_out_file, pat);
21210 break;
21212 case SEQUENCE:
21213 /* Store multiple. */
21214 arm_unwind_emit_sequence (asm_out_file, pat);
21215 break;
21217 default:
21218 abort();
21223 /* Output a reference from a function exception table to the type_info
21224 object X. The EABI specifies that the symbol should be relocated by
21225 an R_ARM_TARGET2 relocation. */
21227 static bool
21228 arm_output_ttype (rtx x)
21230 fputs ("\t.word\t", asm_out_file);
21231 output_addr_const (asm_out_file, x);
21232 /* Use special relocations for symbol references. */
21233 if (GET_CODE (x) != CONST_INT)
21234 fputs ("(TARGET2)", asm_out_file);
21235 fputc ('\n', asm_out_file);
21237 return TRUE;
21239 #endif /* TARGET_UNWIND_INFO */
21242 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
21243 stack alignment. */
21245 static void
21246 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
21248 rtx unspec = SET_SRC (pattern);
21249 gcc_assert (GET_CODE (unspec) == UNSPEC);
21251 switch (index)
21253 case UNSPEC_STACK_ALIGN:
21254 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
21255 put anything on the stack, so hopefully it won't matter.
21256 CFA = SP will be correct after alignment. */
21257 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
21258 SET_DEST (pattern));
21259 break;
21260 default:
21261 gcc_unreachable ();
21266 /* Output unwind directives for the start/end of a function. */
21268 void
21269 arm_output_fn_unwind (FILE * f, bool prologue)
21271 if (!ARM_EABI_UNWIND_TABLES)
21272 return;
21274 if (prologue)
21275 fputs ("\t.fnstart\n", f);
21276 else
21278 /* If this function will never be unwound, then mark it as such.
21279 The same condition is used in arm_unwind_emit to suppress
21280 the frame annotations. */
21281 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21282 && (TREE_NOTHROW (current_function_decl)
21283 || crtl->all_throwers_are_sibcalls))
21284 fputs ("\t.cantunwind\n", f);
21286 fputs ("\t.fnend\n", f);
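/* The net effect is that every function body is bracketed by
       .fnstart
       ...
       .fnend
   and a function that can provably never be unwound additionally gets a
   ".cantunwind" directive just before ".fnend".  */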
21290 static bool
21291 arm_emit_tls_decoration (FILE *fp, rtx x)
21293 enum tls_reloc reloc;
21294 rtx val;
21296 val = XVECEXP (x, 0, 0);
21297 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
21299 output_addr_const (fp, val);
21301 switch (reloc)
21303 case TLS_GD32:
21304 fputs ("(tlsgd)", fp);
21305 break;
21306 case TLS_LDM32:
21307 fputs ("(tlsldm)", fp);
21308 break;
21309 case TLS_LDO32:
21310 fputs ("(tlsldo)", fp);
21311 break;
21312 case TLS_IE32:
21313 fputs ("(gottpoff)", fp);
21314 break;
21315 case TLS_LE32:
21316 fputs ("(tpoff)", fp);
21317 break;
21318 default:
21319 gcc_unreachable ();
21322 switch (reloc)
21324 case TLS_GD32:
21325 case TLS_LDM32:
21326 case TLS_IE32:
21327 fputs (" + (. - ", fp);
21328 output_addr_const (fp, XVECEXP (x, 0, 2));
21329 fputs (" - ", fp);
21330 output_addr_const (fp, XVECEXP (x, 0, 3));
21331 fputc (')', fp);
21332 break;
21333 default:
21334 break;
21337 return TRUE;
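/* So, for instance, a general-dynamic access to a symbol "x" is printed
   in roughly the shape "x(tlsgd) + (. - LABEL1 - LABEL2)", where the two
   labels come from operands 2 and 3 of the UNSPEC, while a local-exec
   access reduces to the plain "x(tpoff)" form with no PC-relative
   correction.  */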
21340 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
21342 static void
21343 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
21345 gcc_assert (size == 4);
21346 fputs ("\t.word\t", file);
21347 output_addr_const (file, x);
21348 fputs ("(tlsldo)", file);
21351 bool
21352 arm_output_addr_const_extra (FILE *fp, rtx x)
21354 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
21355 return arm_emit_tls_decoration (fp, x);
21356 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
21358 char label[256];
21359 int labelno = INTVAL (XVECEXP (x, 0, 0));
21361 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
21362 assemble_name_raw (fp, label);
21364 return TRUE;
21366 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
21368 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
21369 if (GOT_PCREL)
21370 fputs ("+.", fp);
21371 fputs ("-(", fp);
21372 output_addr_const (fp, XVECEXP (x, 0, 0));
21373 fputc (')', fp);
21374 return TRUE;
21376 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
21378 output_addr_const (fp, XVECEXP (x, 0, 0));
21379 if (GOT_PCREL)
21380 fputs ("+.", fp);
21381 fputs ("-(", fp);
21382 output_addr_const (fp, XVECEXP (x, 0, 1));
21383 fputc (')', fp);
21384 return TRUE;
21386 else if (GET_CODE (x) == CONST_VECTOR)
21387 return arm_emit_vector_const (fp, x);
21389 return FALSE;
21392 /* Output assembly for a shift instruction.
21393 SET_FLAGS determines how the instruction modifies the condition codes.
21394 0 - Do not set condition codes.
21395 1 - Set condition codes.
21396 2 - Use smallest instruction. */
21397 const char *
21398 arm_output_shift (rtx *operands, int set_flags)
21400 char pattern[100];
21401 static const char flag_chars[3] = {'?', '.', '!'};
21402 const char *shift;
21403 HOST_WIDE_INT val;
21404 char c;
21406 c = flag_chars[set_flags];
21407 if (TARGET_UNIFIED_ASM)
21409 shift = shift_op (operands[3], &val);
21410 if (shift)
21412 if (val != -1)
21413 operands[2] = GEN_INT (val);
21414 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
21416 else
21417 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
21419 else
21420 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
21421 output_asm_insn (pattern, operands);
21422 return "";
21425 /* Output a Thumb-1 casesi dispatch sequence. */
21426 const char *
21427 thumb1_output_casesi (rtx *operands)
21429 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
21431 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21433 switch (GET_MODE (diff_vec))
21435 case QImode:
21436 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21437 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
21438 case HImode:
21439 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21440 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
21441 case SImode:
21442 return "bl\t%___gnu_thumb1_case_si";
21443 default:
21444 gcc_unreachable ();
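/* The __gnu_thumb1_case_* helpers called above are assumed to be provided
   by the runtime library (libgcc); they locate the dispatch table that
   the compiler places immediately after the "bl" and branch to the
   selected case, keeping the inline dispatch sequence down to a single
   instruction.  */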
21448 /* Output a Thumb-2 casesi instruction. */
21449 const char *
21450 thumb2_output_casesi (rtx *operands)
21452 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
21454 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21456 output_asm_insn ("cmp\t%0, %1", operands);
21457 output_asm_insn ("bhi\t%l3", operands);
21458 switch (GET_MODE (diff_vec))
21460 case QImode:
21461 return "tbb\t[%|pc, %0]";
21462 case HImode:
21463 return "tbh\t[%|pc, %0, lsl #1]";
21464 case SImode:
21465 if (flag_pic)
21467 output_asm_insn ("adr\t%4, %l2", operands);
21468 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
21469 output_asm_insn ("add\t%4, %4, %5", operands);
21470 return "bx\t%4";
21472 else
21474 output_asm_insn ("adr\t%4, %l2", operands);
21475 return "ldr\t%|pc, [%4, %0, lsl #2]";
21477 default:
21478 gcc_unreachable ();
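/* For a HImode table this expands to something like
       cmp   r0, #<ncases-1>
       bhi   .Ldefault
       tbh   [pc, r0, lsl #1]
   followed by the halfword offset table; tbb/tbh branch forward by twice
   the selected table entry.  An SImode table has no table-branch form, so
   it is handled with an address load and an explicit branch (a load into
   pc, or adr/ldr/add/bx when compiling PIC).  Register numbers here are
   illustrative.  */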
21482 /* Most ARM cores are single issue, but some newer ones can dual issue.
21483 The scheduler descriptions rely on this being correct. */
21484 static int
21485 arm_issue_rate (void)
21487 switch (arm_tune)
21489 case cortexr4:
21490 case cortexr4f:
21491 case cortexa8:
21492 case cortexa9:
21493 return 2;
21495 default:
21496 return 1;
21500 /* A table and a function to perform ARM-specific name mangling for
21501 NEON vector types in order to conform to the AAPCS (see "Procedure
21502 Call Standard for the ARM Architecture", Appendix A). To qualify
21503 for emission with the mangled names defined in that document, a
21504 vector type must not only be of the correct mode but also be
21505 composed of NEON vector element types (e.g. __builtin_neon_qi). */
21506 typedef struct
21508 enum machine_mode mode;
21509 const char *element_type_name;
21510 const char *aapcs_name;
21511 } arm_mangle_map_entry;
21513 static arm_mangle_map_entry arm_mangle_map[] = {
21514 /* 64-bit containerized types. */
21515 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
21516 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
21517 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
21518 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
21519 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
21520 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
21521 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
21522 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
21523 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
21524 /* 128-bit containerized types. */
21525 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
21526 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
21527 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
21528 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
21529 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
21530 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
21531 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
21532 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
21533 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
21534 { VOIDmode, NULL, NULL }
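/* Worked example (using the AAPCS names above): the NEON type int8x8_t
   has V8QImode and element type __builtin_neon_qi, so it is mangled with
   the length-prefixed name "15__simd64_int8_t"; a function
   "void f (int8x8_t)" therefore mangles as "_Z1f15__simd64_int8_t".  */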
21537 const char *
21538 arm_mangle_type (const_tree type)
21540 arm_mangle_map_entry *pos = arm_mangle_map;
21542 /* The ARM ABI documents (10th October 2008) say that "__va_list"
21543 has to be mangled as if it were in the "std" namespace. */
21544 if (TARGET_AAPCS_BASED
21545 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
21547 static bool warned;
21548 if (!warned && warn_psabi && !in_system_header)
21550 warned = true;
21551 inform (input_location,
21552 "the mangling of %<va_list%> has changed in GCC 4.4");
21554 return "St9__va_list";
21557 /* Half-precision float. */
21558 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
21559 return "Dh";
21561 if (TREE_CODE (type) != VECTOR_TYPE)
21562 return NULL;
21564 /* Check the mode of the vector type, and the name of the vector
21565 element type, against the table. */
21566 while (pos->mode != VOIDmode)
21568 tree elt_type = TREE_TYPE (type);
21570 if (pos->mode == TYPE_MODE (type)
21571 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
21572 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
21573 pos->element_type_name))
21574 return pos->aapcs_name;
21576 pos++;
21579 /* Use the default mangling for unrecognized (possibly user-defined)
21580 vector types. */
21581 return NULL;
21584 /* Order of allocation of core registers for Thumb: this allocation is
21585 written over the corresponding initial entries of the array
21586 initialized with REG_ALLOC_ORDER. We allocate all low registers
21587 first. Saving and restoring a low register is usually cheaper than
21588 using a call-clobbered high register. */
21590 static const int thumb_core_reg_alloc_order[] =
21592 3, 2, 1, 0, 4, 5, 6, 7,
21593 14, 12, 8, 9, 10, 11, 13, 15
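/* Reading the table above: the caller-saved argument registers r3-r0 are
   tried first, then the remaining low registers r4-r7, then lr and ip,
   and only then the high registers r8-r11, with sp and pc last.  */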
21596 /* Adjust register allocation order when compiling for Thumb. */
21598 void
21599 arm_order_regs_for_local_alloc (void)
21601 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
21602 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
21603 if (TARGET_THUMB)
21604 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
21605 sizeof (thumb_core_reg_alloc_order));
21608 /* Set default optimization options. */
21609 void
21610 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
21612 /* Enable section anchors by default at -O1 or higher.
21613 Use 2 to distinguish from an explicit -fsection-anchors
21614 given on the command line. */
21615 if (level > 0)
21616 flag_section_anchors = 2;
21619 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
21621 bool
21622 arm_frame_pointer_required (void)
21624 return (cfun->has_nonlocal_label
21625 || SUBTARGET_FRAME_POINTER_REQUIRED
21626 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
21629 /* Only Thumb-1 lacks support for conditional execution, so return true
21630 whenever the target is not Thumb-1. */
21631 static bool
21632 arm_have_conditional_execution (void)
21634 return !TARGET_THUMB1;
21637 #include "gt-arm.h"