/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "c-family/c-pragma.h"  /* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static rtx is_jump_table (rtx);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef TARGET_HELP
#define TARGET_HELP arm_target_help

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
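/* Worked out: the range [-4088, 4095] covers 4088 + 1 + 4095 = 8184 bytes,
   a multiple of eight, so doubleword-aligned anchors stay naturally
   spaced.  */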
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;
/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;
/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                 | FL_CO_PROC)
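/* These bits are ignored when checking -mcpu/-march consistency in
   arm_override_options below: two selections only conflict if they
   differ outside FL_TUNE.  */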
#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    (FL_FOR_ARCH6T2 & ~FL_NOTM)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
   must report the mode of the memory reference from PRINT_OPERAND to
   PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

static enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
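/* Roughly: the low registers r0-r7, excluding the Thumb frame pointer and
   any of SP, PC or the PIC base register that happen to fall in that
   range.  */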
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
const struct tune_params arm_slowmul_tune =
  arm_slowmul_rtx_costs,

const struct tune_params arm_fastmul_tune =
  arm_fastmul_rtx_costs,

const struct tune_params arm_xscale_tune =
  arm_xscale_rtx_costs,

const struct tune_params arm_9e_tune =
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};
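/* For illustration only (a hypothetical entry, not necessarily present in
   arm-cores.def): ARM_CORE("foo", foo, 5TE, FL_LDSCHED, 9e) would expand to
     {"foo", foo, "5TE", FL_LDSCHED | FL_FOR_ARCH5TE, &arm_9e_tune},
   i.e. the per-core flags are OR-ed with the architecture baseline and
   paired with a tuning table.  */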
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the CPU.  */

  {"armv2",    arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",    arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",   arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",    arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",   arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",    arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",   arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",   arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te",  arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",    arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",   arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",   mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",   arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk",  arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2",  arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m",  cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",    cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a",  cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r",  cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m",  cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"armv7e-m", cortexm3,   "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
  {"ep9312",   ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",   iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2",  iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
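/* For example, selecting the "armv7-a" entry above rewrites this to
   "__ARM_ARCH_7A__" via the sprintf in arm_override_options below.  */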
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa",            ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
  {"fpe2",           ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
  {"fpe3",           ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
  {"maverick",       ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
  {"vfp",            ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
  {"vfpv3",          ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
  {"vfpv3-fp16",     ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
  {"vfpv3-d16",      ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
  {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
  {"vfpv3xd",        ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16",   ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
  {"neon",           ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true, false},
  {"neon-fp16",      ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true, true},
  {"vfpv4",          ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
  {"vfpv4-d16",      ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
  {"fpv4-sp-d16",    ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4",     ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
  /* Compatibility aliases.  */
  {"vfp3",           ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
};
struct float_abi
{
  const char *name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",   ARM_FLOAT_ABI_SOFT},
  {"softfp", ARM_FLOAT_ABI_SOFTFP},
  {"hard",   ARM_FLOAT_ABI_HARD}
};

struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */

static const struct fp16_format all_fp16_formats[] =
{
  {"none",        ARM_FP16_FORMAT_NONE},
  {"ieee",        ARM_FP16_FORMAT_IEEE},
  {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};
/* Supported TLS relocations.  */

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
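/* Example: for value = 0x29 (binary 101001) the loop clears bits 0, 3 and 5
   in successive iterations, so bit_count returns 3.  */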
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     callee-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     functions.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
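  /* Concretely (per the run-time ABI): __aeabi_idivmod returns the quotient
     in r0 and the remainder in r1, and the DImode variants return the
     quotient in {r0, r1} and the remainder in {r2, r3}; a plain division
     simply ignores the remainder registers.  */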
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Lookup NAME in SEL.  */

static const struct processors *
arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
{
  if (!(name && *name))
    return NULL;

  for (; sel->name != NULL; sel++)
    if (streq (name, sel->name))
      return sel;

  error ("bad value (%s) for %s switch", name, desc);
  return NULL;
}
/* Implement TARGET_HANDLE_OPTION.  */

static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
      return true;

    case OPT_mcpu_:
      arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
      return true;

    default:
      return true;
    }
}
static void
arm_target_help (void)
{
  static int columns = 0;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
      GET_ENVIRONMENT (p, "COLUMNS");
          int value = atoi (p);

      /* Use a reasonable default.  */

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;

  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_cores[i].name);
          remaining -= len + 2;
        }
          printf ("\n    %s", all_cores[i].name);
          remaining = columns - (len + 4);

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;

  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_architectures[i].name);
          remaining -= len + 2;
        }
          printf ("\n    %s", all_architectures[i].name);
          remaining = columns - (len + 4);
/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }

  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors * best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
        {
          if (streq (all_fp16_formats[i].name, target_fp16_format_name))
            {
              arm_fp16_format = all_fp16_formats[i].fp16_format_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_fp16_formats))
        error ("invalid __fp16 format option: -mfp16-format=%s",
               target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
        {
          if (streq (arm_all_abis[i].name, target_abi_name))
            {
              arm_abi = arm_all_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (arm_all_abis))
        error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported. Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
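      /* Worked out: 248 + 1 + 4095 = 4344 bytes, again a multiple of
         eight.  */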
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
        target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
        target_fpu_name = "fpe3";
      else
        error ("invalid floating point emulation option: -mfpe=%s",
               target_fpe_name);
    }

  if (target_fpu_name == NULL)
    {
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
        target_fpu_name = "maverick";
      else
        target_fpu_name = "fpe2";
#endif
    }

  arm_fpu_desc = NULL;
  for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
    {
      if (streq (all_fpus[i].name, target_fpu_name))
        {
          arm_fpu_desc = &all_fpus[i];
          break;
        }
    }

  if (!arm_fpu_desc)
    error ("invalid floating point option: -mfpu=%s", target_fpu_name);
  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
        arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
        arm_fpu_attr = FPU_FPE3;
      else
        arm_fpu_attr = FPU_FPA;
      break;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
        {
          if (streq (all_float_abis[i].name, target_float_abi_name))
            {
              arm_float_abi = all_float_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_float_abis))
        error ("invalid floating point abi: -mfloat-abi=%s",
               target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (TARGET_AAPCS_BASED
      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support -mcaller-super-interworking");
      if (TARGET_CALLEE_INTERWORKING)
        error ("AAPCS does not support -mcallee-super-interworking");
    }
1702 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1703 will ever exist. GCC makes no attempt to support this combination. */
1704 if (TARGET_IWMMXT
&& !TARGET_SOFT_FLOAT
)
1705 sorry ("iWMMXt and hardware floating point");
1707 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1708 if (TARGET_THUMB2
&& TARGET_IWMMXT
)
1709 sorry ("Thumb-2 iWMMXt");
1711 /* __fp16 support currently assumes the core has ldrh. */
1712 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
1713 sorry ("__fp16 and no ldrh");
  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
        arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
               && TARGET_HARD_FLOAT
               && TARGET_VFP)
        arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
        arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
        sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
        arm_pcs_default = ARM_PCS_APCS;
      else
        arm_pcs_default = ARM_PCS_ATPCS;
    }
  /* For arm2/3 there is no need to do any scheduling if there is only
     a floating point emulator, or we are doing software floating-point.  */
  if ((TARGET_SOFT_FLOAT
       || (TARGET_FPA && arm_fpu_desc->rev))
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;
  if (target_thread_switch)
    {
      if (strcmp (target_thread_switch, "soft") == 0)
        target_thread_pointer = TP_SOFT;
      else if (strcmp (target_thread_switch, "auto") == 0)
        target_thread_pointer = TP_AUTO;
      else if (strcmp (target_thread_switch, "cp15") == 0)
        target_thread_pointer = TP_CP15;
      else
        error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
    }

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
        target_thread_pointer = TP_CP15;
      else
        target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");
  /* Override the default structure alignment for AAPCS ABI.  */
  if (TARGET_AAPCS_BASED)
    arm_structure_size_boundary = 8;

  if (structure_size_string != NULL)
    {
      int size = strtol (structure_size_string, NULL, 0);

      if (size == 8 || size == 32
          || (ARM_DOUBLEWORD_ALIGN && size == 64))
        arm_structure_size_boundary = size;
      else
        warning (0, "structure size boundary can only be set to %s",
                 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
    }
  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    error ("RTP PIC is incompatible with Thumb");

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
        warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
        warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
               || pic_register == HARD_FRAME_POINTER_REGNUM
               || pic_register == STACK_POINTER_REGNUM
               || pic_register >= PC_REGNUM
               || (TARGET_VXWORKS_RTP
                   && (unsigned int) pic_register != arm_pic_register))
        error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
        arm_pic_register = pic_register;
    }
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
        fix_cm3_ldrd = 1;
      else
        fix_cm3_ldrd = 0;
    }

  if (TARGET_THUMB1 && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }

  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
         are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    {
      /* StrongARM has early execution of branches, so a sequence
         that is worth skipping is shorter.  */
      if (arm_tune_strongarm)
        max_insns_skipped = 3;
    }

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init(&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
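
/* Usage sketch (illustrative, not part of the compiler's own logic): user
   code selects one of the table entries above through the argument of the
   "interrupt" (or "isr") function attribute.  The handler names below are
   hypothetical.

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fast_handler (void) __attribute__ ((interrupt ("FIQ")));
     void swi_handler  (void) __attribute__ ((interrupt ("SWI")));

   arm_isr_value below maps these strings to ARM_FT_ISR, ARM_FT_FIQ and
   ARM_FT_EXCEPTION respectively.  */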
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
          || !(flag_unwind_tables
               || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
           ldr          r8, [pc, #0]
           ldr          pc, [pc]
           .word        static chain value
           .word        function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      /* The Thumb-2 trampoline is similar to the arm implementation.
         Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
                   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
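
/* Worked example (illustrative, ARM mode): once the template above has been
   copied to address T and initialized, memory looks like

       T+0:   ldr  r8, [pc, #0]    @ pc reads as T+8, so this loads T+8
       T+4:   ldr  pc, [pc, #0]    @ pc reads as T+12, so this loads T+12
       T+8:   .word <static chain value>
       T+12:  .word <function address>

   assuming r8 is the static chain register, as in the comment above;
   arm_trampoline_init below fills in the words at offsets 8 and 12.  */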
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
                     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
                     plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
}
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
                                NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
         is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
                                 && stack_adjust == 4)))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
         the default abi) ... */
      if (!call_used_regs[3])
        return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
        return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
        {
          gcc_assert (GET_CODE (sibling) == CALL_INSN);

          if (find_regno_fusage (sibling, USE, 3))
            return 0;
        }

      /* ... and that there are no call-saved registers in r0-r2
         (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
        return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
         conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
        return 0;

      if (flag_pic
          && arm_pic_register != INVALID_REGNUM
          && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
        return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the FPA regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_FPA)
    for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  /* Likewise VFP regs.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
        return 0;

  return 1;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
          != ((~(unsigned HOST_WIDE_INT) 0)
              & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
          && ((i & ~0xc000003f) == 0
              || (i & ~0xf000000f) == 0
              || (i & ~0xfc000003) == 0))
        return TRUE;
    }
  else
    {
      HOST_WIDE_INT v;

      /* Allow repeated pattern.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
        return TRUE;
    }

  return FALSE;
}
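
/* Illustrative sketch (hypothetical helper, not used elsewhere in this
   file): the classic ARM-mode rule that const_ok_for_arm applies to 32-bit
   values, leaving aside the Thumb-2 repeated-pattern cases and the >32-bit
   HOST_WIDE_INT handling above.  A value is a valid immediate iff it is an
   8-bit value rotated right by an even amount; e.g. 0xff000000 and
   0x000003fc are valid, 0x00000101 is not.  */
static inline int
arm_immediate_by_rotation_p (unsigned int x)
{
  unsigned int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT bits; if the result fits in 8 bits then X is
         that 8-bit value rotated right by ROT.  */
      unsigned int v = rot ? ((x << rot) | (x >> (32 - rot))) : x;

      if ((v & ~0xffu) == 0)
        return 1;
    }
  return 0;
}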
/* Return true if I is a valid constant for the operation CODE.  */
static int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case PLUS:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:         /* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
2309 /* Emit a sequence of insns to handle a large constant.
2310 CODE is the code of the operation required, it can be any of SET, PLUS,
2311 IOR, AND, XOR, MINUS;
2312 MODE is the mode in which the operation is being performed;
2313 VAL is the integer to operate on;
2314 SOURCE is the other operand (a register, or a null-pointer for SET);
2315 SUBTARGETS means it is safe to create scratch registers if that will
2316 either produce a simpler sequence, or we will want to cse the values.
2317 Return value is the number of insns emitted. */
2319 /* ??? Tweak this for thumb2. */
2321 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
2322 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
2326 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
2327 cond
= COND_EXEC_TEST (PATTERN (insn
));
2331 if (subtargets
|| code
== SET
2332 || (GET_CODE (target
) == REG
&& GET_CODE (source
) == REG
2333 && REGNO (target
) != REGNO (source
)))
2335 /* After arm_reorg has been called, we can't fix up expensive
2336 constants by pushing them into memory so we must synthesize
2337 them in-line, regardless of the cost. This is only likely to
2338 be more costly on chips that have load delay slots and we are
2339 compiling without running the scheduler (so no splitting
2340 occurred before the final instruction emission).
2342 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2344 if (!after_arm_reorg
2346 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
2348 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
2353 /* Currently SET is the only monadic value for CODE, all
2354 the rest are diadic. */
2355 if (TARGET_USE_MOVT
)
2356 arm_emit_movpair (target
, GEN_INT (val
));
2358 emit_set_insn (target
, GEN_INT (val
));
2364 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
2366 if (TARGET_USE_MOVT
)
2367 arm_emit_movpair (temp
, GEN_INT (val
));
2369 emit_set_insn (temp
, GEN_INT (val
));
2371 /* For MINUS, the value is subtracted from, since we never
2372 have subtraction of a constant. */
2374 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
2376 emit_set_insn (target
,
2377 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
2383 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
2387 /* Return the number of instructions required to synthesize the given
2388 constant, if we start emitting them from bit-position I. */
2390 count_insns_for_constant (HOST_WIDE_INT remainder
, int i
)
2392 HOST_WIDE_INT temp1
;
2393 int step_size
= TARGET_ARM
? 2 : 1;
2396 gcc_assert (TARGET_ARM
|| i
== 0);
2404 if (remainder
& (((1 << step_size
) - 1) << (i
- step_size
)))
2409 temp1
= remainder
& ((0x0ff << end
)
2410 | ((i
< end
) ? (0xff >> (32 - end
)) : 0));
2411 remainder
&= ~temp1
;
2416 } while (remainder
);
2421 find_best_start (unsigned HOST_WIDE_INT remainder
)
2423 int best_consecutive_zeros
= 0;
2427 /* If we aren't targetting ARM, the best place to start is always at
2432 for (i
= 0; i
< 32; i
+= 2)
2434 int consecutive_zeros
= 0;
2436 if (!(remainder
& (3 << i
)))
2438 while ((i
< 32) && !(remainder
& (3 << i
)))
2440 consecutive_zeros
+= 2;
2443 if (consecutive_zeros
> best_consecutive_zeros
)
2445 best_consecutive_zeros
= consecutive_zeros
;
2446 best_start
= i
- consecutive_zeros
;
2452 /* So long as it won't require any more insns to do so, it's
2453 desirable to emit a small constant (in bits 0...9) in the last
2454 insn. This way there is more chance that it can be combined with
2455 a later addressing insn to form a pre-indexed load or store
2456 operation. Consider:
2458 *((volatile int *)0xe0000100) = 1;
2459 *((volatile int *)0xe0000110) = 2;
2461 We want this to wind up as:
2465 str rB, [rA, #0x100]
2467 str rB, [rA, #0x110]
2469 rather than having to synthesize both large constants from scratch.
2471 Therefore, we calculate how many insns would be required to emit
2472 the constant starting from `best_start', and also starting from
2473 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2474 yield a shorter sequence, we may as well use zero. */
2476 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < remainder
)
2477 && (count_insns_for_constant (remainder
, 0) <=
2478 count_insns_for_constant (remainder
, best_start
)))
2484 /* Emit an instruction with the indicated PATTERN. If COND is
2485 non-NULL, conditionalize the execution of the instruction on COND
2489 emit_constant_insn (rtx cond
, rtx pattern
)
2492 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
2493 emit_insn (pattern
);
2496 /* As above, but extra parameter GENERATE which, if clear, suppresses
2498 /* ??? This needs more work for thumb2. */
2501 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
2502 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
2507 int final_invert
= 0;
2508 int can_negate_initial
= 0;
2510 int num_bits_set
= 0;
2511 int set_sign_bit_copies
= 0;
2512 int clear_sign_bit_copies
= 0;
2513 int clear_zero_bit_copies
= 0;
2514 int set_zero_bit_copies
= 0;
2516 unsigned HOST_WIDE_INT temp1
, temp2
;
2517 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
2518 int step_size
= TARGET_ARM
? 2 : 1;
2520 /* Find out which operations are safe for a given CODE. Also do a quick
2521 check for degenerate cases; these can occur when DImode operations
2532 can_negate_initial
= 1;
2536 if (remainder
== 0xffffffff)
2539 emit_constant_insn (cond
,
2540 gen_rtx_SET (VOIDmode
, target
,
2541 GEN_INT (ARM_SIGN_EXTEND (val
))));
2547 if (reload_completed
&& rtx_equal_p (target
, source
))
2551 emit_constant_insn (cond
,
2552 gen_rtx_SET (VOIDmode
, target
, source
));
2564 emit_constant_insn (cond
,
2565 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
2568 if (remainder
== 0xffffffff)
2570 if (reload_completed
&& rtx_equal_p (target
, source
))
2573 emit_constant_insn (cond
,
2574 gen_rtx_SET (VOIDmode
, target
, source
));
2583 if (reload_completed
&& rtx_equal_p (target
, source
))
2586 emit_constant_insn (cond
,
2587 gen_rtx_SET (VOIDmode
, target
, source
));
2591 if (remainder
== 0xffffffff)
2594 emit_constant_insn (cond
,
2595 gen_rtx_SET (VOIDmode
, target
,
2596 gen_rtx_NOT (mode
, source
)));
2602 /* We treat MINUS as (val - source), since (source - val) is always
2603 passed as (source + (-val)). */
2607 emit_constant_insn (cond
,
2608 gen_rtx_SET (VOIDmode
, target
,
2609 gen_rtx_NEG (mode
, source
)));
2612 if (const_ok_for_arm (val
))
2615 emit_constant_insn (cond
,
2616 gen_rtx_SET (VOIDmode
, target
,
2617 gen_rtx_MINUS (mode
, GEN_INT (val
),
2629 /* If we can do it in one insn get out quickly. */
2630 if (const_ok_for_arm (val
)
2631 || (can_negate_initial
&& const_ok_for_arm (-val
))
2632 || (can_invert
&& const_ok_for_arm (~val
)))
2635 emit_constant_insn (cond
,
2636 gen_rtx_SET (VOIDmode
, target
,
2638 ? gen_rtx_fmt_ee (code
, mode
, source
,
2644 /* Calculate a few attributes that may be useful for specific
2646 /* Count number of leading zeros. */
2647 for (i
= 31; i
>= 0; i
--)
2649 if ((remainder
& (1 << i
)) == 0)
2650 clear_sign_bit_copies
++;
2655 /* Count number of leading 1's. */
2656 for (i
= 31; i
>= 0; i
--)
2658 if ((remainder
& (1 << i
)) != 0)
2659 set_sign_bit_copies
++;
2664 /* Count number of trailing zero's. */
2665 for (i
= 0; i
<= 31; i
++)
2667 if ((remainder
& (1 << i
)) == 0)
2668 clear_zero_bit_copies
++;
2673 /* Count number of trailing 1's. */
2674 for (i
= 0; i
<= 31; i
++)
2676 if ((remainder
& (1 << i
)) != 0)
2677 set_zero_bit_copies
++;
2685 /* See if we can use movw. */
2686 if (arm_arch_thumb2
&& (remainder
& 0xffff0000) == 0)
2689 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
2694 /* See if we can do this by sign_extending a constant that is known
2695 to be negative. This is a good, way of doing it, since the shift
2696 may well merge into a subsequent insn. */
2697 if (set_sign_bit_copies
> 1)
2699 if (const_ok_for_arm
2700 (temp1
= ARM_SIGN_EXTEND (remainder
2701 << (set_sign_bit_copies
- 1))))
2705 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2706 emit_constant_insn (cond
,
2707 gen_rtx_SET (VOIDmode
, new_src
,
2709 emit_constant_insn (cond
,
2710 gen_ashrsi3 (target
, new_src
,
2711 GEN_INT (set_sign_bit_copies
- 1)));
2715 /* For an inverted constant, we will need to set the low bits,
2716 these will be shifted out of harm's way. */
2717 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
2718 if (const_ok_for_arm (~temp1
))
2722 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2723 emit_constant_insn (cond
,
2724 gen_rtx_SET (VOIDmode
, new_src
,
2726 emit_constant_insn (cond
,
2727 gen_ashrsi3 (target
, new_src
,
2728 GEN_INT (set_sign_bit_copies
- 1)));
2734 /* See if we can calculate the value as the difference between two
2735 valid immediates. */
2736 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
2738 int topshift
= clear_sign_bit_copies
& ~1;
2740 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
2741 & (0xff000000 >> topshift
));
2743 /* If temp1 is zero, then that means the 9 most significant
2744 bits of remainder were 1 and we've caused it to overflow.
2745 When topshift is 0 we don't need to do anything since we
2746 can borrow from 'bit 32'. */
2747 if (temp1
== 0 && topshift
!= 0)
2748 temp1
= 0x80000000 >> (topshift
- 1);
2750 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
2752 if (const_ok_for_arm (temp2
))
2756 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2757 emit_constant_insn (cond
,
2758 gen_rtx_SET (VOIDmode
, new_src
,
2760 emit_constant_insn (cond
,
2761 gen_addsi3 (target
, new_src
,
2769 /* See if we can generate this by setting the bottom (or the top)
2770 16 bits, and then shifting these into the other half of the
2771 word. We only look for the simplest cases, to do more would cost
2772 too much. Be careful, however, not to generate this when the
2773 alternative would take fewer insns. */
2774 if (val
& 0xffff0000)
2776 temp1
= remainder
& 0xffff0000;
2777 temp2
= remainder
& 0x0000ffff;
2779 /* Overlaps outside this range are best done using other methods. */
2780 for (i
= 9; i
< 24; i
++)
2782 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
2783 && !const_ok_for_arm (temp2
))
2785 rtx new_src
= (subtargets
2786 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
2788 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
2789 source
, subtargets
, generate
);
2797 gen_rtx_ASHIFT (mode
, source
,
2804 /* Don't duplicate cases already considered. */
2805 for (i
= 17; i
< 24; i
++)
2807 if (((temp1
| (temp1
>> i
)) == remainder
)
2808 && !const_ok_for_arm (temp1
))
2810 rtx new_src
= (subtargets
2811 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
2813 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
2814 source
, subtargets
, generate
);
2819 gen_rtx_SET (VOIDmode
, target
,
2822 gen_rtx_LSHIFTRT (mode
, source
,
2833 /* If we have IOR or XOR, and the constant can be loaded in a
2834 single instruction, and we can find a temporary to put it in,
2835 then this can be done in two instructions instead of 3-4. */
2837 /* TARGET can't be NULL if SUBTARGETS is 0 */
2838 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
2840 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
2844 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2846 emit_constant_insn (cond
,
2847 gen_rtx_SET (VOIDmode
, sub
,
2849 emit_constant_insn (cond
,
2850 gen_rtx_SET (VOIDmode
, target
,
2851 gen_rtx_fmt_ee (code
, mode
,
2862 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2863 and the remainder 0s for e.g. 0xfff00000)
2864 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2866 This can be done in 2 instructions by using shifts with mov or mvn.
2871 mvn r0, r0, lsr #12 */
2872 if (set_sign_bit_copies
> 8
2873 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
2877 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2878 rtx shift
= GEN_INT (set_sign_bit_copies
);
2882 gen_rtx_SET (VOIDmode
, sub
,
2884 gen_rtx_ASHIFT (mode
,
2889 gen_rtx_SET (VOIDmode
, target
,
2891 gen_rtx_LSHIFTRT (mode
, sub
,
2898 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2900 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2902 For eg. r0 = r0 | 0xfff
2907 if (set_zero_bit_copies
> 8
2908 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
2912 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2913 rtx shift
= GEN_INT (set_zero_bit_copies
);
2917 gen_rtx_SET (VOIDmode
, sub
,
2919 gen_rtx_LSHIFTRT (mode
,
2924 gen_rtx_SET (VOIDmode
, target
,
2926 gen_rtx_ASHIFT (mode
, sub
,
2932 /* This will never be reached for Thumb2 because orn is a valid
2933 instruction. This is for Thumb1 and the ARM 32 bit cases.
2935 x = y | constant (such that ~constant is a valid constant)
2937 x = ~(~y & ~constant).
2939 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
2943 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2944 emit_constant_insn (cond
,
2945 gen_rtx_SET (VOIDmode
, sub
,
2946 gen_rtx_NOT (mode
, source
)));
2949 sub
= gen_reg_rtx (mode
);
2950 emit_constant_insn (cond
,
2951 gen_rtx_SET (VOIDmode
, sub
,
2952 gen_rtx_AND (mode
, source
,
2954 emit_constant_insn (cond
,
2955 gen_rtx_SET (VOIDmode
, target
,
2956 gen_rtx_NOT (mode
, sub
)));
2963 /* See if two shifts will do 2 or more insn's worth of work. */
2964 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
2966 HOST_WIDE_INT shift_mask
= ((0xffffffff
2967 << (32 - clear_sign_bit_copies
))
2970 if ((remainder
| shift_mask
) != 0xffffffff)
2974 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2975 insns
= arm_gen_constant (AND
, mode
, cond
,
2976 remainder
| shift_mask
,
2977 new_src
, source
, subtargets
, 1);
2982 rtx targ
= subtargets
? NULL_RTX
: target
;
2983 insns
= arm_gen_constant (AND
, mode
, cond
,
2984 remainder
| shift_mask
,
2985 targ
, source
, subtargets
, 0);
2991 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2992 rtx shift
= GEN_INT (clear_sign_bit_copies
);
2994 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
2995 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
3001 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
3003 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
3005 if ((remainder
| shift_mask
) != 0xffffffff)
3009 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3011 insns
= arm_gen_constant (AND
, mode
, cond
,
3012 remainder
| shift_mask
,
3013 new_src
, source
, subtargets
, 1);
3018 rtx targ
= subtargets
? NULL_RTX
: target
;
3020 insns
= arm_gen_constant (AND
, mode
, cond
,
3021 remainder
| shift_mask
,
3022 targ
, source
, subtargets
, 0);
3028 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3029 rtx shift
= GEN_INT (clear_zero_bit_copies
);
3031 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
3032 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
3044 for (i
= 0; i
< 32; i
++)
3045 if (remainder
& (1 << i
))
3049 || (code
!= IOR
&& can_invert
&& num_bits_set
> 16))
3050 remainder
^= 0xffffffff;
3051 else if (code
== PLUS
&& num_bits_set
> 16)
3052 remainder
= (-remainder
) & 0xffffffff;
3054 /* For XOR, if more than half the bits are set and there's a sequence
3055 of more than 8 consecutive ones in the pattern then we can XOR by the
3056 inverted constant and then invert the final result; this may save an
3057 instruction and might also lead to the final mvn being merged with
3058 some other operation. */
3059 else if (code
== XOR
&& num_bits_set
> 16
3060 && (count_insns_for_constant (remainder
^ 0xffffffff,
3062 (remainder
^ 0xffffffff))
3063 < count_insns_for_constant (remainder
,
3064 find_best_start (remainder
))))
3066 remainder
^= 0xffffffff;
3075 /* Now try and find a way of doing the job in either two or three
3077 We start by looking for the largest block of zeros that are aligned on
3078 a 2-bit boundary, we then fill up the temps, wrapping around to the
3079 top of the word when we drop off the bottom.
3080 In the worst case this code should produce no more than four insns.
3081 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3082 best place to start. */
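  /* Worked example (illustrative): in ARM mode each instruction emitted by
     the loop below contributes one 8-bit chunk at an even rotation, so a
     value such as 0x12345678 is built from the chunks

         0x12000000 + 0x00340000 + 0x00005600 + 0x00000078

     i.e. four instructions in the worst case, as noted above; the choice
     made by find_best_start only affects which chunk is emitted first.  */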
  /* ??? Use thumb2 replicated constants when the high and low halfwords are
     the same.  */
3087 /* Now start emitting the insns. */
3088 i
= find_best_start (remainder
);
3095 if (remainder
& (3 << (i
- 2)))
3100 temp1
= remainder
& ((0x0ff << end
)
3101 | ((i
< end
) ? (0xff >> (32 - end
)) : 0));
3102 remainder
&= ~temp1
;
3106 rtx new_src
, temp1_rtx
;
3108 if (code
== SET
|| code
== MINUS
)
3110 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
3111 if (can_invert
&& code
!= MINUS
)
3116 if ((final_invert
|| remainder
) && subtargets
)
3117 new_src
= gen_reg_rtx (mode
);
3122 else if (can_negate
)
3126 temp1
= trunc_int_for_mode (temp1
, mode
);
3127 temp1_rtx
= GEN_INT (temp1
);
3131 else if (code
== MINUS
)
3132 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
3134 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
3136 emit_constant_insn (cond
,
3137 gen_rtx_SET (VOIDmode
, new_src
,
3147 else if (code
== MINUS
)
3153 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3163 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
3164 gen_rtx_NOT (mode
, source
)));
3171 /* Canonicalize a comparison so that we are more likely to recognize it.
3172 This can be done for a few constant compares, where we can make the
3173 immediate value easier to load. */
3176 arm_canonicalize_comparison (enum rtx_code code
, enum machine_mode mode
,
3179 unsigned HOST_WIDE_INT i
= INTVAL (*op1
);
3180 unsigned HOST_WIDE_INT maxval
;
3181 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
3192 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3194 *op1
= GEN_INT (i
+ 1);
3195 return code
== GT
? GE
: LT
;
3202 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
3204 *op1
= GEN_INT (i
- 1);
3205 return code
== GE
? GT
: LE
;
3211 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
3212 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3214 *op1
= GEN_INT (i
+ 1);
3215 return code
== GTU
? GEU
: LTU
;
3222 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
3224 *op1
= GEN_INT (i
- 1);
3225 return code
== GEU
? GTU
: LEU
;
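
  /* Example of the transformation above (illustrative): 4095 is not a valid
     ARM immediate but 4096 is, so "x > 4095" is rewritten as "x >= 4096"
     (GT becomes GE with *op1 incremented); GE/LE are handled the same way
     with *op1 decremented, and GTU/GEU follow the unsigned variants.  */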
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value(const_tree type, const_tree func,
                   bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  return LIBCALL_VALUE (mode);
}
static int
libcall_eq (const void *p1, const void *p2)
{
  return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
}

static hashval_t
libcall_hash (const void *p1)
{
  return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
}

static void
add_libcall (htab_t htab, rtx libcall)
{
  *htab_find_slot (htab, libcall, INSERT) = libcall;
}
3290 arm_libcall_uses_aapcs_base (const_rtx libcall
)
3292 static bool init_done
= false;
3293 static htab_t libcall_htab
;
3299 libcall_htab
= htab_create (31, libcall_hash
, libcall_eq
,
3301 add_libcall (libcall_htab
,
3302 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
3303 add_libcall (libcall_htab
,
3304 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
3305 add_libcall (libcall_htab
,
3306 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
3307 add_libcall (libcall_htab
,
3308 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
3310 add_libcall (libcall_htab
,
3311 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
3312 add_libcall (libcall_htab
,
3313 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
3314 add_libcall (libcall_htab
,
3315 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
3316 add_libcall (libcall_htab
,
3317 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
3319 add_libcall (libcall_htab
,
3320 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
3321 add_libcall (libcall_htab
,
3322 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
3323 add_libcall (libcall_htab
,
3324 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
3325 add_libcall (libcall_htab
,
3326 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
3327 add_libcall (libcall_htab
,
3328 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
3329 add_libcall (libcall_htab
,
3330 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
3333 return libcall
&& htab_find (libcall_htab
, libcall
) != NULL
;
3337 arm_libcall_value (enum machine_mode mode
, const_rtx libcall
)
3339 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
3340 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
3342 /* The following libcalls return their result in integer registers,
3343 even though they return a floating point value. */
3344 if (arm_libcall_uses_aapcs_base (libcall
))
3345 return gen_rtx_REG (mode
, ARG_REGISTER(1));
3349 return LIBCALL_VALUE (mode
);
3352 /* Determine the amount of memory needed to store the possible return
3353 registers of an untyped call. */
3355 arm_apply_result_size (void)
3361 if (TARGET_HARD_FLOAT_ABI
)
3367 if (TARGET_MAVERICK
)
3370 if (TARGET_IWMMXT_ABI
)
3377 /* Decide whether TYPE should be returned in memory (true)
3378 or in a register (false). FNTYPE is the type of the function making
3381 arm_return_in_memory (const_tree type
, const_tree fntype
)
3385 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
3387 if (TARGET_AAPCS_BASED
)
3389 /* Simple, non-aggregate types (ie not including vectors and
3390 complex) are always returned in a register (or registers).
3391 We don't care about which register here, so we can short-cut
3392 some of the detail. */
3393 if (!AGGREGATE_TYPE_P (type
)
3394 && TREE_CODE (type
) != VECTOR_TYPE
3395 && TREE_CODE (type
) != COMPLEX_TYPE
)
3398 /* Any return value that is no larger than one word can be
3400 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
3403 /* Check any available co-processors to see if they accept the
3404 type as a register candidate (VFP, for example, can return
3405 some aggregates in consecutive registers). These aren't
3406 available if the call is variadic. */
3407 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
3410 /* Vector values should be returned using ARM registers, not
3411 memory (unless they're over 16 bytes, which will break since
3412 we only have four call-clobbered registers to play with). */
3413 if (TREE_CODE (type
) == VECTOR_TYPE
)
3414 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3416 /* The rest go in memory. */
3420 if (TREE_CODE (type
) == VECTOR_TYPE
)
3421 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3423 if (!AGGREGATE_TYPE_P (type
) &&
3424 (TREE_CODE (type
) != VECTOR_TYPE
))
3425 /* All simple types are returned in registers. */
3428 if (arm_abi
!= ARM_ABI_APCS
)
3430 /* ATPCS and later return aggregate types in memory only if they are
3431 larger than a word (or are variable size). */
3432 return (size
< 0 || size
> UNITS_PER_WORD
);
3435 /* For the arm-wince targets we choose to be compatible with Microsoft's
3436 ARM and Thumb compilers, which always return aggregates in memory. */
3438 /* All structures/unions bigger than one word are returned in memory.
3439 Also catch the case where int_size_in_bytes returns -1. In this case
3440 the aggregate is either huge or of variable size, and in either case
3441 we will want to return it via memory and not in a register. */
3442 if (size
< 0 || size
> UNITS_PER_WORD
)
3445 if (TREE_CODE (type
) == RECORD_TYPE
)
3449 /* For a struct the APCS says that we only return in a register
3450 if the type is 'integer like' and every addressable element
3451 has an offset of zero. For practical purposes this means
3452 that the structure can have at most one non bit-field element
3453 and that this element must be the first one in the structure. */
3455 /* Find the first field, ignoring non FIELD_DECL things which will
3456 have been created by C++. */
3457 for (field
= TYPE_FIELDS (type
);
3458 field
&& TREE_CODE (field
) != FIELD_DECL
;
3459 field
= TREE_CHAIN (field
))
3463 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3465 /* Check that the first field is valid for returning in a register. */
3467 /* ... Floats are not allowed */
3468 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3471 /* ... Aggregates that are not themselves valid for returning in
3472 a register are not allowed. */
3473 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3476 /* Now check the remaining fields, if any. Only bitfields are allowed,
3477 since they are not addressable. */
3478 for (field
= TREE_CHAIN (field
);
3480 field
= TREE_CHAIN (field
))
3482 if (TREE_CODE (field
) != FIELD_DECL
)
3485 if (!DECL_BIT_FIELD_TYPE (field
))
3492 if (TREE_CODE (type
) == UNION_TYPE
)
3496 /* Unions can be returned in registers if every element is
3497 integral, or can be returned in an integer register. */
3498 for (field
= TYPE_FIELDS (type
);
3500 field
= TREE_CHAIN (field
))
3502 if (TREE_CODE (field
) != FIELD_DECL
)
3505 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3508 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3514 #endif /* not ARM_WINCE */
3516 /* Return all other types in memory. */
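
/* Illustrative examples of the rules above (hypothetical types):

     struct one_word  { int i; };         -- APCS: "integer like", in a register
     struct two_words { int i; int j; };  -- larger than a word, in memory
     struct has_float { float f; };       -- APCS: first field is a float, in memory

   Under the ATPCS-and-later branch earlier in this function the decision
   for such small aggregates is made on size alone, so one_word stays in a
   register and two_words still goes in memory.  */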
3520 /* Indicate whether or not words of a double are in big-endian order. */
3523 arm_float_words_big_endian (void)
3525 if (TARGET_MAVERICK
)
3528 /* For FPA, float words are always big-endian. For VFP, floats words
3529 follow the memory system mode. */
3537 return (TARGET_BIG_END
? 1 : 0);
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
3586 arm_get_pcs_model (const_tree type
, const_tree decl
)
3588 bool user_convention
= false;
3589 enum arm_pcs user_pcs
= arm_pcs_default
;
3594 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
3597 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
3598 user_convention
= true;
3601 if (TARGET_AAPCS_BASED
)
3603 /* Detect varargs functions. These always use the base rules
3604 (no argument is ever a candidate for a co-processor
3606 bool base_rules
= (TYPE_ARG_TYPES (type
) != 0
3607 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type
)))
3608 != void_type_node
));
3610 if (user_convention
)
3612 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
3613 sorry ("Non-AAPCS derived PCS variant");
3614 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
3615 error ("Variadic functions must use the base AAPCS variant");
3619 return ARM_PCS_AAPCS
;
3620 else if (user_convention
)
3622 else if (decl
&& flag_unit_at_a_time
)
3624 /* Local functions never leak outside this compilation unit,
3625 so we are free to use whatever conventions are
3627 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3628 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
3630 return ARM_PCS_AAPCS_LOCAL
;
3633 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
3634 sorry ("PCS variant");
3636 /* For everything else we use the target's default. */
3637 return arm_pcs_default
;
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
                    const_tree fntype ATTRIBUTE_UNUSED,
                    rtx libcall ATTRIBUTE_UNUSED,
                    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
3652 /* Walk down the type tree of TYPE counting consecutive base elements.
3653 If *MODEP is VOIDmode, then set it to the first valid floating point
3654 type. If a non-floating point type is found, or if a floating point
3655 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3656 otherwise return the count in the sub-tree. */
3658 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
3660 enum machine_mode mode
;
3663 switch (TREE_CODE (type
))
3666 mode
= TYPE_MODE (type
);
3667 if (mode
!= DFmode
&& mode
!= SFmode
)
3670 if (*modep
== VOIDmode
)
3679 mode
= TYPE_MODE (TREE_TYPE (type
));
3680 if (mode
!= DFmode
&& mode
!= SFmode
)
3683 if (*modep
== VOIDmode
)
3692 /* Use V2SImode and V4SImode as representatives of all 64-bit
3693 and 128-bit vector types, whether or not those modes are
3694 supported with the present options. */
3695 size
= int_size_in_bytes (type
);
3708 if (*modep
== VOIDmode
)
3711 /* Vector modes are considered to be opaque: two vectors are
3712 equivalent for the purposes of being homogeneous aggregates
3713 if they are the same size. */
3722 tree index
= TYPE_DOMAIN (type
);
3724 /* Can't handle incomplete types. */
3725 if (!COMPLETE_TYPE_P(type
))
3728 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
3731 || !TYPE_MAX_VALUE (index
)
3732 || !host_integerp (TYPE_MAX_VALUE (index
), 1)
3733 || !TYPE_MIN_VALUE (index
)
3734 || !host_integerp (TYPE_MIN_VALUE (index
), 1)
3738 count
*= (1 + tree_low_cst (TYPE_MAX_VALUE (index
), 1)
3739 - tree_low_cst (TYPE_MIN_VALUE (index
), 1));
3741 /* There must be no padding. */
3742 if (!host_integerp (TYPE_SIZE (type
), 1)
3743 || (tree_low_cst (TYPE_SIZE (type
), 1)
3744 != count
* GET_MODE_BITSIZE (*modep
)))
3756 /* Can't handle incomplete types. */
3757 if (!COMPLETE_TYPE_P(type
))
3760 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3762 if (TREE_CODE (field
) != FIELD_DECL
)
3765 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
3771 /* There must be no padding. */
3772 if (!host_integerp (TYPE_SIZE (type
), 1)
3773 || (tree_low_cst (TYPE_SIZE (type
), 1)
3774 != count
* GET_MODE_BITSIZE (*modep
)))
3781 case QUAL_UNION_TYPE
:
3783 /* These aren't very interesting except in a degenerate case. */
3788 /* Can't handle incomplete types. */
3789 if (!COMPLETE_TYPE_P(type
))
3792 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3794 if (TREE_CODE (field
) != FIELD_DECL
)
3797 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
3800 count
= count
> sub_count
? count
: sub_count
;
3803 /* There must be no padding. */
3804 if (!host_integerp (TYPE_SIZE (type
), 1)
3805 || (tree_low_cst (TYPE_SIZE (type
), 1)
3806 != count
* GET_MODE_BITSIZE (*modep
)))
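
/* Illustrative examples of the walk above (hypothetical types):

     struct v2  { float x, y; };        -- 2 consecutive SFmode elements
     struct box { struct v2 lo, hi; };  -- nests to 4 SFmode elements
     struct mix { float x; double y; }; -- mixed base modes, returns -1

   The first two are homogeneous aggregates of at most four members, so
   aapcs_vfp_is_call_or_return_candidate treats them as VFP candidates;
   the last is rejected because DFmode does not match *MODEP.  */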
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    return true;

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
          (TARGET_VFP_DOUBLE || !is_double));
}
3834 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
3835 enum machine_mode mode
, const_tree type
,
3836 enum machine_mode
*base_mode
, int *count
)
3838 enum machine_mode new_mode
= VOIDmode
;
3840 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
3841 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
3842 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
3847 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
3850 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
3852 else if (type
&& (mode
== BLKmode
|| TREE_CODE (type
) == VECTOR_TYPE
))
3854 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
3856 if (ag_count
> 0 && ag_count
<= 4)
3865 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
3868 *base_mode
= new_mode
;
3873 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
3874 enum machine_mode mode
, const_tree type
)
3876 int count ATTRIBUTE_UNUSED
;
3877 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
3879 if (!use_vfp_abi (pcs_variant
, false))
3881 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
3886 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
3889 if (!use_vfp_abi (pcum
->pcs_variant
, false))
3892 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
3893 &pcum
->aapcs_vfp_rmode
,
3894 &pcum
->aapcs_vfp_rcount
);
3898 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
3899 const_tree type ATTRIBUTE_UNUSED
)
3901 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
3902 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
3905 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
3906 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
3908 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
3909 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
3912 int rcount
= pcum
->aapcs_vfp_rcount
;
3914 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
3918 /* Avoid using unsupported vector modes. */
3919 if (rmode
== V2SImode
)
3921 else if (rmode
== V4SImode
)
3928 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
3929 for (i
= 0; i
< rcount
; i
++)
3931 rtx tmp
= gen_rtx_REG (rmode
,
3932 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
3933 tmp
= gen_rtx_EXPR_LIST
3935 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
3936 XVECEXP (par
, 0, i
) = tmp
;
3939 pcum
->aapcs_reg
= par
;
3942 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
3949 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
3950 enum machine_mode mode
,
3951 const_tree type ATTRIBUTE_UNUSED
)
3953 if (!use_vfp_abi (pcs_variant
, false))
3956 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
3959 enum machine_mode ag_mode
;
3964 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
3969 if (ag_mode
== V2SImode
)
3971 else if (ag_mode
== V4SImode
)
3977 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
3978 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
3979 for (i
= 0; i
< count
; i
++)
3981 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
3982 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
3983 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
3984 XVECEXP (par
, 0, i
) = tmp
;
3990 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
3994 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
3995 enum machine_mode mode ATTRIBUTE_UNUSED
,
3996 const_tree type ATTRIBUTE_UNUSED
)
3998 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
3999 pcum
->aapcs_vfp_reg_alloc
= 0;
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is
     BLKmode) can be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a
     call; this routine must not fail and will only be called if
     is_return_candidate returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };
4055 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4060 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4061 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
4068 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
4070 /* We aren't passed a decl, so we can't check that a call is local.
4071 However, it isn't clear that that would be a win anyway, since it
4072 might limit some tail-calling opportunities. */
4073 enum arm_pcs pcs_variant
;
4077 const_tree fndecl
= NULL_TREE
;
4079 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4082 fntype
= TREE_TYPE (fntype
);
4085 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4088 pcs_variant
= arm_pcs_default
;
4090 if (pcs_variant
!= ARM_PCS_AAPCS
)
4094 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4095 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
4104 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
4107 /* We aren't passed a decl, so we can't check that a call is local.
4108 However, it isn't clear that that would be a win anyway, since it
4109 might limit some tail-calling opportunities. */
4110 enum arm_pcs pcs_variant
;
4111 int unsignedp ATTRIBUTE_UNUSED
;
4115 const_tree fndecl
= NULL_TREE
;
4117 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4120 fntype
= TREE_TYPE (fntype
);
4123 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4126 pcs_variant
= arm_pcs_default
;
4128 /* Promote integer types. */
4129 if (type
&& INTEGRAL_TYPE_P (type
))
4130 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
4132 if (pcs_variant
!= ARM_PCS_AAPCS
)
4136 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4137 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
4139 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
4143 /* Promotes small structs returned in a register to full-word size
4144 for big-endian AAPCS. */
4145 if (type
&& arm_return_in_msb (type
))
4147 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4148 if (size
% UNITS_PER_WORD
!= 0)
4150 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4151 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4155 return gen_rtx_REG (mode
, R0_REGNUM
);
4159 aapcs_libcall_value (enum machine_mode mode
)
4161 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
		  tree type, int named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
    ncrn++;

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
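/* Illustrative sketch (not part of the original sources): how rules C3-C5
   above play out for a hypothetical call f (int a, double b, int c) with
   -mfloat-abi=soft, where NUM_ARG_REGS is 4 (r0-r3):

     a: nregs = 1, ncrn 0 -> 1            C4: fits, passed in r0
     b: needs doubleword alignment, so C3 rounds ncrn 1 up to 2;
        nregs = 2, ncrn 2 -> 4            C4: fits, passed in r2/r3
     c: ncrn is 4, no registers left      C6, C7: passed on the stack

   An argument larger than the registers that remain (for example a
   16-byte struct starting at ncrn 2) is split by C5 between r2/r3 and
   the stack, provided no earlier argument has already gone to memory.  */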
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}
/* Return true if mode/type need doubleword alignment.  */
static bool
arm_needs_doubleword_align (enum machine_mode mode, tree type)
{
  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
	  || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
}
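/* Illustrative sketch (assumption, not part of the original sources): a few
   concrete cases of the predicate above.  PARM_BOUNDARY is 32 on ARM, so
   64-bit quantities need the extra alignment while 32-bit ones do not; the
   result feeds rule C3 above and the even-register check further down.  */
#if 0
static void
arm_doubleword_align_examples (void)
{
  gcc_assert (arm_needs_doubleword_align (DImode, NULL_TREE));   /* 64-bit int    */
  gcc_assert (arm_needs_doubleword_align (DFmode, NULL_TREE));   /* double        */
  gcc_assert (!arm_needs_doubleword_align (SImode, NULL_TREE));  /* 32-bit int    */
}
#endif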
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */
4354 arm_function_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4355 tree type
, int named
)
4359 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4360 a call insn (op3 of a call_value insn). */
4361 if (mode
== VOIDmode
)
4364 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4366 aapcs_layout_arg (pcum
, mode
, type
, named
);
4367 return pcum
->aapcs_reg
;
4370 /* Varargs vectors are treated the same as long long.
4371 named_count avoids having to change the way arm handles 'named' */
4372 if (TARGET_IWMMXT_ABI
4373 && arm_vector_mode_supported_p (mode
)
4374 && pcum
->named_count
> pcum
->nargs
+ 1)
4376 if (pcum
->iwmmxt_nregs
<= 9)
4377 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
4380 pcum
->can_split
= false;
4385 /* Put doubleword aligned quantities in even register pairs. */
4387 && ARM_DOUBLEWORD_ALIGN
4388 && arm_needs_doubleword_align (mode
, type
))
4391 if (mode
== VOIDmode
)
4392 /* Pick an arbitrary value for operand 2 of the call insn. */
4395 /* Only allow splitting an arg between regs and memory if all preceding
4396 args were allocated to regs. For args passed by reference we only count
4397 the reference pointer. */
4398 if (pcum
->can_split
)
4401 nregs
= ARM_NUM_REGS2 (mode
, type
);
4403 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
4406 return gen_rtx_REG (mode
, pcum
->nregs
);
4410 arm_arg_partial_bytes (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4411 tree type
, bool named
)
4413 int nregs
= pcum
->nregs
;
4415 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4417 aapcs_layout_arg (pcum
, mode
, type
, named
);
4418 return pcum
->aapcs_partial
;
4421 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
4424 if (NUM_ARG_REGS
> nregs
4425 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
4427 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
4433 arm_function_arg_advance (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4434 tree type
, bool named
)
4436 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4438 aapcs_layout_arg (pcum
, mode
, type
, named
);
4440 if (pcum
->aapcs_cprc_slot
>= 0)
4442 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
4444 pcum
->aapcs_cprc_slot
= -1;
4447 /* Generic stuff. */
4448 pcum
->aapcs_arg_processed
= false;
4449 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
4450 pcum
->aapcs_reg
= NULL_RTX
;
4451 pcum
->aapcs_partial
= 0;
4456 if (arm_vector_mode_supported_p (mode
)
4457 && pcum
->named_count
> pcum
->nargs
4458 && TARGET_IWMMXT_ABI
)
4459 pcum
->iwmmxt_nregs
+= 1;
4461 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
		       enum machine_mode mode ATTRIBUTE_UNUSED,
		       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
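/* Illustrative sketch (assumption, not part of the original sources): the
   only types with a non-constant TYPE_SIZE are variably modified ones, for
   example a local struct with a variable-length member (a GCC extension):

     void callee (int n)
     {
       struct { char buf[n]; } s;
       use (s);    // hypothetical call; s is really passed by reference
     }

   Fixed-size aggregates, however large, still go by value under the ARM
   ABIs.  */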
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
4520 /* Handle an "interrupt" or "isr" attribute;
4521 arguments as in struct attribute_spec.handler. */
4523 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
4528 if (TREE_CODE (*node
) != FUNCTION_DECL
)
4530 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4532 *no_add_attrs
= true;
4534 /* FIXME: the argument if any is checked for type attributes;
4535 should it be checked for decl ones? */
4539 if (TREE_CODE (*node
) == FUNCTION_TYPE
4540 || TREE_CODE (*node
) == METHOD_TYPE
)
4542 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
4544 warning (OPT_Wattributes
, "%qE attribute ignored",
4546 *no_add_attrs
= true;
4549 else if (TREE_CODE (*node
) == POINTER_TYPE
4550 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
4551 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
4552 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
4554 *node
= build_variant_type_copy (*node
);
4555 TREE_TYPE (*node
) = build_type_attribute_variant
4557 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
4558 *no_add_attrs
= true;
4562 /* Possibly pass this attribute on from the type to a decl. */
4563 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
4564 | (int) ATTR_FLAG_FUNCTION_NEXT
4565 | (int) ATTR_FLAG_ARRAY_NEXT
))
4567 *no_add_attrs
= true;
4568 return tree_cons (name
, args
, NULL_TREE
);
4572 warning (OPT_Wattributes
, "%qE attribute ignored",
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
4595 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4596 /* Handle the "notshared" attribute. This attribute is another way of
4597 requesting hidden visibility. ARM's compiler supports
4598 "__declspec(notshared)"; we support the same thing via an
4602 arm_handle_notshared_attribute (tree
*node
,
4603 tree name ATTRIBUTE_UNUSED
,
4604 tree args ATTRIBUTE_UNUSED
,
4605 int flags ATTRIBUTE_UNUSED
,
4608 tree decl
= TYPE_NAME (*node
);
4612 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
4613 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
4614 *no_add_attrs
= false;
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about functions defined in the same
     compilation unit.  */
  if (!TREE_STATIC (decl))
    return false;

  /* Make sure that SYMBOL always binds to the definition in this
     compilation unit.  */
  if (!targetm.binds_local_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_ONE_ONLY (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */
bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
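/* Illustrative sketch (not part of the original sources): how rules a-e
   above appear in user code; the function names here are hypothetical.

     void far_handler (void) __attribute__ ((long_call));    // rule a
     #pragma long_calls
     void in_long_scope (void);                               // rule b
     #pragma long_calls_off
     void near_helper (void) __attribute__ ((short_call));    // rule d
     #pragma no_long_calls
     void in_short_scope (void);                              // rule e

   With -mlong-calls (rule c) the remaining calls default to the long form
   unless overridden by d, e or f.  */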
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something for which we have no decl, or if we
     are generating code for Thumb-1.  */
  if (decl == NULL || TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
    return false;

  /* Cannot tail-call to long calls, since these are out of range of
     a branch instruction.  */
  if (arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;

      a = arm_function_value (TREE_TYPE (exp), decl, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
4829 /* Record that the current function needs a PIC register. Initialize
4830 cfun->machine->pic_reg if we have not already done so. */
4833 require_pic_register (void)
4835 /* A lot of the logic here is made obscure by the fact that this
4836 routine gets called as part of the rtx cost estimation process.
4837 We don't want those calls to affect any assumptions about the real
4838 function; and further, we can't call entry_of_function() until we
4839 start the real expansion process. */
4840 if (!crtl
->uses_pic_offset_table
)
4842 gcc_assert (can_create_pseudo_p ());
4843 if (arm_pic_register
!= INVALID_REGNUM
)
4845 if (!cfun
->machine
->pic_reg
)
4846 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
4848 /* Play games to avoid marking the function as needing pic
4849 if we are being called as part of the cost-estimation
4851 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
4852 crtl
->uses_pic_offset_table
= 1;
4858 if (!cfun
->machine
->pic_reg
)
4859 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
4861 /* Play games to avoid marking the function as needing pic
4862 if we are being called as part of the cost-estimation
4864 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
4866 crtl
->uses_pic_offset_table
= 1;
4869 arm_load_pic_register (0UL);
4873 /* We can be called during expansion of PHI nodes, where
4874 we can't yet emit instructions directly in the final
4875 insn stream. Queue the insns on the entry edge, they will
4876 be committed after everything else is expanded. */
4877 insert_insn_on_edge (seq
, single_succ_edge (ENTRY_BLOCK_PTR
));
4884 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
4886 if (GET_CODE (orig
) == SYMBOL_REF
4887 || GET_CODE (orig
) == LABEL_REF
)
4889 rtx pic_ref
, address
;
4894 gcc_assert (can_create_pseudo_p ());
4895 reg
= gen_reg_rtx (Pmode
);
4896 address
= gen_reg_rtx (Pmode
);
4901 /* VxWorks does not impose a fixed gap between segments; the run-time
4902 gap can be different from the object-file gap. We therefore can't
4903 use GOTOFF unless we are absolutely sure that the symbol is in the
4904 same segment as the GOT. Unfortunately, the flexibility of linker
4905 scripts means that we can't be sure of that in general, so assume
4906 that GOTOFF is never valid on VxWorks. */
4907 if ((GET_CODE (orig
) == LABEL_REF
4908 || (GET_CODE (orig
) == SYMBOL_REF
&&
4909 SYMBOL_REF_LOCAL_P (orig
)))
4911 && !TARGET_VXWORKS_RTP
)
4912 insn
= arm_pic_static_addr (orig
, reg
);
4915 /* If this function doesn't have a pic register, create one now. */
4916 require_pic_register ();
4919 emit_insn (gen_pic_load_addr_32bit (address
, orig
));
4920 else /* TARGET_THUMB1 */
4921 emit_insn (gen_pic_load_addr_thumb1 (address
, orig
));
4923 pic_ref
= gen_const_mem (Pmode
,
4924 gen_rtx_PLUS (Pmode
, cfun
->machine
->pic_reg
,
4926 insn
= emit_move_insn (reg
, pic_ref
);
4929 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4931 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
4935 else if (GET_CODE (orig
) == CONST
)
4939 if (GET_CODE (XEXP (orig
, 0)) == PLUS
4940 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
4943 /* Handle the case where we have: const (UNSPEC_TLS). */
4944 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
4945 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
4948 /* Handle the case where we have:
4949 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4951 if (GET_CODE (XEXP (orig
, 0)) == PLUS
4952 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
4953 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
4955 gcc_assert (GET_CODE (XEXP (XEXP (orig
, 0), 1)) == CONST_INT
);
4961 gcc_assert (can_create_pseudo_p ());
4962 reg
= gen_reg_rtx (Pmode
);
4965 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
4967 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
4968 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
4969 base
== reg
? 0 : reg
);
4971 if (GET_CODE (offset
) == CONST_INT
)
4973 /* The base register doesn't really matter, we only want to
4974 test the index for the appropriate mode. */
4975 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
4977 gcc_assert (can_create_pseudo_p ());
4978 offset
= force_reg (Pmode
, offset
);
4981 if (GET_CODE (offset
) == CONST_INT
)
4982 return plus_constant (base
, INTVAL (offset
));
4985 if (GET_MODE_SIZE (mode
) > 4
4986 && (GET_MODE_CLASS (mode
) == MODE_INT
4987 || TARGET_SOFT_FLOAT
))
4989 emit_insn (gen_addsi3 (reg
, base
, offset
));
4993 return gen_rtx_PLUS (Pmode
, base
, offset
);
5000 /* Find a spare register to use during the prolog of a function. */
5003 thumb_find_work_register (unsigned long pushed_regs_mask
)
5007 /* Check the argument registers first as these are call-used. The
5008 register allocation order means that sometimes r3 might be used
5009 but earlier argument registers might not, so check them all. */
5010 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
5011 if (!df_regs_ever_live_p (reg
))
5014 /* Before going on to check the call-saved registers we can try a couple
5015 more ways of deducing that r3 is available. The first is when we are
5016 pushing anonymous arguments onto the stack and we have less than 4
5017 registers worth of fixed arguments(*). In this case r3 will be part of
5018 the variable argument list and so we can be sure that it will be
5019 pushed right at the start of the function. Hence it will be available
5020 for the rest of the prologue.
5021 (*): ie crtl->args.pretend_args_size is greater than 0. */
5022 if (cfun
->machine
->uses_anonymous_args
5023 && crtl
->args
.pretend_args_size
> 0)
5024 return LAST_ARG_REGNUM
;
5026 /* The other case is when we have fixed arguments but less than 4 registers
5027 worth. In this case r3 might be used in the body of the function, but
5028 it is not being used to convey an argument into the function. In theory
5029 we could just check crtl->args.size to see how many bytes are
5030 being passed in argument registers, but it seems that it is unreliable.
5031 Sometimes it will have the value 0 when in fact arguments are being
5032 passed. (See testcase execute/20021111-1.c for an example). So we also
5033 check the args_info.nregs field as well. The problem with this field is
5034 that it makes no allowances for arguments that are passed to the
5035 function but which are not used. Hence we could miss an opportunity
5036 when a function has an unused argument in r3. But it is better to be
5037 safe than to be sorry. */
5038 if (! cfun
->machine
->uses_anonymous_args
5039 && crtl
->args
.size
>= 0
5040 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
5041 && crtl
->args
.info
.nregs
< 4)
5042 return LAST_ARG_REGNUM
;
5044 /* Otherwise look for a call-saved register that is going to be pushed. */
5045 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
5046 if (pushed_regs_mask
& (1 << reg
))
5051 /* Thumb-2 can use high regs. */
5052 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
5053 if (pushed_regs_mask
& (1 << reg
))
5056 /* Something went wrong - thumb_compute_save_reg_mask()
5057 should have arranged for a suitable register to be pushed. */
5061 static GTY(()) int pic_labelno
;
5063 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5067 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
5069 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
5071 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
5074 gcc_assert (flag_pic
);
5076 pic_reg
= cfun
->machine
->pic_reg
;
5077 if (TARGET_VXWORKS_RTP
)
5079 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
5080 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5081 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5083 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
5085 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5086 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
5090 /* We use an UNSPEC rather than a LABEL_REF because this label
5091 never appears in the code stream. */
5093 labelno
= GEN_INT (pic_labelno
++);
5094 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5095 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5097 /* On the ARM the PC register contains 'dot + 8' at the time of the
5098 addition, on the Thumb it is 'dot + 4'. */
5099 pic_rtx
= plus_constant (l1
, TARGET_ARM
? 8 : 4);
5100 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
5102 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5106 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5108 emit_insn (gen_pic_add_dot_plus_eight (pic_reg
, pic_reg
, labelno
));
5110 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
5112 else /* TARGET_THUMB1 */
5114 if (arm_pic_register
!= INVALID_REGNUM
5115 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
5117 /* We will have pushed the pic register, so we should always be
5118 able to find a work register. */
5119 pic_tmp
= gen_rtx_REG (SImode
,
5120 thumb_find_work_register (saved_regs
));
5121 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
5122 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
5125 emit_insn (gen_pic_load_addr_thumb1 (pic_reg
, pic_rtx
));
5126 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
5130 /* Need to emit this whether or not we obey regdecls,
5131 since setjmp/longjmp can cause life info to screw up. */
5135 /* Generate code to load the address of a static var when flag_pic is set. */
5137 arm_pic_static_addr (rtx orig
, rtx reg
)
5139 rtx l1
, labelno
, offset_rtx
, insn
;
5141 gcc_assert (flag_pic
);
5143 /* We use an UNSPEC rather than a LABEL_REF because this label
5144 never appears in the code stream. */
5145 labelno
= GEN_INT (pic_labelno
++);
5146 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5147 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5149 /* On the ARM the PC register contains 'dot + 8' at the time of the
5150 addition, on the Thumb it is 'dot + 4'. */
5151 offset_rtx
= plus_constant (l1
, TARGET_ARM
? 8 : 4);
5152 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
5153 UNSPEC_SYMBOL_OFFSET
);
5154 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
5158 emit_insn (gen_pic_load_addr_32bit (reg
, offset_rtx
));
5160 insn
= emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
5162 insn
= emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5164 else /* TARGET_THUMB1 */
5166 emit_insn (gen_pic_load_addr_thumb1 (reg
, offset_rtx
));
5167 insn
= emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (GET_CODE (x) != REG)
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}

/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
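/* Illustrative note (assumption, not part of the original sources): the
   MINUS form recognized above is what a symbol/label difference such as

     (minus (symbol_ref "table") (label_ref L42))

   looks like in RTL; it assembles to a PC-relative value, so it needs no
   GOT entry even when flag_pic is set.  */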
5206 /* Return nonzero if X is a valid ARM state address operand. */
5208 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
5212 enum rtx_code code
= GET_CODE (x
);
5214 if (arm_address_register_rtx_p (x
, strict_p
))
5217 use_ldrd
= (TARGET_LDRD
5219 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5221 if (code
== POST_INC
|| code
== PRE_DEC
5222 || ((code
== PRE_INC
|| code
== POST_DEC
)
5223 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5224 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5226 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5227 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5228 && GET_CODE (XEXP (x
, 1)) == PLUS
5229 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5231 rtx addend
= XEXP (XEXP (x
, 1), 1);
5233 /* Don't allow ldrd post increment by register because it's hard
5234 to fixup invalid register choices. */
5236 && GET_CODE (x
) == POST_MODIFY
5237 && GET_CODE (addend
) == REG
)
5240 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
5241 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
5244 /* After reload constants split into minipools will have addresses
5245 from a LABEL_REF. */
5246 else if (reload_completed
5247 && (code
== LABEL_REF
5249 && GET_CODE (XEXP (x
, 0)) == PLUS
5250 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5251 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5254 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5257 else if (code
== PLUS
)
5259 rtx xop0
= XEXP (x
, 0);
5260 rtx xop1
= XEXP (x
, 1);
5262 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5263 && GET_CODE(xop1
) == CONST_INT
5264 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
5265 || (arm_address_register_rtx_p (xop1
, strict_p
)
5266 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
5270 /* Reload currently can't handle MINUS, so disable this for now */
5271 else if (GET_CODE (x
) == MINUS
)
5273 rtx xop0
= XEXP (x
, 0);
5274 rtx xop1
= XEXP (x
, 1);
5276 return (arm_address_register_rtx_p (xop0
, strict_p
)
5277 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
5281 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5282 && code
== SYMBOL_REF
5283 && CONSTANT_POOL_ADDRESS_P (x
)
5285 && symbol_mentioned_p (get_pool_constant (x
))
5286 && ! pcrel_constant_p (get_pool_constant (x
))))
5292 /* Return nonzero if X is a valid Thumb-2 address operand. */
5294 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
5297 enum rtx_code code
= GET_CODE (x
);
5299 if (arm_address_register_rtx_p (x
, strict_p
))
5302 use_ldrd
= (TARGET_LDRD
5304 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5306 if (code
== POST_INC
|| code
== PRE_DEC
5307 || ((code
== PRE_INC
|| code
== POST_DEC
)
5308 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5309 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5311 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5312 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5313 && GET_CODE (XEXP (x
, 1)) == PLUS
5314 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5316 /* Thumb-2 only has autoincrement by constant. */
5317 rtx addend
= XEXP (XEXP (x
, 1), 1);
5318 HOST_WIDE_INT offset
;
5320 if (GET_CODE (addend
) != CONST_INT
)
5323 offset
= INTVAL(addend
);
5324 if (GET_MODE_SIZE (mode
) <= 4)
5325 return (offset
> -256 && offset
< 256);
5327 return (use_ldrd
&& offset
> -1024 && offset
< 1024
5328 && (offset
& 3) == 0);
5331 /* After reload constants split into minipools will have addresses
5332 from a LABEL_REF. */
5333 else if (reload_completed
5334 && (code
== LABEL_REF
5336 && GET_CODE (XEXP (x
, 0)) == PLUS
5337 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5338 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5341 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5344 else if (code
== PLUS
)
5346 rtx xop0
= XEXP (x
, 0);
5347 rtx xop1
= XEXP (x
, 1);
5349 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5350 && thumb2_legitimate_index_p (mode
, xop1
, strict_p
))
5351 || (arm_address_register_rtx_p (xop1
, strict_p
)
5352 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
5355 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5356 && code
== SYMBOL_REF
5357 && CONSTANT_POOL_ADDRESS_P (x
)
5359 && symbol_mentioned_p (get_pool_constant (x
))
5360 && ! pcrel_constant_p (get_pool_constant (x
))))
5366 /* Return nonzero if INDEX is valid for an address index operand in
5369 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
5372 HOST_WIDE_INT range
;
5373 enum rtx_code code
= GET_CODE (index
);
5375 /* Standard coprocessor addressing modes. */
5376 if (TARGET_HARD_FLOAT
5377 && (TARGET_FPA
|| TARGET_MAVERICK
)
5378 && (GET_MODE_CLASS (mode
) == MODE_FLOAT
5379 || (TARGET_MAVERICK
&& mode
== DImode
)))
5380 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5381 && INTVAL (index
) > -1024
5382 && (INTVAL (index
) & 3) == 0);
5385 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
)))
5386 return (code
== CONST_INT
5387 && INTVAL (index
) < 1016
5388 && INTVAL (index
) > -1024
5389 && (INTVAL (index
) & 3) == 0);
5391 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
5392 return (code
== CONST_INT
5393 && INTVAL (index
) < 1024
5394 && INTVAL (index
) > -1024
5395 && (INTVAL (index
) & 3) == 0);
5397 if (arm_address_register_rtx_p (index
, strict_p
)
5398 && (GET_MODE_SIZE (mode
) <= 4))
5401 if (mode
== DImode
|| mode
== DFmode
)
5403 if (code
== CONST_INT
)
5405 HOST_WIDE_INT val
= INTVAL (index
);
5408 return val
> -256 && val
< 256;
5410 return val
> -4096 && val
< 4092;
5413 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
5416 if (GET_MODE_SIZE (mode
) <= 4
5420 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
5424 rtx xiop0
= XEXP (index
, 0);
5425 rtx xiop1
= XEXP (index
, 1);
5427 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
5428 && power_of_two_operand (xiop1
, SImode
))
5429 || (arm_address_register_rtx_p (xiop1
, strict_p
)
5430 && power_of_two_operand (xiop0
, SImode
)));
5432 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
5433 || code
== ASHIFT
|| code
== ROTATERT
)
5435 rtx op
= XEXP (index
, 1);
5437 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
5438 && GET_CODE (op
) == CONST_INT
5440 && INTVAL (op
) <= 31);
5444 /* For ARM v4 we may be doing a sign-extend operation during the
5450 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
5456 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
5458 return (code
== CONST_INT
5459 && INTVAL (index
) < range
5460 && INTVAL (index
) > -range
);
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (GET_CODE(op) != CONST_INT)
    return false;

  val = INTVAL(op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
5477 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5479 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
5481 enum rtx_code code
= GET_CODE (index
);
5483 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5484 /* Standard coprocessor addressing modes. */
5485 if (TARGET_HARD_FLOAT
5486 && (TARGET_FPA
|| TARGET_MAVERICK
)
5487 && (GET_MODE_CLASS (mode
) == MODE_FLOAT
5488 || (TARGET_MAVERICK
&& mode
== DImode
)))
5489 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5490 && INTVAL (index
) > -1024
5491 && (INTVAL (index
) & 3) == 0);
5493 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
5495 /* For DImode assume values will usually live in core regs
5496 and only allow LDRD addressing modes. */
5497 if (!TARGET_LDRD
|| mode
!= DImode
)
5498 return (code
== CONST_INT
5499 && INTVAL (index
) < 1024
5500 && INTVAL (index
) > -1024
5501 && (INTVAL (index
) & 3) == 0);
5505 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
)))
5506 return (code
== CONST_INT
5507 && INTVAL (index
) < 1016
5508 && INTVAL (index
) > -1024
5509 && (INTVAL (index
) & 3) == 0);
5511 if (arm_address_register_rtx_p (index
, strict_p
)
5512 && (GET_MODE_SIZE (mode
) <= 4))
5515 if (mode
== DImode
|| mode
== DFmode
)
5517 if (code
== CONST_INT
)
5519 HOST_WIDE_INT val
= INTVAL (index
);
5520 /* ??? Can we assume ldrd for thumb2? */
5521 /* Thumb-2 ldrd only has reg+const addressing modes. */
5522 /* ldrd supports offsets of +-1020.
5523 However the ldr fallback does not. */
5524 return val
> -256 && val
< 256 && (val
& 3) == 0;
5532 rtx xiop0
= XEXP (index
, 0);
5533 rtx xiop1
= XEXP (index
, 1);
5535 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
5536 && thumb2_index_mul_operand (xiop1
))
5537 || (arm_address_register_rtx_p (xiop1
, strict_p
)
5538 && thumb2_index_mul_operand (xiop0
)));
5540 else if (code
== ASHIFT
)
5542 rtx op
= XEXP (index
, 1);
5544 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
5545 && GET_CODE (op
) == CONST_INT
5547 && INTVAL (op
) <= 3);
5550 return (code
== CONST_INT
5551 && INTVAL (index
) < 4096
5552 && INTVAL (index
) > -256);
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
{
  int regno;

  if (GET_CODE (x) != REG)
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}

/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
5587 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5589 The AP may be eliminated to either the SP or the FP, so we use the
5590 least common denominator, e.g. SImode, and offsets from 0 to 64.
5592 ??? Verify whether the above is the right approach.
5594 ??? Also, the FP may be eliminated to the SP, so perhaps that
5595 needs special handling also.
5597 ??? Look at how the mips16 port solves this problem. It probably uses
5598 better ways to solve some of these problems.
5600 Although it is not incorrect, we don't accept QImode and HImode
5601 addresses based on the frame pointer or arg pointer until the
5602 reload pass starts. This is so that eliminating such addresses
5603 into stack based ones won't produce impossible code. */
5605 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
5607 /* ??? Not clear if this is right. Experiment. */
5608 if (GET_MODE_SIZE (mode
) < 4
5609 && !(reload_in_progress
|| reload_completed
)
5610 && (reg_mentioned_p (frame_pointer_rtx
, x
)
5611 || reg_mentioned_p (arg_pointer_rtx
, x
)
5612 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
5613 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
5614 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
5615 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
5618 /* Accept any base register. SP only in SImode or larger. */
5619 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
5622 /* This is PC relative data before arm_reorg runs. */
5623 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
5624 && GET_CODE (x
) == SYMBOL_REF
5625 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
5628 /* This is PC relative data after arm_reorg runs. */
5629 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
5631 && (GET_CODE (x
) == LABEL_REF
5632 || (GET_CODE (x
) == CONST
5633 && GET_CODE (XEXP (x
, 0)) == PLUS
5634 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5635 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5638 /* Post-inc indexing only supported for SImode and larger. */
5639 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
5640 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
5643 else if (GET_CODE (x
) == PLUS
)
5645 /* REG+REG address can be any two index registers. */
5646 /* We disallow FRAME+REG addressing since we know that FRAME
5647 will be replaced with STACK, and SP relative addressing only
5648 permits SP+OFFSET. */
5649 if (GET_MODE_SIZE (mode
) <= 4
5650 && XEXP (x
, 0) != frame_pointer_rtx
5651 && XEXP (x
, 1) != frame_pointer_rtx
5652 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
5653 && thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
))
5656 /* REG+const has 5-7 bit offset for non-SP registers. */
5657 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
5658 || XEXP (x
, 0) == arg_pointer_rtx
)
5659 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5660 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
5663 /* REG+const has 10-bit offset for SP, but only SImode and
5664 larger is supported. */
5665 /* ??? Should probably check for DI/DFmode overflow here
5666 just like GO_IF_LEGITIMATE_OFFSET does. */
5667 else if (GET_CODE (XEXP (x
, 0)) == REG
5668 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
5669 && GET_MODE_SIZE (mode
) >= 4
5670 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5671 && INTVAL (XEXP (x
, 1)) >= 0
5672 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
5673 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
5676 else if (GET_CODE (XEXP (x
, 0)) == REG
5677 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
5678 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
5679 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
5680 && REGNO (XEXP (x
, 0)) <= LAST_VIRTUAL_REGISTER
))
5681 && GET_MODE_SIZE (mode
) >= 4
5682 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5683 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
5687 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5688 && GET_MODE_SIZE (mode
) == 4
5689 && GET_CODE (x
) == SYMBOL_REF
5690 && CONSTANT_POOL_ADDRESS_P (x
)
5692 && symbol_mentioned_p (get_pool_constant (x
))
5693 && ! pcrel_constant_p (get_pool_constant (x
))))
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
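/* Illustrative note (not part of the original sources): the ranges above
   match the 16-bit Thumb load/store immediate encodings, e.g.

     ldrb r0, [r1, #31]     byte:      offsets 0..31
     ldrh r0, [r1, #62]     halfword:  even offsets 0..62
     ldr  r0, [r1, #124]    word:      offsets 0..124, multiples of 4

   The word case comes from the default branch, which also leaves room for
   the access itself, hence val + GET_MODE_SIZE (mode) <= 128.  */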
static bool
arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
static rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */
      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, 0);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
5785 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
5787 rtx insns
, label
, labelno
, sum
;
5791 labelno
= GEN_INT (pic_labelno
++);
5792 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5793 label
= gen_rtx_CONST (VOIDmode
, label
);
5795 sum
= gen_rtx_UNSPEC (Pmode
,
5796 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
5797 GEN_INT (TARGET_ARM
? 8 : 4)),
5799 reg
= load_tls_operand (sum
, reg
);
5802 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
5803 else if (TARGET_THUMB2
)
5804 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5805 else /* TARGET_THUMB1 */
5806 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5808 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
, LCT_PURE
, /* LCT_CONST? */
5809 Pmode
, 1, reg
, Pmode
);
5811 insns
= get_insns ();
5818 legitimize_tls_address (rtx x
, rtx reg
)
5820 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
5821 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
5825 case TLS_MODEL_GLOBAL_DYNAMIC
:
5826 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
5827 dest
= gen_reg_rtx (Pmode
);
5828 emit_libcall_block (insns
, dest
, ret
, x
);
5831 case TLS_MODEL_LOCAL_DYNAMIC
:
5832 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
5834 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5835 share the LDM result with other LD model accesses. */
5836 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
5838 dest
= gen_reg_rtx (Pmode
);
5839 emit_libcall_block (insns
, dest
, ret
, eqv
);
5841 /* Load the addend. */
5842 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
, GEN_INT (TLS_LDO32
)),
5844 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
5845 return gen_rtx_PLUS (Pmode
, dest
, addend
);
5847 case TLS_MODEL_INITIAL_EXEC
:
5848 labelno
= GEN_INT (pic_labelno
++);
5849 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5850 label
= gen_rtx_CONST (VOIDmode
, label
);
5851 sum
= gen_rtx_UNSPEC (Pmode
,
5852 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
5853 GEN_INT (TARGET_ARM
? 8 : 4)),
5855 reg
= load_tls_operand (sum
, reg
);
5858 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
5859 else if (TARGET_THUMB2
)
5860 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
5863 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5864 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
5867 tp
= arm_load_tp (NULL_RTX
);
5869 return gen_rtx_PLUS (Pmode
, tp
, reg
);
5871 case TLS_MODEL_LOCAL_EXEC
:
5872 tp
= arm_load_tp (NULL_RTX
);
5874 reg
= gen_rtx_UNSPEC (Pmode
,
5875 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
5877 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
5879 return gen_rtx_PLUS (Pmode
, tp
, reg
);
5886 /* Try machine-dependent ways of modifying an illegitimate address
5887 to be legitimate. If we find one, return the new, valid address. */
5889 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
5893 /* TODO: legitimize_address for Thumb2. */
5896 return thumb_legitimize_address (x
, orig_x
, mode
);
5899 if (arm_tls_symbol_p (x
))
5900 return legitimize_tls_address (x
, NULL_RTX
);
5902 if (GET_CODE (x
) == PLUS
)
5904 rtx xop0
= XEXP (x
, 0);
5905 rtx xop1
= XEXP (x
, 1);
5907 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
5908 xop0
= force_reg (SImode
, xop0
);
5910 if (CONSTANT_P (xop1
) && !symbol_mentioned_p (xop1
))
5911 xop1
= force_reg (SImode
, xop1
);
5913 if (ARM_BASE_REGISTER_RTX_P (xop0
)
5914 && GET_CODE (xop1
) == CONST_INT
)
5916 HOST_WIDE_INT n
, low_n
;
5920 /* VFP addressing modes actually allow greater offsets, but for
5921 now we just stick with the lowest common denominator. */
5923 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
5935 low_n
= ((mode
) == TImode
? 0
5936 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
5940 base_reg
= gen_reg_rtx (SImode
);
5941 val
= force_operand (plus_constant (xop0
, n
), NULL_RTX
);
5942 emit_move_insn (base_reg
, val
);
5943 x
= plus_constant (base_reg
, low_n
);
5945 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
5946 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
5949 /* XXX We don't allow MINUS any more -- see comment in
5950 arm_legitimate_address_outer_p (). */
5951 else if (GET_CODE (x
) == MINUS
)
5953 rtx xop0
= XEXP (x
, 0);
5954 rtx xop1
= XEXP (x
, 1);
5956 if (CONSTANT_P (xop0
))
5957 xop0
= force_reg (SImode
, xop0
);
5959 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
5960 xop1
= force_reg (SImode
, xop1
);
5962 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
5963 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
5966 /* Make sure to take full advantage of the pre-indexed addressing mode
5967 with absolute addresses which often allows for the base register to
5968 be factorized for multiple adjacent memory references, and it might
5969 even allows for the mini pool to be avoided entirely. */
5970 else if (GET_CODE (x
) == CONST_INT
&& optimize
> 0)
5973 HOST_WIDE_INT mask
, base
, index
;
5976 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
5977 use a 8-bit index. So let's use a 12-bit index for SImode only and
5978 hope that arm_gen_constant will enable ldrb to use more bits. */
5979 bits
= (mode
== SImode
) ? 12 : 8;
5980 mask
= (1 << bits
) - 1;
5981 base
= INTVAL (x
) & ~mask
;
5982 index
= INTVAL (x
) & mask
;
5983 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
5985 /* It'll most probably be more efficient to generate the base
5986 with more bits set and use a negative index instead. */
5990 base_reg
= force_reg (SImode
, GEN_INT (base
));
5991 x
= plus_constant (base_reg
, index
);
5996 /* We need to find and carefully transform any SYMBOL and LABEL
5997 references; so go back to the original address expression. */
5998 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6000 if (new_x
!= orig_x
)
6008 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6009 to be legitimate. If we find one, return the new, valid address. */
6011 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
6013 if (arm_tls_symbol_p (x
))
6014 return legitimize_tls_address (x
, NULL_RTX
);
6016 if (GET_CODE (x
) == PLUS
6017 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6018 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
6019 || INTVAL (XEXP (x
, 1)) < 0))
6021 rtx xop0
= XEXP (x
, 0);
6022 rtx xop1
= XEXP (x
, 1);
6023 HOST_WIDE_INT offset
= INTVAL (xop1
);
6025 /* Try and fold the offset into a biasing of the base register and
6026 then offsetting that. Don't do this when optimizing for space
6027 since it can cause too many CSEs. */
6028 if (optimize_size
&& offset
>= 0
6029 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
6031 HOST_WIDE_INT delta
;
6034 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
6035 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
6036 delta
= 31 * GET_MODE_SIZE (mode
);
6038 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
6040 xop0
= force_operand (plus_constant (xop0
, offset
- delta
),
6042 x
= plus_constant (xop0
, delta
);
6044 else if (offset
< 0 && offset
> -256)
6045 /* Small negative offsets are best done with a subtract before the
6046 dereference, forcing these into a register normally takes two
6048 x
= force_operand (x
, NULL_RTX
);
6051 /* For the remaining cases, force the constant into a register. */
6052 xop1
= force_reg (SImode
, xop1
);
6053 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
6056 else if (GET_CODE (x
) == PLUS
6057 && s_register_operand (XEXP (x
, 1), SImode
)
6058 && !s_register_operand (XEXP (x
, 0), SImode
))
6060 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
6062 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
6067 /* We need to find and carefully transform any SYMBOL and LABEL
6068 references; so go back to the original address expression. */
6069 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6071 if (new_x
!= orig_x
)
6079 thumb_legitimize_reload_address (rtx
*x_p
,
6080 enum machine_mode mode
,
6081 int opnum
, int type
,
6082 int ind_levels ATTRIBUTE_UNUSED
)
6086 if (GET_CODE (x
) == PLUS
6087 && GET_MODE_SIZE (mode
) < 4
6088 && REG_P (XEXP (x
, 0))
6089 && XEXP (x
, 0) == stack_pointer_rtx
6090 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6091 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
6096 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6097 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6101 /* If both registers are hi-regs, then it's better to reload the
6102 entire expression rather than each register individually. That
6103 only requires one reload register rather than two. */
6104 if (GET_CODE (x
) == PLUS
6105 && REG_P (XEXP (x
, 0))
6106 && REG_P (XEXP (x
, 1))
6107 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
6108 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
6113 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6114 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6121 /* Test for various thread-local symbols. */
6123 /* Return TRUE if X is a thread-local symbol. */
6126 arm_tls_symbol_p (rtx x
)
6128 if (! TARGET_HAVE_TLS
)
6131 if (GET_CODE (x
) != SYMBOL_REF
)
6134 return SYMBOL_REF_TLS_MODEL (x
) != 0;
6137 /* Helper for arm_tls_referenced_p. */
6140 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
6142 if (GET_CODE (*x
) == SYMBOL_REF
)
6143 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
6145 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6146 TLS offsets, not real symbol references. */
6147 if (GET_CODE (*x
) == UNSPEC
6148 && XINT (*x
, 1) == UNSPEC_TLS
)
6154 /* Return TRUE if X contains any TLS symbol references. */
6157 arm_tls_referenced_p (rtx x
)
6159 if (! TARGET_HAVE_TLS
)
6162 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
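/* Illustrative sketch (assumption, not part of the original sources): the
   predicates above fire for references to thread-local variables, e.g.

     __thread int counter;
     int get (void) { return counter; }   // SYMBOL_REF_TLS_MODEL != 0

   and such addresses are routed through legitimize_tls_address instead of
   being forced into the constant pool.  */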
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

bool
arm_cannot_force_const_mem (rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}

#define REG_OR_SUBREG_REG(X)						\
  (GET_CODE (X) == REG							\
   || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))

#define REG_OR_SUBREG_RTX(X)			\
   (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))

#ifndef COSTS_N_INSNS
#define COSTS_N_INSNS(N) ((N) * 4 - 2)
#endif
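/* Illustrative note (not part of the original sources): with this fallback
   definition the cost scale is COSTS_N_INSNS (1) = 2, COSTS_N_INSNS (2) = 6,
   COSTS_N_INSNS (3) = 10, i.e. roughly four units per additional insn,
   which is what the thumb1_rtx_costs cases below are calibrated against.  */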
6193 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
6195 enum machine_mode mode
= GET_MODE (x
);
6208 return COSTS_N_INSNS (1);
6211 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6214 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
6221 return COSTS_N_INSNS (2) + cycles
;
6223 return COSTS_N_INSNS (1) + 16;
6226 return (COSTS_N_INSNS (1)
6227 + 4 * ((GET_CODE (SET_SRC (x
)) == MEM
)
6228 + GET_CODE (SET_DEST (x
)) == MEM
));
6233 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
6235 if (thumb_shiftable_const (INTVAL (x
)))
6236 return COSTS_N_INSNS (2);
6237 return COSTS_N_INSNS (3);
6239 else if ((outer
== PLUS
|| outer
== COMPARE
)
6240 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
6242 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
6243 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
6244 return COSTS_N_INSNS (1);
6245 else if (outer
== AND
)
6248 /* This duplicates the tests in the andsi3 expander. */
6249 for (i
= 9; i
<= 31; i
++)
6250 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
6251 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
6252 return COSTS_N_INSNS (2);
6254 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
6255 || outer
== LSHIFTRT
)
6257 return COSTS_N_INSNS (2);
6263 return COSTS_N_INSNS (3);
6281 /* XXX another guess. */
6282 /* Memory costs quite a lot for the first word, but subsequent words
6283 load at the equivalent of a single insn each. */
6284 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
6285 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
6290 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
6295 /* XXX still guessing. */
6296 switch (GET_MODE (XEXP (x
, 0)))
6299 return (1 + (mode
== DImode
? 4 : 0)
6300 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
6303 return (4 + (mode
== DImode
? 4 : 0)
6304 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
6307 return (1 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
static bool
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);
  enum rtx_code subcode;
  enum rtx_code code = GET_CODE (x);

      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));

      if (TARGET_HARD_FLOAT && mode == SFmode)
	*total = COSTS_N_INSNS (2);
      else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
	*total = COSTS_N_INSNS (4);
      *total = COSTS_N_INSNS (20);

      if (GET_CODE (XEXP (x, 1)) == REG)
	*total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
      else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
	*total = rtx_cost (XEXP (x, 1), code, speed);

      *total += COSTS_N_INSNS (4);

    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
      *total += rtx_cost (XEXP (x, 0), code, speed);

      *total += COSTS_N_INSNS (3);

      *total += COSTS_N_INSNS (1);
      /* Increase the cost of complex shifts because they aren't any faster,
	 and reduce dual issue opportunities.  */
      if (arm_tune_cortex_a9
	  && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
	    *total = COSTS_N_INSNS (1);
	  *total = COSTS_N_INSNS (20);
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      /* Thumb2 does not have RSB, so all arguments must be
	 registers (subtracting a constant is canonicalized as
	 addition of the negated constant).  */

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      if (GET_CODE (XEXP (x, 0)) == CONST_INT
	  && const_ok_for_arm (INTVAL (XEXP (x, 0))))
	*total += rtx_cost (XEXP (x, 1), code, speed);

      if (GET_CODE (XEXP (x, 1)) == CONST_INT
	  && const_ok_for_arm (INTVAL (XEXP (x, 1))))
	*total += rtx_cost (XEXP (x, 0), code, speed);

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
		  && arm_const_double_rtx (XEXP (x, 0)))
		*total += rtx_cost (XEXP (x, 1), code, speed);

	      if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
		  && arm_const_double_rtx (XEXP (x, 1)))
		*total += rtx_cost (XEXP (x, 0), code, speed);
	    }
	  *total = COSTS_N_INSNS (20);
	}

      *total = COSTS_N_INSNS (1);
      if (GET_CODE (XEXP (x, 0)) == CONST_INT
	  && const_ok_for_arm (INTVAL (XEXP (x, 0))))
	*total += rtx_cost (XEXP (x, 1), code, speed);

      subcode = GET_CODE (XEXP (x, 1));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 0), code, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
	}

      /* A shift as a part of RSB costs no more than RSB itself.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
	  *total += rtx_cost (XEXP (x, 1), code, speed);
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 0), code, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
	}

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
	  if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
	      && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
	    *total += COSTS_N_INSNS (1);
	}

      if (code == PLUS && arm_arch6 && mode == SImode
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (1);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
			      speed);
	  *total += rtx_cost (XEXP (x, 1), code, speed);
	}

      /* MLA: All arguments must be registers.  We filter out
	 multiplication by a power of two, so that we fall down into
	 the code below.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	/* The cost comes from the cost of the multiply.  */

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
		  && arm_const_double_rtx (XEXP (x, 1)))
		*total += rtx_cost (XEXP (x, 0), code, speed);
	    }
	  *total = COSTS_N_INSNS (20);
	}

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
	      && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
	    *total += COSTS_N_INSNS (1);
	}

    case AND: case XOR: case IOR:

      /* Normally the frame registers will be split into reg+const during
	 reload, so it is a bad idea to combine them with other instructions,
	 since then they might not be moved outside of loops.  As a compromise
	 we allow integration with ops that have a constant as their second
	 operand.  */
      if ((REG_OR_SUBREG_REG (XEXP (x, 0))
	   && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
	   && GET_CODE (XEXP (x, 1)) != CONST_INT)
	  || (REG_OR_SUBREG_REG (XEXP (x, 0))
	      && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))

	  *total += COSTS_N_INSNS (2);
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT
	      && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	    *total += rtx_cost (XEXP (x, 0), code, speed);

	  *total += COSTS_N_INSNS (1);
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT
	      && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	    *total += rtx_cost (XEXP (x, 0), code, speed);

      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 1), code, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
	}

	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 1), code, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
	}

      if (subcode == UMIN || subcode == UMAX
	  || subcode == SMIN || subcode == SMAX)
	{
	  *total = COSTS_N_INSNS (3);
	}

      /* This should have been handled by the CPU specific routines.  */

      if (arm_arch3m && mode == SImode
	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
	      == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
	{
	  *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
	}
      *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    *total = COSTS_N_INSNS (1);
	  *total = COSTS_N_INSNS (2);
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
      if (mode == SImode && code == NOT)
	{
	  subcode = GET_CODE (XEXP (x, 0));
	  if (subcode == ASHIFT || subcode == ASHIFTRT
	      || subcode == LSHIFTRT
	      || subcode == ROTATE || subcode == ROTATERT
		  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
	    {
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
	      /* Register shifts cost an extra cycle.  */
	      if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
		*total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
	    }
	}

      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	{
	  *total = COSTS_N_INSNS (4);
	}

      operand = XEXP (x, 0);

      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
	     || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
	    && GET_CODE (XEXP (operand, 0)) == REG
	    && REGNO (XEXP (operand, 0)) == CC_REGNUM))
	*total += COSTS_N_INSNS (1);
      *total += (rtx_cost (XEXP (x, 1), code, speed)
		 + rtx_cost (XEXP (x, 2), code, speed));

      if (mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
	}

      if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
	  && mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
	}

      if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
	  && mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
	}

      /* SCC insns.  In the case where the comparison has already been
	 performed, then they cost 2 instructions.  Otherwise they need
	 an additional comparison before them.  */
      *total = COSTS_N_INSNS (2);
      if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)

      if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)

      *total += COSTS_N_INSNS (1);
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	*total += rtx_cost (XEXP (x, 0), code, speed);

      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 1), code, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
	}

	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 1), code, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
	}

      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
      if (GET_CODE (XEXP (x, 1)) != CONST_INT
	  || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
	*total += rtx_cost (XEXP (x, 1), code, speed);

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    *total = COSTS_N_INSNS (1);
	  *total = COSTS_N_INSNS (20);
	}

      *total = COSTS_N_INSNS (1);
      *total += COSTS_N_INSNS (3);

      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  *total += COSTS_N_INSNS (1);

	  if (GET_MODE (XEXP (x, 0)) != SImode)
	    {
	      if (GET_CODE (XEXP (x, 0)) != MEM)
		*total += COSTS_N_INSNS (1);
	      else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
		*total += COSTS_N_INSNS (2);
	    }
	}

      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  *total += COSTS_N_INSNS (1);

	  if (GET_MODE (XEXP (x, 0)) != SImode)
	    {
	      if (GET_CODE (XEXP (x, 0)) != MEM)
		*total += COSTS_N_INSNS (1);
	      else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
		*total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
	    }
	}

      switch (GET_MODE (XEXP (x, 0)))
	{
	  *total = COSTS_N_INSNS (1);
	}

      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);

      if (const_ok_for_arm (INTVAL (x))
	  || const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (1);
      *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
						INTVAL (x), NULL_RTX,

      *total = COSTS_N_INSNS (3);

      *total = COSTS_N_INSNS (1);

      *total = COSTS_N_INSNS (1);
      *total += rtx_cost (XEXP (x, 0), code, speed);

      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*total = COSTS_N_INSNS (1);
      *total = COSTS_N_INSNS (4);

      *total = COSTS_N_INSNS (4);
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine-grained tuning when we have more related test cases.  */

static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  enum machine_mode mode = GET_MODE (x);

      return COSTS_N_INSNS (1);

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

      return (COSTS_N_INSNS (1)
	      + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
		     + GET_CODE (SET_DEST (x)) == MEM));

      if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
	return 0;
      if (thumb_shiftable_const (INTVAL (x)))
	return COSTS_N_INSNS (2);
      return COSTS_N_INSNS (3);

      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

      return COSTS_N_INSNS (3);

      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)

      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	  return (1 + (mode == DImode ? 4 : 0)
		  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));

	  return (4 + (mode == DImode ? 4 : 0)
		  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));

	  return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
	}
/* RTX costs when optimizing for size.  */
static bool
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		    int *total)
{
  enum machine_mode mode = GET_MODE (x);

      *total = thumb1_size_rtx_costs (x, code, outer_code);

  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */

      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*total = COSTS_N_INSNS (1);
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      /* Needs a libcall, so it costs about this.  */
      *total = COSTS_N_INSNS (2);

      if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
	}

      if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
	}
      else if (mode == SImode)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
	  /* Slightly disparage register shifts, but not by much.  */
	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
	    *total += 1 + rtx_cost (XEXP (x, 1), code, false);
	}

      /* Needs a libcall.  */
      *total = COSTS_N_INSNS (2);

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	}

	{
	  enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
	  enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));

	  if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
	      || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
	      || subcode1 == ROTATE || subcode1 == ROTATERT
	      || subcode1 == ASHIFT || subcode1 == LSHIFTRT
	      || subcode1 == ASHIFTRT)
	    {
	      /* It's just the cost of the two operands.  */
	    }

	  *total = COSTS_N_INSNS (1);
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	}

      /* A shift as a part of ADD costs nothing.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
	  *total += rtx_cost (XEXP (x, 1), code, false);
	}

    case AND: case XOR: case IOR:
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));

	  if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
	      || subcode == LSHIFTRT || subcode == ASHIFTRT
	      || (code == AND && subcode == NOT))
	    {
	      /* It's just the cost of the two operands.  */
	    }

	  *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      if (cc_register (XEXP (x, 0), VOIDmode))

      *total = COSTS_N_INSNS (1);

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*total = COSTS_N_INSNS (1);
      *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));

      if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
	{
	  if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
	    *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
	}
      *total += COSTS_N_INSNS (1);

      if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
	{
	  switch (GET_MODE (XEXP (x, 0)))
	    {
	      *total += COSTS_N_INSNS (1);

	      *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);

	      *total += COSTS_N_INSNS (2);
	    }
	}
      *total += COSTS_N_INSNS (1);

      if (const_ok_for_arm (INTVAL (x)))
	/* A multiplication by a constant requires another instruction
	   to load the constant to a register.  */
	*total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
      else if (const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
      else if (const_ok_for_arm (-INTVAL (x)))
	{
	  if (outer_code == COMPARE || outer_code == PLUS
	      || outer_code == MINUS)
	    *total = COSTS_N_INSNS (1);
	}
      *total = COSTS_N_INSNS (2);

      *total = COSTS_N_INSNS (2);

      *total = COSTS_N_INSNS (4);

      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
	 cost of these slightly.  */
      *total = COSTS_N_INSNS (1) + 1;

      if (mode != VOIDmode)
	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      *total = COSTS_N_INSNS (4); /* Who knows?  */
/* RTX costs when optimizing for size.  */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int *total,
	       bool speed)
{
  if (!speed)
    return arm_size_rtx_costs (x, (enum rtx_code) code,
			       (enum rtx_code) outer_code, total);
  else
    return current_tune->rtx_costs (x, (enum rtx_code) code,
				    (enum rtx_code) outer_code,
				    total, speed);
}
/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */

static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

      *total = thumb1_rtx_costs (x, code, outer_code);

      if (GET_MODE_CLASS (mode) == MODE_FLOAT
	{
	  *total = COSTS_N_INSNS (20);
	}

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
				      & (unsigned HOST_WIDE_INT) 0xffffffff);
	  int cost, const_ok = const_ok_for_arm (i);
	  int j, booth_unit_size;

	  /* Tune as appropriate.  */
	  cost = const_ok ? 4 : 8;
	  booth_unit_size = 2;
	  for (j = 0; i && j < 32; j += booth_unit_size)
	    {
	      i >>= booth_unit_size;
	    }

	  *total = COSTS_N_INSNS (cost);
	  *total += rtx_cost (XEXP (x, 0), code, speed);
	}

      *total = COSTS_N_INSNS (20);

      return arm_rtx_costs_1 (x, outer_code, total, speed);
}
/* RTX cost for cores with a fast multiply unit (M variants).  */

static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

      *total = thumb1_rtx_costs (x, code, outer_code);

  /* ??? should thumb2 use different costs?  */

      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
      if (mode == DImode
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS(2);
	}

	*total = COSTS_N_INSNS (5);

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
				      & (unsigned HOST_WIDE_INT) 0xffffffff);
	  int cost, const_ok = const_ok_for_arm (i);
	  int j, booth_unit_size;

	  /* Tune as appropriate.  */
	  cost = const_ok ? 4 : 8;
	  booth_unit_size = 8;
	  for (j = 0; i && j < 32; j += booth_unit_size)
	    {
	      i >>= booth_unit_size;
	    }

	  *total = COSTS_N_INSNS(cost);
	}

      *total = COSTS_N_INSNS (4);

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	    }
	}

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);

      return arm_rtx_costs_1 (x, outer_code, total, speed);
}
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */

static bool
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		      int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

      *total = thumb1_rtx_costs (x, code, outer_code);

      if (GET_CODE (XEXP (x, 0)) != MULT)
	return arm_rtx_costs_1 (x, outer_code, total, speed);

      /* A COMPARE of a MULT is slow on XScale; the muls instruction
	 will stall until the multiplication is complete.  */
      *total = COSTS_N_INSNS (3);

      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
      if (mode == DImode
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (2);
	}

	*total = COSTS_N_INSNS (5);

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  /* If operand 1 is a constant we can more accurately
	     calculate the cost of the multiply.  The multiplier can
	     retire 15 bits on the first cycle and a further 12 on the
	     second.  We do, of course, have to load the constant into
	     a register first.  */
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
	  /* There's a general overhead of one cycle.  */
	  unsigned HOST_WIDE_INT masked_const;

	  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

	  masked_const = i & 0xffff8000;
	  if (masked_const != 0)

	  masked_const = i & 0xf8000000;
	  if (masked_const != 0)

	  *total = COSTS_N_INSNS (cost);
	}

      *total = COSTS_N_INSNS (3);

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);

      return arm_rtx_costs_1 (x, outer_code, total, speed);
}
/* RTX costs for 9e (and later) cores.  */

static bool
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		  int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

	  *total = COSTS_N_INSNS (3);

	  *total = thumb1_rtx_costs (x, code, outer_code);

      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
      if (mode == DImode
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (2);
	}

	*total = COSTS_N_INSNS (5);

      *total = COSTS_N_INSNS (2);

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	    }
	}

      *total = COSTS_N_INSNS (20);

      return arm_rtx_costs_1 (x, outer_code, total, speed);
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */

static int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)

  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
}

static int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
}

static int
arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
static int
arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
{
  rtx i_pat, d_pat;

  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (arm_tune_xscale
      && REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
	{
	  rtx shifted_operand;
	  int opno;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase the
	     cost of this dependency.  */
	  preprocess_constraints ();
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	    {
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
	    }
	}
    }

  /* XXX This is not strictly true for the FPA.  */
  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (REG_NOTE_KIND (link) == 0
      && GET_CODE (insn) == CALL_INSN)

  if ((i_pat = single_set (insn)) != NULL
      && GET_CODE (SET_SRC (i_pat)) == MEM
      && (d_pat = single_set (dep)) != NULL
      && GET_CODE (SET_DEST (d_pat)) == MEM)
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */
      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
    }
}
= 0;
7710 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7711 static const char * const strings_fp
[8] =
7714 "4", "5", "0.5", "10"
7717 static REAL_VALUE_TYPE values_fp
[8];
7720 init_fp_table (void)
7726 fp_consts_inited
= 1;
7728 fp_consts_inited
= 8;
7730 for (i
= 0; i
< fp_consts_inited
; i
++)
7732 r
= REAL_VALUE_ATOF (strings_fp
[i
], DFmode
);
7737 /* Return TRUE if rtx X is a valid immediate FP constant. */
7739 arm_const_double_rtx (rtx x
)
7744 if (!fp_consts_inited
)
7747 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7748 if (REAL_VALUE_MINUS_ZERO (r
))
7751 for (i
= 0; i
< fp_consts_inited
; i
++)
7752 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
7758 /* Return TRUE if rtx X is a valid immediate FPA constant. */
7760 neg_const_double_rtx_ok_for_fpa (rtx x
)
7765 if (!fp_consts_inited
)
7768 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7769 r
= real_value_negate (&r
);
7770 if (REAL_VALUE_MINUS_ZERO (r
))
7773 for (i
= 0; i
< 8; i
++)
7774 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).  */
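/* The helper below is an illustrative sketch added for exposition and is not
   part of the original backend; the name is hypothetical.  It decodes the
   8-bit ABCDEFGH encoding described above back into the value it represents,
   i.e. -1^s * n * 2^-r with n = 16 + EFGH and r = (BCD XOR 3).  */

static double ATTRIBUTE_UNUSED
vfp3_decode_quarter_precision_example (unsigned char abcdefgh)
{
  int sign = (abcdefgh >> 7) & 1;	 /* A: sign bit s.  */
  int r = ((abcdefgh >> 4) & 7) ^ 3;	 /* BCD: exponent, stored as r XOR 3.  */
  int n = 16 + (abcdefgh & 0xf);	 /* EFGH: mantissa, stored as n - 16.  */

  /* With 16 <= n <= 31 and 0 <= r <= 7, the magnitude lies between
     0.125 and 31.  */
  return (sign ? -1.0 : 1.0) * (double) n / (double) (1 << r);
}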
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  HOST_WIDE_INT m1, m2;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;

  if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
    return -1;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}

/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
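/* A few worked examples of the table above (added for illustration; the
   per-element constant shown is assumed to be replicated across the whole
   vector):

     0x000000ab in every i32 element -> variant 0  (output as vmov)
     0x0000ab00 in every i32 element -> variant 1  (output as vmov)
     0xffffff54 in every i32 element -> variant 6  (bitwise inverse is
						    0x000000ab, output as vmvn)
     0x00ab     in every i16 element -> variant 4  (output as vmov)
     0xab       in every i8  element -> variant 16 (output as vmov)  */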
static int
neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  for (i = 0; i < idx; i += (STRIDE))		\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0))
	return -1;

      REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);

      for (i = 1; i < n_elts; i++)
	{
	  rtx elt = CONST_VECTOR_ELT (op, i);

	  REAL_VALUE_FROM_CONST_DOUBLE (re, elt);

	  if (!REAL_VALUES_EQUAL (r0, re))
	    return -1;
	}

	*modconst = CONST_VECTOR_ELT (op, 0);
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (GET_CODE (el) == CONST_INT)
	{
	  elpart = INTVAL (el);
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	}

      for (part = 0; part < parts; part++)
	{
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  gcc_assert (idx == GET_MODE_SIZE (mode));

      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);

    *elementwidth = elsize;

    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
    }
    {
      unsigned HOST_WIDE_INT imm = 0;

      for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

      *modconst = GEN_INT (imm);
    }
}
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

int
neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;
}
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;
}
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  enum machine_mode inner = GET_MODE_INNER (mode);
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
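/* Worked example for the function above (added for illustration): for a
   vector mode with four elements, PARTS is 4, so the loop runs with i = 2 and
   then i = 1, emitting two pairwise REDUC instructions; the final step writes
   its result into OP0 while the intermediate step uses a fresh scratch
   register.  */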
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
  return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We can not take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  rtvec ops;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
    {
      rtx copy = copy_rtx (vals);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      ops = gen_rtvec (3, x, target, GEN_INT (one_var));
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
   reported source locations are bogus.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const char *err)
{
  HOST_WIDE_INT lane;

  gcc_assert (GET_CODE (operand) == CONST_INT);

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error (err);
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "lane out of range");
}

/* Bounds-check constants.  */

void
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "constant out of range");
}

HOST_WIDE_INT
neon_element_bits (enum machine_mode mode)
{
  if (mode == DImode)
    return GET_MODE_BITSIZE (mode);
  else
    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return nonzero if OP is a valid Cirrus memory address pattern.  */
int
cirrus_memory_offset (rtx op)
{
  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return 0;

  if (GET_CODE (op) == MEM)
    {
      rtx ind = XEXP (op, 0);

      /* Match: (mem (reg)).  */
      if (GET_CODE (ind) == REG)
	return 1;

      if (GET_CODE (ind) == PLUS
	  && GET_CODE (XEXP (ind, 0)) == REG
	  && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
	  && GET_CODE (XEXP (ind, 1)) == CONST_INT)
	return 1;
    }

  return 0;
}
/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (GET_CODE (ind) == REG)
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  if (GET_CODE (ind) == PLUS
      && GET_CODE (XEXP (ind, 0)) == REG
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && GET_CODE (XEXP (ind, 1)) == CONST_INT
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)  */

int
neon_vector_mem_operand (rtx op, int type)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (GET_CODE (ind) == REG)
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* FIXME: vld1 allows register post-modify.  */

  if (type == 0
      && GET_CODE (ind) == PLUS
      && GET_CODE (XEXP (ind, 0)) == REG
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && GET_CODE (XEXP (ind, 1)) == CONST_INT
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1016
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (GET_CODE (ind) == REG)
    return arm_address_register_rtx_p (ind, 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
{
      if (!TARGET_NEON_FP16)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
	return NO_REGS;
      return GENERAL_REGS;

  if (TARGET_NEON
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
      && neon_vector_mem_operand (x, 0))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE));
}
/* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
   Use by the Cirrus Maverick code which has to workaround
   a hardware bug triggered by such instructions.  */
static bool
arm_memory_load_p (rtx insn)
{
  rtx body, lhs, rhs;

  if (insn == NULL_RTX || GET_CODE (insn) != INSN)
    return false;

  body = PATTERN (insn);

  if (GET_CODE (body) != SET)
    return false;

  lhs = XEXP (body, 0);
  rhs = XEXP (body, 1);

  lhs = REG_OR_SUBREG_RTX (lhs);

  /* If the destination is not a general purpose
     register we do not have to worry.  */
  if (GET_CODE (lhs) != REG
      || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
    return false;

  /* As well as loads from memory we also have to react
     to loads of invalid constants which will be turned
     into loads from the minipool.  */
  return (GET_CODE (rhs) == MEM
	  || GET_CODE (rhs) == SYMBOL_REF
	  || note_invalid_constants (insn, -1, false));
}
/* Return TRUE if INSN is a Cirrus instruction.  */
static bool
arm_cirrus_insn_p (rtx insn)
{
  enum attr_cirrus attr;

  /* get_attr cannot accept USE or CLOBBER.  */
  if (!insn
      || GET_CODE (insn) != INSN
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return 0;

  attr = get_attr_cirrus (insn);

  return attr != CIRRUS_NOT;
}
/* Cirrus reorg for invalid instruction combinations.  */
static void
cirrus_reorg (rtx first)
{
  enum attr_cirrus attr;
  rtx body = PATTERN (first);
  rtx t;

  /* Any branch must be followed by 2 non Cirrus instructions.  */
  if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
    {
      t = next_nonnote_insn (first);

      if (arm_cirrus_insn_p (t))

      if (arm_cirrus_insn_p (next_nonnote_insn (t)))

	emit_insn_after (gen_nop (), first);
    }

  /* (float (blah)) is in parallel with a clobber.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (GET_CODE (body) == SET)
    {
      rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);

      /* cfldrd, cfldr64, cfstrd, cfstr64 must
	 be followed by a non Cirrus insn.  */
      if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
	{
	  if (arm_cirrus_insn_p (next_nonnote_insn (first)))
	    emit_insn_after (gen_nop (), first);
	}
      else if (arm_memory_load_p (first))
	{
	  unsigned int arm_regno;

	  /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
	     ldr/cfmv64hr combination where the Rd field is the same
	     in both instructions must be split with a non Cirrus
	     insn.  */

	  /* Get Arm register number for ldr insn.  */
	  if (GET_CODE (lhs) == REG)
	    arm_regno = REGNO (lhs);
	  else
	    {
	      gcc_assert (GET_CODE (rhs) == REG);
	      arm_regno = REGNO (rhs);
	    }

	  first = next_nonnote_insn (first);

	  if (! arm_cirrus_insn_p (first))
	    return;

	  body = PATTERN (first);

	  /* (float (blah)) is in parallel with a clobber.  */
	  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
	    body = XVECEXP (body, 0, 0);

	  if (GET_CODE (body) == FLOAT)
	    body = XEXP (body, 0);

	  if (get_attr_cirrus (first) == CIRRUS_MOVE
	      && GET_CODE (XEXP (body, 1)) == REG
	      && arm_regno == REGNO (XEXP (body, 1)))
	    emit_insn_after (gen_nop (), first);
	}
    }

  /* get_attr cannot accept USE or CLOBBER.  */
  if (!first
      || GET_CODE (first) != INSN
      || GET_CODE (PATTERN (first)) == USE
      || GET_CODE (PATTERN (first)) == CLOBBER)
    return;

  attr = get_attr_cirrus (first);

  /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
     must be followed by a non-coprocessor instruction.  */
  if (attr == CIRRUS_COMPARE)
    {
      t = next_nonnote_insn (first);

      if (arm_cirrus_insn_p (t))

      if (arm_cirrus_insn_p (next_nonnote_insn (t)))

	emit_insn_after (gen_nop (), first);
    }
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

    default:
      return 0;
    }
}

/* Must not copy any rtx that uses a pc-relative address.  */

static int
arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_PIC_BASE)
    return 1;

  return 0;
}

static bool
arm_cannot_copy_insn_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
}
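/* Illustrative sketch (not part of the original source): the recursive
   walks in symbol_mentioned_p and label_mentioned_p above follow the RTL
   format string, where 'e' marks a sub-expression and 'E' marks a vector
   of sub-expressions.  A generic predicate walker of the same shape would
   look roughly like this; the name rtx_mentions_p and the leaf_p callback
   are hypothetical, only GET_CODE, GET_RTX_FORMAT, GET_RTX_LENGTH, XEXP,
   XVECLEN and XVECEXP are real accessors:

     static int
     rtx_mentions_p (rtx x, int (*leaf_p) (rtx))
     {
       const char *fmt;
       int i, j;

       if (leaf_p (x))
	 return 1;

       fmt = GET_RTX_FORMAT (GET_CODE (x));
       for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
	 {
	   if (fmt[i] == 'E')
	     {
	       for (j = XVECLEN (x, i) - 1; j >= 0; j--)
		 if (rtx_mentions_p (XVECEXP (x, i, j), leaf_p))
		   return 1;
	     }
	   else if (fmt[i] == 'e' && rtx_mentions_p (XEXP (x, i), leaf_p))
	     return 1;
	 }

       return 0;
     }  */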
  enum rtx_code code = GET_CODE (x);
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((GET_CODE (XEXP (a, 0)) == REG
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
      && (GET_CODE (XEXP (b, 0)) == REG
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
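/* Illustrative sketch (not part of the original source): the core of the
   adjacency test above, restated on plain integers.  Two accesses
   [rbase, #val0] and [rbase, #val1] are "adjacent" when they differ by
   exactly one word:

     static int
     offsets_adjacent_p (long val0, long val1)
     {
       long val_diff = val1 - val0;
       return val_diff == 4 || val_diff == -4;
     }

   so, for example, the pairs (0, 4) and (8, 4) qualify while (4, 12) does
   not; the function above additionally requires the same base register,
   offsets encodable in a single add/sub, and a non-eliminable base.  */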
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	ldr	rd1, [rbase + offset]
	ldr	rd2, [rbase + offset + 4]

     to

	add	rd1, rbase, offset
	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;

  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;

      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }

  return true;
}
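/* Worked example (illustrative only, not from the original source): for
   unsorted_offsets = {8, 0, 4, 12} the caller fills in order[0] = 1, the
   slot holding the lowest offset 0.  The loop above then builds
   order = {1, 2, 0, 3}, since 4 = 0 + 4, 8 = 4 + 4 and 12 = 8 + 4.
   An input such as {0, 4, 4, 8} fails because two offsets equal 0 + 4,
   and {0, 4, 12, 16} fails because no offset equals 4 + 4.  When
   unsorted_regs is supplied, e.g. {r3, r1, r2, r4} for the ordering above,
   the visited registers r1, r2, r3, r4 must also be strictly ascending.  */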
9127 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *base
,
9128 HOST_WIDE_INT
*load_offset
)
9130 int unsorted_regs
[MAX_LDM_STM_OPS
];
9131 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
9132 int order
[MAX_LDM_STM_OPS
];
9136 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9137 easily extended if required. */
9138 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
9140 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
9142 /* Loop over the operands and check that the memory references are
9143 suitable (i.e. immediate offsets from the same base register). At
9144 the same time, extract the target register, and the memory
9146 for (i
= 0; i
< nops
; i
++)
9151 /* Convert a subreg of a mem into the mem itself. */
9152 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
9153 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
9155 gcc_assert (GET_CODE (operands
[nops
+ i
]) == MEM
);
9157 /* Don't reorder volatile memory references; it doesn't seem worth
9158 looking for the case where the order is ok anyway. */
9159 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
9162 offset
= const0_rtx
;
9164 if ((GET_CODE (reg
= XEXP (operands
[nops
+ i
], 0)) == REG
9165 || (GET_CODE (reg
) == SUBREG
9166 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9167 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
9168 && ((GET_CODE (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0))
9170 || (GET_CODE (reg
) == SUBREG
9171 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9172 && (GET_CODE (offset
= XEXP (XEXP (operands
[nops
+ i
], 0), 1))
9176 base_reg
= REGNO (reg
);
9179 if (base_reg
!= (int) REGNO (reg
))
9180 /* Not addressed from the same base register. */
9183 unsorted_regs
[i
] = (GET_CODE (operands
[i
]) == REG
9184 ? REGNO (operands
[i
])
9185 : REGNO (SUBREG_REG (operands
[i
])));
9187 /* If it isn't an integer register, or if it overwrites the
9188 base register but isn't the last insn in the list, then
9189 we can't do this. */
9190 if (unsorted_regs
[i
] < 0 || unsorted_regs
[i
] > 14
9191 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
9194 unsorted_offsets
[i
] = INTVAL (offset
);
9195 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
9199 /* Not a suitable memory address. */
9203 /* All the useful information has now been extracted from the
9204 operands into unsorted_regs and unsorted_offsets; additionally,
9205 order[0] has been set to the lowest offset in the list. Sort
9206 the offsets into order, verifying that they are adjacent, and
9207 check that the register numbers are ascending. */
9208 if (!compute_offset_order (nops
, unsorted_offsets
, order
, unsorted_regs
))
9215 for (i
= 0; i
< nops
; i
++)
9216 regs
[i
] = unsorted_regs
[order
[i
]];
9218 *load_offset
= unsorted_offsets
[order
[0]];
9221 if (unsorted_offsets
[order
[0]] == 0)
9222 ldm_case
= 1; /* ldmia */
9223 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
9224 ldm_case
= 2; /* ldmib */
9225 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
9226 ldm_case
= 3; /* ldmda */
9227 else if (unsorted_offsets
[order
[nops
- 1]] == -4)
9228 ldm_case
= 4; /* ldmdb */
9229 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
9230 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
9235 if (!multiple_operation_profitable_p (false, nops
,
9237 ? unsorted_offsets
[order
[0]] : 0))
9244 emit_ldm_seq (rtx
*operands
, int nops
)
9246 int regs
[MAX_LDM_STM_OPS
];
9248 HOST_WIDE_INT offset
;
9252 switch (load_multiple_sequence (operands
, nops
, regs
, &base_reg
, &offset
))
9255 strcpy (buf
, "ldm%(ia%)\t");
9259 strcpy (buf
, "ldm%(ib%)\t");
9263 strcpy (buf
, "ldm%(da%)\t");
9267 strcpy (buf
, "ldm%(db%)\t");
9272 sprintf (buf
, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX
,
9273 reg_names
[regs
[0]], REGISTER_PREFIX
, reg_names
[base_reg
],
9276 sprintf (buf
, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX
,
9277 reg_names
[regs
[0]], REGISTER_PREFIX
, reg_names
[base_reg
],
9279 output_asm_insn (buf
, operands
);
9281 strcpy (buf
, "ldm%(ia%)\t");
9288 sprintf (buf
+ strlen (buf
), "%s%s, {%s%s", REGISTER_PREFIX
,
9289 reg_names
[base_reg
], REGISTER_PREFIX
, reg_names
[regs
[0]]);
9291 for (i
= 1; i
< nops
; i
++)
9292 sprintf (buf
+ strlen (buf
), ", %s%s", REGISTER_PREFIX
,
9293 reg_names
[regs
[i
]]);
9295 strcat (buf
, "}\t%@ phole ldm");
9297 output_asm_insn (buf
, operands
);
9302 store_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *base
,
9303 HOST_WIDE_INT
* load_offset
)
9305 int unsorted_regs
[MAX_LDM_STM_OPS
];
9306 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
9307 int order
[MAX_LDM_STM_OPS
];
9311 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9312 easily extended if required. */
9313 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
9315 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
9317 /* Loop over the operands and check that the memory references are
9318 suitable (i.e. immediate offsets from the same base register). At
9319 the same time, extract the target register, and the memory
9321 for (i
= 0; i
< nops
; i
++)
9326 /* Convert a subreg of a mem into the mem itself. */
9327 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
9328 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
9330 gcc_assert (GET_CODE (operands
[nops
+ i
]) == MEM
);
9332 /* Don't reorder volatile memory references; it doesn't seem worth
9333 looking for the case where the order is ok anyway. */
9334 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
9337 offset
= const0_rtx
;
9339 if ((GET_CODE (reg
= XEXP (operands
[nops
+ i
], 0)) == REG
9340 || (GET_CODE (reg
) == SUBREG
9341 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9342 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
9343 && ((GET_CODE (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0))
9345 || (GET_CODE (reg
) == SUBREG
9346 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9347 && (GET_CODE (offset
= XEXP (XEXP (operands
[nops
+ i
], 0), 1))
9350 unsorted_regs
[i
] = (GET_CODE (operands
[i
]) == REG
9351 ? REGNO (operands
[i
])
9352 : REGNO (SUBREG_REG (operands
[i
])));
9354 base_reg
= REGNO (reg
);
9355 else if (base_reg
!= (int) REGNO (reg
))
9356 /* Not addressed from the same base register. */
9359 /* If it isn't an integer register, then we can't do this. */
9360 if (unsorted_regs
[i
] < 0 || unsorted_regs
[i
] > 14)
9363 unsorted_offsets
[i
] = INTVAL (offset
);
9364 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
9368 /* Not a suitable memory address. */
9372 /* All the useful information has now been extracted from the
9373 operands into unsorted_regs and unsorted_offsets; additionally,
9374 order[0] has been set to the lowest offset in the list. Sort
9375 the offsets into order, verifying that they are adjacent, and
9376 check that the register numbers are ascending. */
9377 if (!compute_offset_order (nops
, unsorted_offsets
, order
, unsorted_regs
))
9384 for (i
= 0; i
< nops
; i
++)
9385 regs
[i
] = unsorted_regs
[order
[i
]];
9387 *load_offset
= unsorted_offsets
[order
[0]];
9390 if (unsorted_offsets
[order
[0]] == 0)
9391 stm_case
= 1; /* stmia */
9392 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
9393 stm_case
= 2; /* stmib */
9394 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
9395 stm_case
= 3; /* stmda */
9396 else if (unsorted_offsets
[order
[nops
- 1]] == -4)
9397 stm_case
= 4; /* stmdb */
9401 if (!multiple_operation_profitable_p (false, nops
, 0))
9408 emit_stm_seq (rtx
*operands
, int nops
)
9410 int regs
[MAX_LDM_STM_OPS
];
9412 HOST_WIDE_INT offset
;
9416 switch (store_multiple_sequence (operands
, nops
, regs
, &base_reg
, &offset
))
9419 strcpy (buf
, "stm%(ia%)\t");
9423 strcpy (buf
, "stm%(ib%)\t");
9427 strcpy (buf
, "stm%(da%)\t");
9431 strcpy (buf
, "stm%(db%)\t");
9438 sprintf (buf
+ strlen (buf
), "%s%s, {%s%s", REGISTER_PREFIX
,
9439 reg_names
[base_reg
], REGISTER_PREFIX
, reg_names
[regs
[0]]);
9441 for (i
= 1; i
< nops
; i
++)
9442 sprintf (buf
+ strlen (buf
), ", %s%s", REGISTER_PREFIX
,
9443 reg_names
[regs
[i
]]);
9445 strcat (buf
, "}\t%@ phole stm");
9447 output_asm_insn (buf
, operands
);
9451 /* Routines for use in generating RTL. */
9454 arm_gen_load_multiple (int base_regno
, int count
, rtx from
, int up
,
9455 int write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
9457 HOST_WIDE_INT offset
= *offsetp
;
9460 int sign
= up
? 1 : -1;
9463 /* XScale has load-store double instructions, but they have stricter
9464 alignment requirements than load-store multiple, so we cannot
9467 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9468 the pipeline until completion.
9476 An ldr instruction takes 1-3 cycles, but does not block the
9485 Best case ldr will always win. However, the more ldr instructions
9486 we issue, the less likely we are to be able to schedule them well.
9487 Using ldr instructions also increases code size.
9489 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9490 for counts of 3 or 4 regs. */
9491 if (arm_tune_xscale
&& count
<= 2 && ! optimize_size
)
9497 for (i
= 0; i
< count
; i
++)
9499 addr
= plus_constant (from
, i
* 4 * sign
);
9500 mem
= adjust_automodify_address (basemem
, SImode
, addr
, offset
);
9501 emit_move_insn (gen_rtx_REG (SImode
, base_regno
+ i
), mem
);
9507 emit_move_insn (from
, plus_constant (from
, count
* 4 * sign
));
9517 result
= gen_rtx_PARALLEL (VOIDmode
,
9518 rtvec_alloc (count
+ (write_back
? 1 : 0)));
9521 XVECEXP (result
, 0, 0)
9522 = gen_rtx_SET (VOIDmode
, from
, plus_constant (from
, count
* 4 * sign
));
9527 for (j
= 0; i
< count
; i
++, j
++)
9529 addr
= plus_constant (from
, j
* 4 * sign
);
9530 mem
= adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
9531 XVECEXP (result
, 0, i
)
9532 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, base_regno
+ j
), mem
);
9543 arm_gen_store_multiple (int base_regno
, int count
, rtx to
, int up
,
9544 int write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
9546 HOST_WIDE_INT offset
= *offsetp
;
9549 int sign
= up
? 1 : -1;
9552 /* See arm_gen_load_multiple for discussion of
9553 the pros/cons of ldm/stm usage for XScale. */
9554 if (arm_tune_xscale
&& count
<= 2 && ! optimize_size
)
9560 for (i
= 0; i
< count
; i
++)
9562 addr
= plus_constant (to
, i
* 4 * sign
);
9563 mem
= adjust_automodify_address (basemem
, SImode
, addr
, offset
);
9564 emit_move_insn (mem
, gen_rtx_REG (SImode
, base_regno
+ i
));
9570 emit_move_insn (to
, plus_constant (to
, count
* 4 * sign
));
9580 result
= gen_rtx_PARALLEL (VOIDmode
,
9581 rtvec_alloc (count
+ (write_back
? 1 : 0)));
9584 XVECEXP (result
, 0, 0)
9585 = gen_rtx_SET (VOIDmode
, to
,
9586 plus_constant (to
, count
* 4 * sign
));
9591 for (j
= 0; i
< count
; i
++, j
++)
9593 addr
= plus_constant (to
, j
* 4 * sign
);
9594 mem
= adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
9595 XVECEXP (result
, 0, i
)
9596 = gen_rtx_SET (VOIDmode
, mem
, gen_rtx_REG (SImode
, base_regno
+ j
));
9607 arm_gen_movmemqi (rtx
*operands
)
9609 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
9610 HOST_WIDE_INT srcoffset
, dstoffset
;
9612 rtx src
, dst
, srcbase
, dstbase
;
9613 rtx part_bytes_reg
= NULL
;
9616 if (GET_CODE (operands
[2]) != CONST_INT
9617 || GET_CODE (operands
[3]) != CONST_INT
9618 || INTVAL (operands
[2]) > 64
9619 || INTVAL (operands
[3]) & 3)
9622 dstbase
= operands
[0];
9623 srcbase
= operands
[1];
9625 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
9626 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
9628 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
9629 out_words_to_go
= INTVAL (operands
[2]) / 4;
9630 last_bytes
= INTVAL (operands
[2]) & 3;
9631 dstoffset
= srcoffset
= 0;
9633 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
9634 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
9636 for (i
= 0; in_words_to_go
>= 2; i
+=4)
9638 if (in_words_to_go
> 4)
9639 emit_insn (arm_gen_load_multiple (0, 4, src
, TRUE
, TRUE
,
9640 srcbase
, &srcoffset
));
9642 emit_insn (arm_gen_load_multiple (0, in_words_to_go
, src
, TRUE
,
9643 FALSE
, srcbase
, &srcoffset
));
9645 if (out_words_to_go
)
9647 if (out_words_to_go
> 4)
9648 emit_insn (arm_gen_store_multiple (0, 4, dst
, TRUE
, TRUE
,
9649 dstbase
, &dstoffset
));
9650 else if (out_words_to_go
!= 1)
9651 emit_insn (arm_gen_store_multiple (0, out_words_to_go
,
9655 dstbase
, &dstoffset
));
9658 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
9659 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
9660 if (last_bytes
!= 0)
9662 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
9668 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
9669 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
9672 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9673 if (out_words_to_go
)
9677 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
9678 sreg
= copy_to_reg (mem
);
9680 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
9681 emit_move_insn (mem
, sreg
);
9684 gcc_assert (!in_words_to_go
); /* Sanity check */
9689 gcc_assert (in_words_to_go
> 0);
9691 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
9692 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
9695 gcc_assert (!last_bytes
|| part_bytes_reg
);
9697 if (BYTES_BIG_ENDIAN
&& last_bytes
)
9699 rtx tmp
= gen_reg_rtx (SImode
);
9701 /* The bytes we want are in the top end of the word. */
9702 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
9703 GEN_INT (8 * (4 - last_bytes
))));
9704 part_bytes_reg
= tmp
;
9708 mem
= adjust_automodify_address (dstbase
, QImode
,
9709 plus_constant (dst
, last_bytes
- 1),
9710 dstoffset
+ last_bytes
- 1);
9711 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
9715 tmp
= gen_reg_rtx (SImode
);
9716 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
9717 part_bytes_reg
= tmp
;
9726 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
9727 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
9731 rtx tmp
= gen_reg_rtx (SImode
);
9732 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
9733 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
9734 part_bytes_reg
= tmp
;
9741 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
9742 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
9749 /* Select a dominance comparison mode if possible for a test of the general
9750 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9751 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9752 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9753 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9754 In all cases OP will be either EQ or NE, but we don't need to know which
9755 here. If we are unable to support a dominance comparison we return
9756 CC mode. This will then fail to match for the RTL expressions that
9757 generate this call. */
9759 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
9761 enum rtx_code cond1
, cond2
;
9764 /* Currently we will probably get the wrong result if the individual
9765 comparisons are not simple. This also ensures that it is safe to
9766 reverse a comparison if necessary. */
9767 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
9769 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
9773 /* The if_then_else variant of this tests the second condition if the
9774 first passes, but is true if the first fails. Reverse the first
9775 condition to get a true "inclusive-or" expression. */
9776 if (cond_or
== DOM_CC_NX_OR_Y
)
9777 cond1
= reverse_condition (cond1
);
9779 /* If the comparisons are not equal, and one doesn't dominate the other,
9780 then we can't do this. */
9782 && !comparison_dominates_p (cond1
, cond2
)
9783 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
9788 enum rtx_code temp
= cond1
;
9796 if (cond_or
== DOM_CC_X_AND_Y
)
9801 case EQ
: return CC_DEQmode
;
9802 case LE
: return CC_DLEmode
;
9803 case LEU
: return CC_DLEUmode
;
9804 case GE
: return CC_DGEmode
;
9805 case GEU
: return CC_DGEUmode
;
9806 default: gcc_unreachable ();
9810 if (cond_or
== DOM_CC_X_AND_Y
)
9826 if (cond_or
== DOM_CC_X_AND_Y
)
9842 if (cond_or
== DOM_CC_X_AND_Y
)
9858 if (cond_or
== DOM_CC_X_AND_Y
)
9873 /* The remaining cases only occur when both comparisons are the
9876 gcc_assert (cond1
== cond2
);
9880 gcc_assert (cond1
== cond2
);
9884 gcc_assert (cond1
== cond2
);
9888 gcc_assert (cond1
== cond2
);
9892 gcc_assert (cond1
== cond2
);
9901 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
9903 /* All floating point compares return CCFP if it is an equality
9904 comparison, and CCFPE otherwise. */
9905 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
9925 if (TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
9934 /* A compare with a shifted operand. Because of canonicalization, the
9935 comparison will have to be swapped when we emit the assembler. */
9936 if (GET_MODE (y
) == SImode
9937 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
9938 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
9939 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
9940 || GET_CODE (x
) == ROTATERT
))
9943 /* This operation is performed swapped, but since we only rely on the Z
9944 flag we don't need an additional mode. */
9945 if (GET_MODE (y
) == SImode
9946 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
9947 && GET_CODE (x
) == NEG
9948 && (op
== EQ
|| op
== NE
))
9951 /* This is a special case that is used by combine to allow a
9952 comparison of a shifted byte load to be split into a zero-extend
9953 followed by a comparison of the shifted integer (only valid for
9954 equalities and unsigned inequalities). */
9955 if (GET_MODE (x
) == SImode
9956 && GET_CODE (x
) == ASHIFT
9957 && GET_CODE (XEXP (x
, 1)) == CONST_INT
&& INTVAL (XEXP (x
, 1)) == 24
9958 && GET_CODE (XEXP (x
, 0)) == SUBREG
9959 && GET_CODE (SUBREG_REG (XEXP (x
, 0))) == MEM
9960 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
9961 && (op
== EQ
|| op
== NE
9962 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
9963 && GET_CODE (y
) == CONST_INT
)
9966 /* A construct for a conditional compare, if the false arm contains
9967 0, then both conditions must be true, otherwise either condition
9968 must be true. Not all conditions are possible, so CCmode is
9969 returned if it can't be done. */
9970 if (GET_CODE (x
) == IF_THEN_ELSE
9971 && (XEXP (x
, 2) == const0_rtx
9972 || XEXP (x
, 2) == const1_rtx
)
9973 && COMPARISON_P (XEXP (x
, 0))
9974 && COMPARISON_P (XEXP (x
, 1)))
9975 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
9976 INTVAL (XEXP (x
, 2)));
9978 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
9979 if (GET_CODE (x
) == AND
9980 && COMPARISON_P (XEXP (x
, 0))
9981 && COMPARISON_P (XEXP (x
, 1)))
9982 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
9985 if (GET_CODE (x
) == IOR
9986 && COMPARISON_P (XEXP (x
, 0))
9987 && COMPARISON_P (XEXP (x
, 1)))
9988 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
9991 /* An operation (on Thumb) where we want to test for a single bit.
9992 This is done by shifting that bit up into the top bit of a
9993 scratch register; we can then branch on the sign bit. */
9995 && GET_MODE (x
) == SImode
9996 && (op
== EQ
|| op
== NE
)
9997 && GET_CODE (x
) == ZERO_EXTRACT
9998 && XEXP (x
, 1) == const1_rtx
)
10001 /* An operation that sets the condition codes as a side-effect, the
10002 V flag is not set correctly, so we can only use comparisons where
10003 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10005 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10006 if (GET_MODE (x
) == SImode
10008 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
10009 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
10010 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
10011 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
10012 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
10013 || GET_CODE (x
) == LSHIFTRT
10014 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
10015 || GET_CODE (x
) == ROTATERT
10016 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
10017 return CC_NOOVmode
;
10019 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
10022 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
10023 && GET_CODE (x
) == PLUS
10024 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}

/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
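/* Usage sketch (hypothetical caller, not from this file): expanders that
   need a condition typically materialize the flags once and then test
   them against zero, e.g.

     rtx cc = arm_gen_compare_reg (GE, op0, op1);
     rtx cond = gen_rtx_GE (VOIDmode, cc, const0_rtx);

   and feed COND to a conditional branch or conditional move pattern; the
   mode chosen by SELECT_CC_MODE encodes which flag bits are valid for the
   requested comparison.  */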
10057 arm_reload_in_hi (rtx
*operands
)
10059 rtx ref
= operands
[1];
10061 HOST_WIDE_INT offset
= 0;
10063 if (GET_CODE (ref
) == SUBREG
)
10065 offset
= SUBREG_BYTE (ref
);
10066 ref
= SUBREG_REG (ref
);
10069 if (GET_CODE (ref
) == REG
)
10071 /* We have a pseudo which has been spilt onto the stack; there
10072 are two cases here: the first where there is a simple
10073 stack-slot replacement and a second where the stack-slot is
10074 out of range, or is used as a subreg. */
10075 if (reg_equiv_mem
[REGNO (ref
)])
10077 ref
= reg_equiv_mem
[REGNO (ref
)];
10078 base
= find_replacement (&XEXP (ref
, 0));
10081 /* The slot is out of range, or was dressed up in a SUBREG. */
10082 base
= reg_equiv_address
[REGNO (ref
)];
10085 base
= find_replacement (&XEXP (ref
, 0));
10087 /* Handle the case where the address is too complex to be offset by 1. */
10088 if (GET_CODE (base
) == MINUS
10089 || (GET_CODE (base
) == PLUS
&& GET_CODE (XEXP (base
, 1)) != CONST_INT
))
10091 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10093 emit_set_insn (base_plus
, base
);
10096 else if (GET_CODE (base
) == PLUS
)
10098 /* The addend must be CONST_INT, or we would have dealt with it above. */
10099 HOST_WIDE_INT hi
, lo
;
10101 offset
+= INTVAL (XEXP (base
, 1));
10102 base
= XEXP (base
, 0);
      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);
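      /* Worked example (illustrative only): for offset = 4100 the split
	 above gives lo = 4100 & 0xfff = 4 and hi = 4096, and the assertion
	 hi + lo == offset holds.  For offset = 4095 the corner case kicks
	 in: lo is reduced to 4095 & 0x7ff = 2047 so that lo + 1 is still a
	 legal single-instruction offset, leaving hi = 2048 for the add
	 emitted below.  A stand-alone restatement of the arithmetic:

	   long lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
	   if (lo == 4095)
	     lo &= 0x7ff;
	   long hi = offset - lo;

	 the xor/subtract dance in the real code only sign-extends HI when
	 HOST_WIDE_INT is wider than 32 bits.  */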
10124 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10126 /* Get the base address; addsi3 knows how to handle constants
10127 that require more than one insn. */
10128 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
10134 /* Operands[2] may overlap operands[0] (though it won't overlap
10135 operands[1]), that's why we asked for a DImode reg -- so we can
10136 use the bit that does not overlap. */
10137 if (REGNO (operands
[2]) == REGNO (operands
[0]))
10138 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10140 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
10142 emit_insn (gen_zero_extendqisi2 (scratch
,
10143 gen_rtx_MEM (QImode
,
10144 plus_constant (base
,
10146 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10147 gen_rtx_MEM (QImode
,
10148 plus_constant (base
,
10150 if (!BYTES_BIG_ENDIAN
)
10151 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10152 gen_rtx_IOR (SImode
,
10155 gen_rtx_SUBREG (SImode
, operands
[0], 0),
10159 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10160 gen_rtx_IOR (SImode
,
10161 gen_rtx_ASHIFT (SImode
, scratch
,
10163 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
10166 /* Handle storing a half-word to memory during reload by synthesizing as two
10167 byte stores. Take care not to clobber the input values until after we
10168 have moved them somewhere safe. This code assumes that if the DImode
10169 scratch in operands[2] overlaps either the input value or output address
10170 in some way, then that value must die in this insn (we absolutely need
10171 two scratch registers for some corner cases). */
10173 arm_reload_out_hi (rtx
*operands
)
10175 rtx ref
= operands
[0];
10176 rtx outval
= operands
[1];
10178 HOST_WIDE_INT offset
= 0;
10180 if (GET_CODE (ref
) == SUBREG
)
10182 offset
= SUBREG_BYTE (ref
);
10183 ref
= SUBREG_REG (ref
);
10186 if (GET_CODE (ref
) == REG
)
10188 /* We have a pseudo which has been spilt onto the stack; there
10189 are two cases here: the first where there is a simple
10190 stack-slot replacement and a second where the stack-slot is
10191 out of range, or is used as a subreg. */
10192 if (reg_equiv_mem
[REGNO (ref
)])
10194 ref
= reg_equiv_mem
[REGNO (ref
)];
10195 base
= find_replacement (&XEXP (ref
, 0));
10198 /* The slot is out of range, or was dressed up in a SUBREG. */
10199 base
= reg_equiv_address
[REGNO (ref
)];
10202 base
= find_replacement (&XEXP (ref
, 0));
10204 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
10206 /* Handle the case where the address is too complex to be offset by 1. */
10207 if (GET_CODE (base
) == MINUS
10208 || (GET_CODE (base
) == PLUS
&& GET_CODE (XEXP (base
, 1)) != CONST_INT
))
10210 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10212 /* Be careful not to destroy OUTVAL. */
10213 if (reg_overlap_mentioned_p (base_plus
, outval
))
10215 /* Updating base_plus might destroy outval, see if we can
10216 swap the scratch and base_plus. */
10217 if (!reg_overlap_mentioned_p (scratch
, outval
))
10220 scratch
= base_plus
;
10225 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
10227 /* Be conservative and copy OUTVAL into the scratch now,
10228 this should only be necessary if outval is a subreg
10229 of something larger than a word. */
10230 /* XXX Might this clobber base? I can't see how it can,
10231 since scratch is known to overlap with OUTVAL, and
10232 must be wider than a word. */
10233 emit_insn (gen_movhi (scratch_hi
, outval
));
10234 outval
= scratch_hi
;
10238 emit_set_insn (base_plus
, base
);
10241 else if (GET_CODE (base
) == PLUS
)
10243 /* The addend must be CONST_INT, or we would have dealt with it above. */
10244 HOST_WIDE_INT hi
, lo
;
10246 offset
+= INTVAL (XEXP (base
, 1));
10247 base
= XEXP (base
, 0);
10249 /* Rework the address into a legal sequence of insns. */
10250 /* Valid range for lo is -4095 -> 4095 */
10253 : -((-offset
) & 0xfff));
10255 /* Corner case, if lo is the max offset then we would be out of range
10256 once we have added the additional 1 below, so bump the msb into the
10257 pre-loading insn(s). */
10261 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
10262 ^ (HOST_WIDE_INT
) 0x80000000)
10263 - (HOST_WIDE_INT
) 0x80000000);
10265 gcc_assert (hi
+ lo
== offset
);
10269 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10271 /* Be careful not to destroy OUTVAL. */
10272 if (reg_overlap_mentioned_p (base_plus
, outval
))
10274 /* Updating base_plus might destroy outval, see if we
10275 can swap the scratch and base_plus. */
10276 if (!reg_overlap_mentioned_p (scratch
, outval
))
10279 scratch
= base_plus
;
10284 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
10286 /* Be conservative and copy outval into scratch now,
10287 this should only be necessary if outval is a
10288 subreg of something larger than a word. */
10289 /* XXX Might this clobber base? I can't see how it
10290 can, since scratch is known to overlap with
10292 emit_insn (gen_movhi (scratch_hi
, outval
));
10293 outval
= scratch_hi
;
10297 /* Get the base address; addsi3 knows how to handle constants
10298 that require more than one insn. */
10299 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
10305 if (BYTES_BIG_ENDIAN
)
10307 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
10308 plus_constant (base
, offset
+ 1)),
10309 gen_lowpart (QImode
, outval
)));
10310 emit_insn (gen_lshrsi3 (scratch
,
10311 gen_rtx_SUBREG (SImode
, outval
, 0),
10313 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (base
, offset
)),
10314 gen_lowpart (QImode
, scratch
)));
10318 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (base
, offset
)),
10319 gen_lowpart (QImode
, outval
)));
10320 emit_insn (gen_lshrsi3 (scratch
,
10321 gen_rtx_SUBREG (SImode
, outval
, 0),
10323 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
10324 plus_constant (base
, offset
+ 1)),
10325 gen_lowpart (QImode
, scratch
)));
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */

static bool
arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}


/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */

bool
arm_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}


/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
   byte of the register has useful data, and return the opposite if the
   most significant byte does.
   For AAPCS, small aggregates and small complex types are always padded
   upwards.  */

bool
arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED
      && BYTES_BIG_ENDIAN
      && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
      && int_size_in_bytes (type) <= 4)
    return true;

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
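/* Illustrative example (restating the rules above, not compiled): consider
   a 3-byte aggregate passed on a big-endian AAPCS target,

     struct s { char a, b, c; };

   arm_pad_reg_upward returns true for it (an aggregate of at most 4 bytes),
   i.e. in the comment's sense above its useful bytes occupy the
   least-significant end of the register even though the target is
   big-endian.  A small *integral* value passed on the stack, by contrast,
   makes arm_pad_arg_upward return false on that target, and on
   little-endian or legacy APCS targets both functions simply follow
   !BYTES_BIG_ENDIAN / DEFAULT_FUNCTION_ARG_PADDING.  */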
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fprintf (f, ", ");
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode *next;
  Mnode *prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  enum machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *next;
  rtx insn;
  HOST_WIDE_INT address;
  rtx *loc;
  enum machine_mode mode;
  int fix_size;
  rtx value;
  Mnode *minipool;
  HOST_WIDE_INT forwards;
  HOST_WIDE_INT backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)

static Mnode *minipool_vector_head;
static Mnode *minipool_vector_tail;
static rtx minipool_vector_label;
static int minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *minipool_fix_head;
Mfix *minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *minipool_barrier;
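/* Illustrative example (not compiled): MINIPOOL_FIX_SIZE above rounds
   sub-word entries up to a full word, so pool bookkeeping is always in
   multiples of 4 bytes:

     MINIPOOL_FIX_SIZE (QImode)  => 4    (GET_MODE_SIZE == 1, rounded up)
     MINIPOOL_FIX_SIZE (HImode)  => 4    (GET_MODE_SIZE == 2, rounded up)
     MINIPOOL_FIX_SIZE (SImode)  => 4
     MINIPOOL_FIX_SIZE (DImode)  => 8    (with ARM_DOUBLEWORD_ALIGN this
					  also implies 8-byte pool alignment)
     MINIPOOL_FIX_SIZE (TImode)  => 16  */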
/* Determines if INSN is the start of a jump table.  Returns the end
   of the TABLE or NULL_RTX.  */
static rtx
is_jump_table (rtx insn)
{
  rtx table;

  if (GET_CODE (insn) == JUMP_INSN
      && JUMP_LABEL (insn) != NULL
      && ((table = next_real_insn (JUMP_LABEL (insn)))
	  == next_real_insn (insn))
      && table != NULL
      && GET_CODE (table) == JUMP_INSN
      && (GET_CODE (PATTERN (table)) == ADDR_VEC
	  || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
    return table;

  return NULL_RTX;
}
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

static HOST_WIDE_INT
get_jump_table_size (rtx insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~(HOST_WIDE_INT)1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
10627 /* Move a minipool fix MP from its current location to before MAX_MP.
10628 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10629 constraints may need updating. */
10631 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
10632 HOST_WIDE_INT max_address
)
10634 /* The code below assumes these are different. */
10635 gcc_assert (mp
!= max_mp
);
10637 if (max_mp
== NULL
)
10639 if (max_address
< mp
->max_address
)
10640 mp
->max_address
= max_address
;
10644 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
10645 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
10647 mp
->max_address
= max_address
;
10649 /* Unlink MP from its current position. Since max_mp is non-null,
10650 mp->prev must be non-null. */
10651 mp
->prev
->next
= mp
->next
;
10652 if (mp
->next
!= NULL
)
10653 mp
->next
->prev
= mp
->prev
;
10655 minipool_vector_tail
= mp
->prev
;
10657 /* Re-insert it before MAX_MP. */
10659 mp
->prev
= max_mp
->prev
;
10662 if (mp
->prev
!= NULL
)
10663 mp
->prev
->next
= mp
;
10665 minipool_vector_head
= mp
;
10668 /* Save the new entry. */
10671 /* Scan over the preceding entries and adjust their addresses as
10673 while (mp
->prev
!= NULL
10674 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
10676 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
10683 /* Add a constant to the minipool for a forward reference. Returns the
10684 node added or NULL if the constant will not fit in this pool. */
10686 add_minipool_forward_ref (Mfix
*fix
)
10688 /* If set, max_mp is the first pool_entry that has a lower
10689 constraint than the one we are trying to add. */
10690 Mnode
* max_mp
= NULL
;
10691 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
10694 /* If the minipool starts before the end of FIX->INSN then this FIX
10695 can not be placed into the current pool. Furthermore, adding the
10696 new constant pool entry may cause the pool to start FIX_SIZE bytes
10698 if (minipool_vector_head
&&
10699 (fix
->address
+ get_attr_length (fix
->insn
)
10700 >= minipool_vector_head
->max_address
- fix
->fix_size
))
10703 /* Scan the pool to see if a constant with the same value has
10704 already been added. While we are doing this, also note the
10705 location where we must insert the constant if it doesn't already
10707 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
10709 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
10710 && fix
->mode
== mp
->mode
10711 && (GET_CODE (fix
->value
) != CODE_LABEL
10712 || (CODE_LABEL_NUMBER (fix
->value
)
10713 == CODE_LABEL_NUMBER (mp
->value
)))
10714 && rtx_equal_p (fix
->value
, mp
->value
))
10716 /* More than one fix references this entry. */
10718 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
10721 /* Note the insertion point if necessary. */
10723 && mp
->max_address
> max_address
)
10726 /* If we are inserting an 8-bytes aligned quantity and
10727 we have not already found an insertion point, then
10728 make sure that all such 8-byte aligned quantities are
10729 placed at the start of the pool. */
10730 if (ARM_DOUBLEWORD_ALIGN
10732 && fix
->fix_size
>= 8
10733 && mp
->fix_size
< 8)
10736 max_address
= mp
->max_address
;
10740 /* The value is not currently in the minipool, so we need to create
10741 a new entry for it. If MAX_MP is NULL, the entry will be put on
10742 the end of the list since the placement is less constrained than
10743 any existing entry. Otherwise, we insert the new fix before
10744 MAX_MP and, if necessary, adjust the constraints on the other
10747 mp
->fix_size
= fix
->fix_size
;
10748 mp
->mode
= fix
->mode
;
10749 mp
->value
= fix
->value
;
10751 /* Not yet required for a backwards ref. */
10752 mp
->min_address
= -65536;
10754 if (max_mp
== NULL
)
10756 mp
->max_address
= max_address
;
10758 mp
->prev
= minipool_vector_tail
;
10760 if (mp
->prev
== NULL
)
10762 minipool_vector_head
= mp
;
10763 minipool_vector_label
= gen_label_rtx ();
10766 mp
->prev
->next
= mp
;
10768 minipool_vector_tail
= mp
;
10772 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
10773 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
10775 mp
->max_address
= max_address
;
10778 mp
->prev
= max_mp
->prev
;
10780 if (mp
->prev
!= NULL
)
10781 mp
->prev
->next
= mp
;
10783 minipool_vector_head
= mp
;
10786 /* Save the new entry. */
10789 /* Scan over the preceding entries and adjust their addresses as
10791 while (mp
->prev
!= NULL
10792 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
10794 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
10802 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
10803 HOST_WIDE_INT min_address
)
10805 HOST_WIDE_INT offset
;
10807 /* The code below assumes these are different. */
10808 gcc_assert (mp
!= min_mp
);
10810 if (min_mp
== NULL
)
10812 if (min_address
> mp
->min_address
)
10813 mp
->min_address
= min_address
;
10817 /* We will adjust this below if it is too loose. */
10818 mp
->min_address
= min_address
;
10820 /* Unlink MP from its current position. Since min_mp is non-null,
10821 mp->next must be non-null. */
10822 mp
->next
->prev
= mp
->prev
;
10823 if (mp
->prev
!= NULL
)
10824 mp
->prev
->next
= mp
->next
;
10826 minipool_vector_head
= mp
->next
;
10828 /* Reinsert it after MIN_MP. */
10830 mp
->next
= min_mp
->next
;
10832 if (mp
->next
!= NULL
)
10833 mp
->next
->prev
= mp
;
10835 minipool_vector_tail
= mp
;
10841 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
10843 mp
->offset
= offset
;
10844 if (mp
->refcount
> 0)
10845 offset
+= mp
->fix_size
;
10847 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
10848 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
10854 /* Add a constant to the minipool for a backward reference. Returns the
10855 node added or NULL if the constant will not fit in this pool.
10857 Note that the code for insertion for a backwards reference can be
10858 somewhat confusing because the calculated offsets for each fix do
10859 not take into account the size of the pool (which is still under
10862 add_minipool_backward_ref (Mfix
*fix
)
10864 /* If set, min_mp is the last pool_entry that has a lower constraint
10865 than the one we are trying to add. */
10866 Mnode
*min_mp
= NULL
;
10867 /* This can be negative, since it is only a constraint. */
10868 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
10871 /* If we can't reach the current pool from this insn, or if we can't
10872 insert this entry at the end of the pool without pushing other
10873 fixes out of range, then we don't try. This ensures that we
10874 can't fail later on. */
10875 if (min_address
>= minipool_barrier
->address
10876 || (minipool_vector_tail
->min_address
+ fix
->fix_size
10877 >= minipool_barrier
->address
))
10880 /* Scan the pool to see if a constant with the same value has
10881 already been added. While we are doing this, also note the
10882 location where we must insert the constant if it doesn't already
10884 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
10886 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
10887 && fix
->mode
== mp
->mode
10888 && (GET_CODE (fix
->value
) != CODE_LABEL
10889 || (CODE_LABEL_NUMBER (fix
->value
)
10890 == CODE_LABEL_NUMBER (mp
->value
)))
10891 && rtx_equal_p (fix
->value
, mp
->value
)
10892 /* Check that there is enough slack to move this entry to the
10893 end of the table (this is conservative). */
10894 && (mp
->max_address
10895 > (minipool_barrier
->address
10896 + minipool_vector_tail
->offset
10897 + minipool_vector_tail
->fix_size
)))
10900 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
10903 if (min_mp
!= NULL
)
10904 mp
->min_address
+= fix
->fix_size
;
10907 /* Note the insertion point if necessary. */
10908 if (mp
->min_address
< min_address
)
10910 /* For now, we do not allow the insertion of 8-byte alignment
10911 requiring nodes anywhere but at the start of the pool. */
10912 if (ARM_DOUBLEWORD_ALIGN
10913 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
10918 else if (mp
->max_address
10919 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
10921 /* Inserting before this entry would push the fix beyond
10922 its maximum address (which can happen if we have
10923 re-located a forwards fix); force the new fix to come
10925 if (ARM_DOUBLEWORD_ALIGN
10926 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
10931 min_address
= mp
->min_address
+ fix
->fix_size
;
10934 /* Do not insert a non-8-byte aligned quantity before 8-byte
10935 aligned quantities. */
10936 else if (ARM_DOUBLEWORD_ALIGN
10937 && fix
->fix_size
< 8
10938 && mp
->fix_size
>= 8)
10941 min_address
= mp
->min_address
+ fix
->fix_size
;
10946 /* We need to create a new entry. */
10948 mp
->fix_size
= fix
->fix_size
;
10949 mp
->mode
= fix
->mode
;
10950 mp
->value
= fix
->value
;
10952 mp
->max_address
= minipool_barrier
->address
+ 65536;
10954 mp
->min_address
= min_address
;
10956 if (min_mp
== NULL
)
10959 mp
->next
= minipool_vector_head
;
10961 if (mp
->next
== NULL
)
10963 minipool_vector_tail
= mp
;
10964 minipool_vector_label
= gen_label_rtx ();
10967 mp
->next
->prev
= mp
;
10969 minipool_vector_head
= mp
;
10973 mp
->next
= min_mp
->next
;
10977 if (mp
->next
!= NULL
)
10978 mp
->next
->prev
= mp
;
10980 minipool_vector_tail
= mp
;
10983 /* Save the new entry. */
10991 /* Scan over the following entries and adjust their offsets. */
10992 while (mp
->next
!= NULL
)
10994 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
10995 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
10998 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
11000 mp
->next
->offset
= mp
->offset
;
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
11025 /* Output the literal table */
11027 dump_minipool (rtx scan
)
11033 if (ARM_DOUBLEWORD_ALIGN
)
11034 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11035 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
11042 fprintf (dump_file
,
11043 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11044 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
11046 scan
= emit_label_after (gen_label_rtx (), scan
);
11047 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
11048 scan
= emit_label_after (minipool_vector_label
, scan
);
11050 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
11052 if (mp
->refcount
> 0)
11056 fprintf (dump_file
,
11057 ";; Offset %u, min %ld, max %ld ",
11058 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
11059 (unsigned long) mp
->max_address
);
11060 arm_print_value (dump_file
, mp
->value
);
11061 fputc ('\n', dump_file
);
11064 switch (mp
->fix_size
)
11066 #ifdef HAVE_consttable_1
11068 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
11072 #ifdef HAVE_consttable_2
11074 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
11078 #ifdef HAVE_consttable_4
11080 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
11084 #ifdef HAVE_consttable_8
11086 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
11090 #ifdef HAVE_consttable_16
11092 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
11097 gcc_unreachable ();
11105 minipool_vector_head
= minipool_vector_tail
= NULL
;
11106 scan
= emit_insn_after (gen_consttable_end (), scan
);
11107 scan
= emit_barrier_after (scan
);
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && GET_CODE (next) == CODE_LABEL)
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)

  HOST_WIDE_INT count = 0;
  rtx from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx selected = NULL;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (GET_CODE (from) != BARRIER);

      /* Count the length of this insn.  */
      count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      tmp = is_jump_table (from);

	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	      selected_cost = new_cost;
	      selected_address = fix->address + count;

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	  selected_cost = new_cost;
	  selected_address = fix->address + count;

      from = NEXT_INSN (from);

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;
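/* Illustrative sketch, not part of the original source: the jump and
   barrier created above give the insn stream this shape (".Lskip" is
   a placeholder for the label made by gen_label_rtx ()):

	...			@ SELECTED, the insn chosen above
	b	.Lskip		@ new JUMP_INSN branching around the pool
	@ BARRIER -- dump_minipool will emit the literal pool here
   .Lskip:
	...			@ execution resumes after the pool  */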
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
push_minipool_barrier (rtx insn, HOST_WIDE_INT address)

  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->address = address;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
		   enum machine_mode mode, rtx value)

  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->address = address;

  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)

  fprintf (dump_file,
	   ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	   GET_MODE_NAME (mode),
	   INSN_UID (insn), (unsigned long) address,
	   -1 * (long)fix->backwards, (long)fix->forwards);
  arm_print_value (dump_file, fix->value);
  fprintf (dump_file, "\n");

  /* Add it to the chain of fixes.  */
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
arm_const_double_inline_cost (rtx val)

  rtx lowpart, highpart;
  enum machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (lowpart) == CONST_INT);
  gcc_assert (GET_CODE (highpart) == CONST_INT);

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
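/* Worked example, not part of the original source: for the 64-bit
   constant 0x000000FF0000001E the low word 0x1E and the high word
   0xFF are both valid ARM immediates, so arm_gen_constant charges one
   insn for each half and the inline cost is 2 (e.g. two "mov"s),
   whereas a value whose halves each need several data-processing
   insns gets a correspondingly larger cost and is more likely to be
   pushed out to the literal pool instead.  */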
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
arm_const_double_by_parts (rtx val)

  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  The function returns TRUE if any fixups were needed/pushed.
   This is used by arm_memory_load_p() which needs to know about loads
   of constants that will be converted into minipool loads.  */
note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)

  bool result = false;

  extract_insn (insn);

  if (!constrain_operands (1))
    fatal_insn_not_found (insn);

  if (recog_data.n_alternatives == 0)

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints ();

  for (opno = 0; opno < recog_data.n_operands; opno++)

      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (recog_op_alt[opno][which_alternative].memory_ok)

	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	      push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				 recog_data.operand_mode[opno], op);

	  else if (GET_CODE (op) == MEM
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))

	      rtx cop = avoid_constant_pool_reference (op);

	      /* Casting the address of something to a mode narrower
		 than a word can cause avoid_constant_pool_reference()
		 to return the pool reference itself.  That's no good to
		 us here.  Let's just hope that we can use the
		 constant pool value directly.  */
	      cop = get_pool_constant (XEXP (op, 0));

	      push_minipool_fix (insn, address,
				 recog_data.operand_loc[opno],
				 recog_data.operand_mode[opno], cop);
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
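/* Illustrative note, not part of the original source: in ARM state a
   pc-relative literal load such as

	ldr	r0, .L123	@ offset limited to roughly +/-4KB

   can only reach a pool within the insn's pool_range attribute, which
   is why the code below splits the function's constants into several
   "minipools" and places each one close enough to every insn that
   uses it.  ".L123" is a made-up label.  */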
11449 HOST_WIDE_INT address
= 0;
11452 minipool_fix_head
= minipool_fix_tail
= NULL
;
11454 /* The first insn must always be a note, or the code below won't
11455 scan it properly. */
11456 insn
= get_insns ();
11457 gcc_assert (GET_CODE (insn
) == NOTE
);
11460 /* Scan all the insns and record the operands that will need fixing. */
11461 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
11463 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11464 && (arm_cirrus_insn_p (insn
)
11465 || GET_CODE (insn
) == JUMP_INSN
11466 || arm_memory_load_p (insn
)))
11467 cirrus_reorg (insn
);
11469 if (GET_CODE (insn
) == BARRIER
)
11470 push_minipool_barrier (insn
, address
);
11471 else if (INSN_P (insn
))
11475 note_invalid_constants (insn
, address
, true);
11476 address
+= get_attr_length (insn
);
11478 /* If the insn is a vector jump, add the size of the table
11479 and skip the table. */
11480 if ((table
= is_jump_table (insn
)) != NULL
)
11482 address
+= get_jump_table_size (table
);
11488 fix
= minipool_fix_head
;
11490 /* Now scan the fixups and perform the required changes. */
11495 Mfix
* last_added_fix
;
11496 Mfix
* last_barrier
= NULL
;
11499 /* Skip any further barriers before the next fix. */
11500 while (fix
&& GET_CODE (fix
->insn
) == BARRIER
)
11503 /* No more fixes. */
11507 last_added_fix
= NULL
;
11509 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
11511 if (GET_CODE (ftmp
->insn
) == BARRIER
)
11513 if (ftmp
->address
>= minipool_vector_head
->max_address
)
11516 last_barrier
= ftmp
;
11518 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
11521 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
11524 /* If we found a barrier, drop back to that; any fixes that we
11525 could have reached but come after the barrier will now go in
11526 the next mini-pool. */
11527 if (last_barrier
!= NULL
)
11529 /* Reduce the refcount for those fixes that won't go into this
11531 for (fdel
= last_barrier
->next
;
11532 fdel
&& fdel
!= ftmp
;
11535 fdel
->minipool
->refcount
--;
11536 fdel
->minipool
= NULL
;
11539 ftmp
= last_barrier
;
11543 /* ftmp is first fix that we can't fit into this pool and
11544 there no natural barriers that we could use. Insert a
11545 new barrier in the code somewhere between the previous
11546 fix and this one, and arrange to jump around it. */
11547 HOST_WIDE_INT max_address
;
11549 /* The last item on the list of fixes must be a barrier, so
11550 we can never run off the end of the list of fixes without
11551 last_barrier being set. */
11554 max_address
= minipool_vector_head
->max_address
;
11555 /* Check that there isn't another fix that is in range that
11556 we couldn't fit into this pool because the pool was
11557 already too large: we need to put the pool before such an
11558 instruction. The pool itself may come just after the
11559 fix because create_fix_barrier also allows space for a
11560 jump instruction. */
11561 if (ftmp
->address
< max_address
)
11562 max_address
= ftmp
->address
+ 1;
11564 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
11567 assign_minipool_offsets (last_barrier
);
11571 if (GET_CODE (ftmp
->insn
) != BARRIER
11572 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
11579 /* Scan over the fixes we have identified for this pool, fixing them
11580 up and adding the constants to the pool itself. */
11581 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
11582 this_fix
= this_fix
->next
)
11583 if (GET_CODE (this_fix
->insn
) != BARRIER
)
11586 = plus_constant (gen_rtx_LABEL_REF (VOIDmode
,
11587 minipool_vector_label
),
11588 this_fix
->minipool
->offset
);
11589 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
11592 dump_minipool (last_barrier
->insn
);
11596 /* From now on we must synthesize any constants that we can't handle
11597 directly. This can happen if the RTL gets split during final
11598 instruction generation. */
11599 after_arm_reorg
= 1;
11601 /* Free the minipool memory. */
11602 obstack_free (&minipool_obstack
, minipool_startobj
);
/* Routines to output assembly language.  */

/* If the rtx is the correct value then return the string of the number.
   In this way we can ensure that valid double constants are generated even
   when cross compiling.  */
fp_immediate_constant (rtx x)

  if (!fp_consts_inited)

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
      return strings_fp[i];

  gcc_unreachable ();

/* As for fp_immediate_constant, but value is passed directly, not in rtx.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)

  if (!fp_consts_inited)

  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (*r, values_fp[i]))
      return strings_fp[i];

  gcc_unreachable ();
/* Output the operands of a LDM/STM instruction to STREAM.
   MASK is the ARM register set mask of which only bits 0-15 are important.
   REG is the base register, either the frame pointer or the stack pointer,
   INSTR is the possibly suffixed load or store instruction.
   RFE is nonzero if the instruction should also copy spsr to cpsr.  */
print_multi_reg (FILE *stream, const char *instr, unsigned reg,
		 unsigned long mask, int rfe)

  bool not_first = FALSE;

  gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));

  fputc ('\t', stream);
  asm_fprintf (stream, instr, reg);
  fputc ('{', stream);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
	if (not_first)
	  fprintf (stream, ", ");

	asm_fprintf (stream, "%r", i);
	not_first = TRUE;

  if (rfe)
    fprintf (stream, "}^\n");
  else
    fprintf (stream, "}\n");
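/* Illustrative sketch, not part of the original source: called with
   INSTR = "ldmfd\t%r!, ", REG = SP_REGNUM and a MASK covering r4, r5
   and lr, the function above prints

	ldmfd	sp!, {r4, r5, lr}

   and when RFE is nonzero (PC must then be in the mask) the closing
   brace becomes "}^" so that SPSR is copied back into CPSR on the
   return.  The INSTR string here is only an example.  */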
/* Output a FLDMD instruction to STREAM.
   BASE is the register containing the address.
   REG and COUNT specify the register range.
   Extra registers may be added to avoid hardware bugs.

   We output FLDMD even for ARMv5 VFP implementations.  Although
   FLDMD is technically not supported until ARMv6, it is believed
   that all VFP implementations support its use in this context.  */
vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)

  /* Workaround ARM10 VFPr1 bug.  */
  if (count == 2 && !arm_arch6)

  /* FLDMD may not load more than 16 doubleword registers at a time.  Split the
     load into multiple parts if we have to handle more than 16 registers.  */
  if (count > 16)
      vfp_output_fldmd (stream, base, reg, 16);
      vfp_output_fldmd (stream, base, reg + 16, count - 16);
      return;

  fputc ('\t', stream);
  asm_fprintf (stream, "fldmfdd\t%r!, {", base);

  for (i = reg; i < reg + count; i++)
      if (i > reg)
	fputs (", ", stream);
      asm_fprintf (stream, "d%d", i);

  fputs ("}\n", stream);
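/* Illustrative sketch, not part of the original source: a call such
   as vfp_output_fldmd (stream, SP_REGNUM, 8, 3) prints

	fldmfdd	sp!, {d8, d9, d10}

   i.e. a pop of three double-precision VFP registers with base
   writeback; the register numbers are just an example.  */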
/* Output the assembly for a store multiple.  */
vfp_output_fstmd (rtx * operands)

  strcpy (pattern, "fstmfdd\t%m0!, {%P1");
  p = strlen (pattern);

  gcc_assert (GET_CODE (operands[1]) == REG);

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
      p += sprintf (&pattern[p], ", d%d", base + i);

  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
11750 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
11751 number of bytes pushed. */
11754 vfp_emit_fstmd (int base_reg
, int count
)
11761 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
11762 register pairs are stored by a store multiple insn. We avoid this
11763 by pushing an extra pair. */
11764 if (count
== 2 && !arm_arch6
)
11766 if (base_reg
== LAST_VFP_REGNUM
- 3)
11771 /* FSTMD may not store more than 16 doubleword registers at once. Split
11772 larger stores into multiple parts (up to a maximum of two, in
11777 /* NOTE: base_reg is an internal register number, so each D register
11779 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
11780 saved
+= vfp_emit_fstmd (base_reg
, 16);
11784 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
11785 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
11787 reg
= gen_rtx_REG (DFmode
, base_reg
);
11790 XVECEXP (par
, 0, 0)
11791 = gen_rtx_SET (VOIDmode
,
11794 gen_rtx_PRE_MODIFY (Pmode
,
11797 (stack_pointer_rtx
,
11800 gen_rtx_UNSPEC (BLKmode
,
11801 gen_rtvec (1, reg
),
11802 UNSPEC_PUSH_MULT
));
11804 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
11805 plus_constant (stack_pointer_rtx
, -(count
* 8)));
11806 RTX_FRAME_RELATED_P (tmp
) = 1;
11807 XVECEXP (dwarf
, 0, 0) = tmp
;
11809 tmp
= gen_rtx_SET (VOIDmode
,
11810 gen_frame_mem (DFmode
, stack_pointer_rtx
),
11812 RTX_FRAME_RELATED_P (tmp
) = 1;
11813 XVECEXP (dwarf
, 0, 1) = tmp
;
11815 for (i
= 1; i
< count
; i
++)
11817 reg
= gen_rtx_REG (DFmode
, base_reg
);
11819 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
11821 tmp
= gen_rtx_SET (VOIDmode
,
11822 gen_frame_mem (DFmode
,
11823 plus_constant (stack_pointer_rtx
,
11826 RTX_FRAME_RELATED_P (tmp
) = 1;
11827 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
11830 par
= emit_insn (par
);
11831 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
11832 RTX_FRAME_RELATED_P (par
) = 1;
11837 /* Emit a call instruction with pattern PAT. ADDR is the address of
11838 the call target. */
11841 arm_emit_call_insn (rtx pat
, rtx addr
)
11845 insn
= emit_call_insn (pat
);
11847 /* The PIC register is live on entry to VxWorks PIC PLT entries.
11848 If the call might use such an entry, add a use of the PIC register
11849 to the instruction's CALL_INSN_FUNCTION_USAGE. */
11850 if (TARGET_VXWORKS_RTP
11852 && GET_CODE (addr
) == SYMBOL_REF
11853 && (SYMBOL_REF_DECL (addr
)
11854 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
11855 : !SYMBOL_REF_LOCAL_P (addr
)))
11857 require_pic_register ();
11858 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
/* Output a 'call' insn.  */
output_call (rtx *operands)

  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
output_call_mem (rtx *operands)

  gcc_assert (!arm_arch5);

  if (TARGET_INTERWORK)
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);

  else if (regno_use_in (LR_REGNUM, operands[0]))
      /* LR is used in the memory address.  We load the address in the
	 first instruction.  It's safe to use IP as the target of the
	 load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);

      output_asm_insn ("bx%?\t%|ip", operands);

      output_asm_insn ("mov%?\t%|pc, %|ip", operands);

      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
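/* Illustrative sketch, not part of the original source: on a pre-v5
   core the interworking path above turns a call through a memory
   operand into

	ldr	ip, [r4]	@ fetch the target address
	mov	lr, pc		@ set up the return address
	bx	ip		@ branch, possibly switching to Thumb

   whereas on v5 and later the call patterns emit a single "blx".
   The use of r4 as the address base is only an example.  */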
11920 /* Output a move from arm registers to an fpa registers.
11921 OPERANDS[0] is an fpa register.
11922 OPERANDS[1] is the first registers of an arm register pair. */
11924 output_mov_long_double_fpa_from_arm (rtx
*operands
)
11926 int arm_reg0
= REGNO (operands
[1]);
11929 gcc_assert (arm_reg0
!= IP_REGNUM
);
11931 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
11932 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
11933 ops
[2] = gen_rtx_REG (SImode
, 2 + arm_reg0
);
11935 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops
);
11936 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands
);
11941 /* Output a move from an fpa register to arm registers.
11942 OPERANDS[0] is the first registers of an arm register pair.
11943 OPERANDS[1] is an fpa register. */
11945 output_mov_long_double_arm_from_fpa (rtx
*operands
)
11947 int arm_reg0
= REGNO (operands
[0]);
11950 gcc_assert (arm_reg0
!= IP_REGNUM
);
11952 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
11953 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
11954 ops
[2] = gen_rtx_REG (SImode
, 2 + arm_reg0
);
11956 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands
);
11957 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops
);
11961 /* Output a move from arm registers to arm registers of a long double
11962 OPERANDS[0] is the destination.
11963 OPERANDS[1] is the source. */
11965 output_mov_long_double_arm_from_arm (rtx
*operands
)
11967 /* We have to be careful here because the two might overlap. */
11968 int dest_start
= REGNO (operands
[0]);
11969 int src_start
= REGNO (operands
[1]);
11973 if (dest_start
< src_start
)
11975 for (i
= 0; i
< 3; i
++)
11977 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
11978 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
11979 output_asm_insn ("mov%?\t%0, %1", ops
);
11984 for (i
= 2; i
>= 0; i
--)
11986 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
11987 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
11988 output_asm_insn ("mov%?\t%0, %1", ops
);
11996 arm_emit_movpair (rtx dest
, rtx src
)
11998 /* If the src is an immediate, simplify it. */
11999 if (CONST_INT_P (src
))
12001 HOST_WIDE_INT val
= INTVAL (src
);
12002 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
12003 if ((val
>> 16) & 0x0000ffff)
12004 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
12006 GEN_INT ((val
>> 16) & 0x0000ffff));
12009 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
12010 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
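/* Illustrative sketch, not part of the original source: for a
   CONST_INT such as 0x12345678, arm_emit_movpair sets the low
   halfword and then inserts the high halfword, which ends up as
   something like

	movw	r0, #0x5678
	movt	r0, #0x1234

   while a symbolic SRC goes through the HIGH/LO_SUM pair instead.
   The destination register is a placeholder.  */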
12013 /* Output a move from arm registers to an fpa registers.
12014 OPERANDS[0] is an fpa register.
12015 OPERANDS[1] is the first registers of an arm register pair. */
12017 output_mov_double_fpa_from_arm (rtx
*operands
)
12019 int arm_reg0
= REGNO (operands
[1]);
12022 gcc_assert (arm_reg0
!= IP_REGNUM
);
12024 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
12025 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
12026 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops
);
12027 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands
);
12031 /* Output a move from an fpa register to arm registers.
12032 OPERANDS[0] is the first registers of an arm register pair.
12033 OPERANDS[1] is an fpa register. */
12035 output_mov_double_arm_from_fpa (rtx
*operands
)
12037 int arm_reg0
= REGNO (operands
[0]);
12040 gcc_assert (arm_reg0
!= IP_REGNUM
);
12042 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
12043 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
12044 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands
);
12045 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops
);
12049 /* Output a move between double words.
12050 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12051 or MEM<-REG and all MEMs must be offsettable addresses. */
12053 output_move_double (rtx
*operands
)
12055 enum rtx_code code0
= GET_CODE (operands
[0]);
12056 enum rtx_code code1
= GET_CODE (operands
[1]);
12061 unsigned int reg0
= REGNO (operands
[0]);
12063 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
12065 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
12067 switch (GET_CODE (XEXP (operands
[1], 0)))
12071 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
12072 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
12074 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
12078 gcc_assert (TARGET_LDRD
);
12079 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
12084 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
12086 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
12091 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
12093 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
12097 gcc_assert (TARGET_LDRD
);
12098 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
/* Autoincrement addressing modes should never have overlapping
   base and destination registers, and overlapping index registers
   are already prohibited, so this doesn't need to worry about
12107 otherops
[0] = operands
[0];
12108 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
12109 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
12111 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
12113 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
12115 /* Registers overlap so split out the increment. */
12116 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
12117 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
12121 /* Use a single insn if we can.
12122 FIXME: IWMMXT allows offsets larger than ldrd can
12123 handle, fix these up with a pair of ldr. */
12125 || GET_CODE (otherops
[2]) != CONST_INT
12126 || (INTVAL (otherops
[2]) > -256
12127 && INTVAL (otherops
[2]) < 256))
12128 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
12131 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
12132 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
12138 /* Use a single insn if we can.
12139 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12140 fix these up with a pair of ldr. */
12142 || GET_CODE (otherops
[2]) != CONST_INT
12143 || (INTVAL (otherops
[2]) > -256
12144 && INTVAL (otherops
[2]) < 256))
12145 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
12148 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
12149 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
12156 /* We might be able to use ldrd %0, %1 here. However the range is
12157 different to ldr/adr, and it is broken on some ARMv7-M
12158 implementations. */
12159 /* Use the second register of the pair to avoid problematic
12161 otherops
[1] = operands
[1];
12162 output_asm_insn ("adr%?\t%0, %1", otherops
);
12163 operands
[1] = otherops
[0];
12165 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
12167 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
12170 /* ??? This needs checking for thumb2. */
12172 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
12173 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
12175 otherops
[0] = operands
[0];
12176 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
12177 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
12179 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
12181 if (GET_CODE (otherops
[2]) == CONST_INT
&& !TARGET_LDRD
)
12183 switch ((int) INTVAL (otherops
[2]))
12186 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
12191 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
12196 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
12200 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
12201 operands
[1] = otherops
[0];
12203 && (GET_CODE (otherops
[2]) == REG
12205 || (GET_CODE (otherops
[2]) == CONST_INT
12206 && INTVAL (otherops
[2]) > -256
12207 && INTVAL (otherops
[2]) < 256)))
12209 if (reg_overlap_mentioned_p (operands
[0],
12213 /* Swap base and index registers over to
12214 avoid a conflict. */
12216 otherops
[1] = otherops
[2];
12219 /* If both registers conflict, it will usually
12220 have been fixed by a splitter. */
12221 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
12222 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
12224 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
12225 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
12229 otherops
[0] = operands
[0];
12230 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
12235 if (GET_CODE (otherops
[2]) == CONST_INT
)
12237 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
12238 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
12240 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
12243 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
12246 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
12249 return "ldr%(d%)\t%0, [%1]";
12251 return "ldm%(ia%)\t%1, %M0";
12255 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
12256 /* Take care of overlapping base/data reg. */
12257 if (reg_mentioned_p (operands
[0], operands
[1]))
12259 output_asm_insn ("ldr%?\t%0, %1", otherops
);
12260 output_asm_insn ("ldr%?\t%0, %1", operands
);
12264 output_asm_insn ("ldr%?\t%0, %1", operands
);
12265 output_asm_insn ("ldr%?\t%0, %1", otherops
);
12272 /* Constraints should ensure this. */
12273 gcc_assert (code0
== MEM
&& code1
== REG
);
12274 gcc_assert (REGNO (operands
[1]) != IP_REGNUM
);
12276 switch (GET_CODE (XEXP (operands
[0], 0)))
12280 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
12282 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
12286 gcc_assert (TARGET_LDRD
);
12287 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
12292 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
12294 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
12299 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
12301 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
12305 gcc_assert (TARGET_LDRD
);
12306 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
12311 otherops
[0] = operands
[1];
12312 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
12313 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
12315 /* IWMMXT allows offsets larger than ldrd can handle,
12316 fix these up with a pair of ldr. */
12318 && GET_CODE (otherops
[2]) == CONST_INT
12319 && (INTVAL(otherops
[2]) <= -256
12320 || INTVAL(otherops
[2]) >= 256))
12322 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
12324 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
12325 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
12329 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
12330 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
12333 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
12334 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
12336 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
12340 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
12341 if (GET_CODE (otherops
[2]) == CONST_INT
&& !TARGET_LDRD
)
12343 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
12346 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
12352 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
12358 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
12363 && (GET_CODE (otherops
[2]) == REG
12365 || (GET_CODE (otherops
[2]) == CONST_INT
12366 && INTVAL (otherops
[2]) > -256
12367 && INTVAL (otherops
[2]) < 256)))
12369 otherops
[0] = operands
[1];
12370 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
12371 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
12377 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
12378 otherops
[1] = operands
[1];
12379 output_asm_insn ("str%?\t%1, %0", operands
);
12380 output_asm_insn ("str%?\t%H1, %0", otherops
);
12387 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12388 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
12391 output_move_quad (rtx
*operands
)
12393 if (REG_P (operands
[0]))
12395 /* Load, or reg->reg move. */
12397 if (MEM_P (operands
[1]))
12399 switch (GET_CODE (XEXP (operands
[1], 0)))
12402 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
12407 output_asm_insn ("adr%?\t%0, %1", operands
);
12408 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
12412 gcc_unreachable ();
12420 gcc_assert (REG_P (operands
[1]));
12422 dest
= REGNO (operands
[0]);
12423 src
= REGNO (operands
[1]);
12425 /* This seems pretty dumb, but hopefully GCC won't try to do it
12428 for (i
= 0; i
< 4; i
++)
12430 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
12431 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
12432 output_asm_insn ("mov%?\t%0, %1", ops
);
12435 for (i
= 3; i
>= 0; i
--)
12437 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
12438 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
12439 output_asm_insn ("mov%?\t%0, %1", ops
);
12445 gcc_assert (MEM_P (operands
[0]));
12446 gcc_assert (REG_P (operands
[1]));
12447 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
12449 switch (GET_CODE (XEXP (operands
[0], 0)))
12452 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
12456 gcc_unreachable ();
12463 /* Output a VFP load or store instruction. */
12466 output_move_vfp (rtx
*operands
)
12468 rtx reg
, mem
, addr
, ops
[2];
12469 int load
= REG_P (operands
[0]);
12470 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
12471 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
12474 enum machine_mode mode
;
12476 reg
= operands
[!load
];
12477 mem
= operands
[load
];
12479 mode
= GET_MODE (reg
);
12481 gcc_assert (REG_P (reg
));
12482 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
12483 gcc_assert (mode
== SFmode
12487 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
12488 gcc_assert (MEM_P (mem
));
12490 addr
= XEXP (mem
, 0);
12492 switch (GET_CODE (addr
))
12495 templ
= "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12496 ops
[0] = XEXP (addr
, 0);
12501 templ
= "f%smia%c%%?\t%%0!, {%%%s1}%s";
12502 ops
[0] = XEXP (addr
, 0);
12507 templ
= "f%s%c%%?\t%%%s0, %%1%s";
12513 sprintf (buff
, templ
,
12514 load
? "ld" : "st",
12517 integer_p
? "\t%@ int" : "");
12518 output_asm_insn (buff
, ops
);
/* Output a Neon quad-word load or store, or a load or store for
   larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because we use VSTM, as required by the EABI.  GCC RTL defines
   element ordering based on in-memory order.  This can be different
   from the architectural ordering of elements within a NEON register.
   The intrinsics defined in arm_neon.h use the NEON register element
   ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.  */
12548 output_move_neon (rtx
*operands
)
12550 rtx reg
, mem
, addr
, ops
[2];
12551 int regno
, load
= REG_P (operands
[0]);
12554 enum machine_mode mode
;
12556 reg
= operands
[!load
];
12557 mem
= operands
[load
];
12559 mode
= GET_MODE (reg
);
12561 gcc_assert (REG_P (reg
));
12562 regno
= REGNO (reg
);
12563 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
12564 || NEON_REGNO_OK_FOR_QUAD (regno
));
12565 gcc_assert (VALID_NEON_DREG_MODE (mode
)
12566 || VALID_NEON_QREG_MODE (mode
)
12567 || VALID_NEON_STRUCT_MODE (mode
));
12568 gcc_assert (MEM_P (mem
));
12570 addr
= XEXP (mem
, 0);
12572 /* Strip off const from addresses like (const (plus (...))). */
12573 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
12574 addr
= XEXP (addr
, 0);
12576 switch (GET_CODE (addr
))
12579 templ
= "v%smia%%?\t%%0!, %%h1";
12580 ops
[0] = XEXP (addr
, 0);
12585 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12586 templ
= "v%smdb%%?\t%%0!, %%h1";
12587 ops
[0] = XEXP (addr
, 0);
12592 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12593 gcc_unreachable ();
12598 int nregs
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
12601 for (i
= 0; i
< nregs
; i
++)
12603 /* We're only using DImode here because it's a convenient size. */
12604 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
12605 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
12606 if (reg_overlap_mentioned_p (ops
[0], mem
))
12608 gcc_assert (overlap
== -1);
12613 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
12614 output_asm_insn (buff
, ops
);
12619 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
12620 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
12621 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
12622 output_asm_insn (buff
, ops
);
12629 templ
= "v%smia%%?\t%%m0, %%h1";
12634 sprintf (buff
, templ
, load
? "ld" : "st");
12635 output_asm_insn (buff
, ops
);
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
output_add_immediate (rtx *operands)

  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);

/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)

#if HOST_BITS_PER_WIDE_INT > 32

      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);

      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	  operands[immed_op] = GEN_INT (n & (255 << i));
	  output_asm_insn (instr, operands);
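/* Worked example, not part of the original source: asked to add
   0x10004 (not a valid single ARM immediate) to r1, the loop above
   emits one instruction per nonzero 8-bit chunk, e.g.

	add	r0, r1, #4
	add	r0, r0, #65536

   The first chunk is printed with INSTR1 (three-operand form) and
   later chunks with INSTR2, which accumulates into the destination.
   Register names are placeholders.  */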
12703 /* Return the name of a shifter operation. */
12704 static const char *
12705 arm_shift_nmem(enum rtx_code code
)
12710 return ARM_LSL_NAME
;
12726 /* Return the appropriate ARM instruction for the operation code.
12727 The returned result should not be overwritten. OP is the rtx of the
12728 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
12731 arithmetic_instr (rtx op
, int shift_first_arg
)
12733 switch (GET_CODE (op
))
12739 return shift_first_arg
? "rsb" : "sub";
12754 return arm_shift_nmem(GET_CODE(op
));
12757 gcc_unreachable ();
12761 /* Ensure valid constant shifts and return the appropriate shift mnemonic
12762 for the operation code. The returned result should not be overwritten.
12763 OP is the rtx code of the shift.
12764 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
12766 static const char *
12767 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
12770 enum rtx_code code
= GET_CODE (op
);
12772 switch (GET_CODE (XEXP (op
, 1)))
12780 *amountp
= INTVAL (XEXP (op
, 1));
12784 gcc_unreachable ();
12790 gcc_assert (*amountp
!= -1);
12791 *amountp
= 32 - *amountp
;
12794 /* Fall through. */
12800 mnem
= arm_shift_nmem(code
);
12804 /* We never have to worry about the amount being other than a
12805 power of 2, since this case can never be reloaded from a reg. */
12806 gcc_assert (*amountp
!= -1);
12807 *amountp
= int_log2 (*amountp
);
12808 return ARM_LSL_NAME
;
12811 gcc_unreachable ();
12814 if (*amountp
!= -1)
12816 /* This is not 100% correct, but follows from the desire to merge
12817 multiplication by a power of 2 with the recognizer for a
12818 shift. >=32 is not a valid shift for "lsl", so we must try and
12819 output a shift that produces the correct arithmetical result.
12820 Using lsr #32 is identical except for the fact that the carry bit
12821 is not set correctly if we set the flags; but we never use the
12822 carry bit from such an operation, so we can ignore that. */
12823 if (code
== ROTATERT
)
12824 /* Rotate is just modulo 32. */
12826 else if (*amountp
!= (*amountp
& 31))
12828 if (code
== ASHIFT
)
12833 /* Shifts of 0 are no-ops. */
/* Obtain the shift from the POWER of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)

  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
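/* Illustrative note, not part of the original source: int_log2 (8)
   returns 3, so a multiply by 8 that reaches shift_op above is
   printed as an "lsl #3" rather than as a real multiplication.  */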
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)

  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)

      if (len_so_far >= MAX_ASCII_LEN)
	  fputs ("\"\n\t.ascii\t\"", stream);

      if (c == '\\' || c == '\"')
	  putc ('\\', stream);

      fprintf (stream, "\\%03o", c);

  fputs ("\"\n", stream);
12905 /* Compute the register save mask for registers 0 through 12
12906 inclusive. This code is used by arm_compute_save_reg_mask. */
12908 static unsigned long
12909 arm_compute_save_reg0_reg12_mask (void)
12911 unsigned long func_type
= arm_current_func_type ();
12912 unsigned long save_reg_mask
= 0;
12915 if (IS_INTERRUPT (func_type
))
12917 unsigned int max_reg
;
12918 /* Interrupt functions must not corrupt any registers,
12919 even call clobbered ones. If this is a leaf function
12920 we can just examine the registers used by the RTL, but
12921 otherwise we have to assume that whatever function is
12922 called might clobber anything, and so we have to save
12923 all the call-clobbered registers as well. */
12924 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
12925 /* FIQ handlers have registers r8 - r12 banked, so
12926 we only need to check r0 - r7, Normal ISRs only
12927 bank r14 and r15, so we must check up to r12.
12928 r13 is the stack pointer which is always preserved,
12929 so we do not need to consider it here. */
12934 for (reg
= 0; reg
<= max_reg
; reg
++)
12935 if (df_regs_ever_live_p (reg
)
12936 || (! current_function_is_leaf
&& call_used_regs
[reg
]))
12937 save_reg_mask
|= (1 << reg
);
12939 /* Also save the pic base register if necessary. */
12941 && !TARGET_SINGLE_PIC_BASE
12942 && arm_pic_register
!= INVALID_REGNUM
12943 && crtl
->uses_pic_offset_table
)
12944 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
12946 else if (IS_VOLATILE(func_type
))
12948 /* For noreturn functions we historically omitted register saves
12949 altogether. However this really messes up debugging. As a
12950 compromise save just the frame pointers. Combined with the link
12951 register saved elsewhere this should be sufficient to get
12953 if (frame_pointer_needed
)
12954 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
12955 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
12956 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
12957 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
12958 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
12962 /* In the normal case we only need to save those registers
12963 which are call saved and which are used by this function. */
12964 for (reg
= 0; reg
<= 11; reg
++)
12965 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
12966 save_reg_mask
|= (1 << reg
);
12968 /* Handle the frame pointer as a special case. */
12969 if (frame_pointer_needed
)
12970 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
12972 /* If we aren't loading the PIC register,
12973 don't stack it even though it may be live. */
12975 && !TARGET_SINGLE_PIC_BASE
12976 && arm_pic_register
!= INVALID_REGNUM
12977 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
12978 || crtl
->uses_pic_offset_table
))
12979 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
12981 /* The prologue will copy SP into R0, so save it. */
12982 if (IS_STACKALIGN (func_type
))
12983 save_reg_mask
|= 1;
12986 /* Save registers so the exception handler can modify them. */
12987 if (crtl
->calls_eh_return
)
12993 reg
= EH_RETURN_DATA_REGNO (i
);
12994 if (reg
== INVALID_REGNUM
)
12996 save_reg_mask
|= 1 << reg
;
13000 return save_reg_mask
;
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int arm_compute_static_chain_stack_bytes (void)

  unsigned long func_type = arm_current_func_type ();
  int static_chain_stack_bytes = 0;

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
      IS_NESTED (func_type) &&
      df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
    static_chain_stack_bytes = 4;

  return static_chain_stack_bytes;
13022 /* Compute a bit mask of which registers need to be
13023 saved on the stack for the current function.
13024 This is used by arm_get_frame_offsets, which may add extra registers. */
13026 static unsigned long
13027 arm_compute_save_reg_mask (void)
13029 unsigned int save_reg_mask
= 0;
13030 unsigned long func_type
= arm_current_func_type ();
13033 if (IS_NAKED (func_type
))
13034 /* This should never really happen. */
13037 /* If we are creating a stack frame, then we must save the frame pointer,
13038 IP (which will hold the old stack pointer), LR and the PC. */
13039 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
13041 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
13044 | (1 << PC_REGNUM
);
13046 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
13048 /* Decide if we need to save the link register.
13049 Interrupt routines have their own banked link register,
13050 so they never need to save it.
13051 Otherwise if we do not use the link register we do not need to save
13052 it. If we are pushing other registers onto the stack however, we
13053 can save an instruction in the epilogue by pushing the link register
13054 now and then popping it back into the PC. This incurs extra memory
13055 accesses though, so we only do it when optimizing for size, and only
13056 if we know that we will not need a fancy return sequence. */
13057 if (df_regs_ever_live_p (LR_REGNUM
)
13060 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
13061 && !crtl
->calls_eh_return
))
13062 save_reg_mask
|= 1 << LR_REGNUM
;
13064 if (cfun
->machine
->lr_save_eliminated
)
13065 save_reg_mask
&= ~ (1 << LR_REGNUM
);
13067 if (TARGET_REALLY_IWMMXT
13068 && ((bit_count (save_reg_mask
)
13069 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
13070 arm_compute_static_chain_stack_bytes())
13073 /* The total number of registers that are going to be pushed
13074 onto the stack is odd. We need to ensure that the stack
13075 is 64-bit aligned before we start to save iWMMXt registers,
13076 and also before we start to create locals. (A local variable
13077 might be a double or long long which we will load/store using
13078 an iWMMXt instruction). Therefore we need to push another
13079 ARM register, so that the stack will be 64-bit aligned. We
13080 try to avoid using the arg registers (r0 -r3) as they might be
13081 used to pass values in a tail call. */
13082 for (reg
= 4; reg
<= 12; reg
++)
13083 if ((save_reg_mask
& (1 << reg
)) == 0)
13087 save_reg_mask
|= (1 << reg
);
13090 cfun
->machine
->sibcall_blocked
= 1;
13091 save_reg_mask
|= (1 << 3);
13095 /* We may need to push an additional register for use initializing the
13096 PIC base register. */
13097 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
13098 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
13100 reg
= thumb_find_work_register (1 << 4);
13101 if (!call_used_regs
[reg
])
13102 save_reg_mask
|= (1 << reg
);
13105 return save_reg_mask
;
13109 /* Compute a bit mask of which registers need to be
13110 saved on the stack for the current function. */
13111 static unsigned long
13112 thumb1_compute_save_reg_mask (void)
13114 unsigned long mask
;
13118 for (reg
= 0; reg
< 12; reg
++)
13119 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
13123 && !TARGET_SINGLE_PIC_BASE
13124 && arm_pic_register
!= INVALID_REGNUM
13125 && crtl
->uses_pic_offset_table
)
13126 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
13128 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13129 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
13130 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
13132 /* LR will also be pushed if any lo regs are pushed. */
13133 if (mask
& 0xff || thumb_force_lr_save ())
13134 mask
|= (1 << LR_REGNUM
);
13136 /* Make sure we have a low work register if we need one.
13137 We will need one if we are going to push a high register,
13138 but we are not currently intending to push a low register. */
13139 if ((mask
& 0xff) == 0
13140 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
13142 /* Use thumb_find_work_register to choose which register
13143 we will use. If the register is live then we will
13144 have to push it. Use LAST_LO_REGNUM as our fallback
13145 choice for the register to select. */
13146 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
13147 /* Make sure the register returned by thumb_find_work_register is
13148 not part of the return value. */
13149 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
13150 reg
= LAST_LO_REGNUM
;
13152 if (! call_used_regs
[reg
])
13156 /* The 504 below is 8 bytes less than 512 because there are two possible
13157 alignment words. We can't tell here if they will be present or not so we
13158 have to play it safe and assume that they are. */
13159 if ((CALLER_INTERWORKING_SLOT_SIZE
+
13160 ROUND_UP_WORD (get_frame_size ()) +
13161 crtl
->outgoing_args_size
) >= 504)
13163 /* This is the same as the code in thumb1_expand_prologue() which
13164 determines which register to use for stack decrement. */
13165 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
13166 if (mask
& (1 << reg
))
13169 if (reg
> LAST_LO_REGNUM
)
13171 /* Make sure we have a register available for stack decrement. */
13172 mask
|= 1 << LAST_LO_REGNUM
;
13180 /* Return the number of bytes required to save VFP registers. */
13182 arm_get_vfp_saved_size (void)
13184 unsigned int regno
;
13189 /* Space for saved VFP registers. */
13190 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
13193 for (regno
= FIRST_VFP_REGNUM
;
13194 regno
< LAST_VFP_REGNUM
;
13197 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
13198 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
13202 /* Workaround ARM10 VFPr1 bug. */
13203 if (count
== 2 && !arm_arch6
)
13205 saved
+= count
* 8;
13214 if (count
== 2 && !arm_arch6
)
13216 saved
+= count
* 8;
13223 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13224 everything bar the final return instruction. */
13226 output_return_instruction (rtx operand
, int really_return
, int reverse
)
13228 char conditional
[10];
13231 unsigned long live_regs_mask
;
13232 unsigned long func_type
;
13233 arm_stack_offsets
*offsets
;
13235 func_type
= arm_current_func_type ();
13237 if (IS_NAKED (func_type
))
13240 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
13242 /* If this function was declared non-returning, and we have
13243 found a tail call, then we have to trust that the called
13244 function won't return. */
13249 /* Otherwise, trap an attempted return by aborting. */
13251 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
13253 assemble_external_libcall (ops
[1]);
13254 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
13260 gcc_assert (!cfun
->calls_alloca
|| really_return
);
13262 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
13264 cfun
->machine
->return_used_this_function
= 1;
13266 offsets
= arm_get_frame_offsets ();
13267 live_regs_mask
= offsets
->saved_regs_mask
;
13269 if (live_regs_mask
)
13271 const char * return_reg
;
13273 /* If we do not have any special requirements for function exit
13274 (e.g. interworking) then we can load the return address
13275 directly into the PC. Otherwise we must load it into LR. */
13277 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
13278 return_reg
= reg_names
[PC_REGNUM
];
13280 return_reg
= reg_names
[LR_REGNUM
];
13282 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
13284 /* There are three possible reasons for the IP register
13285 being saved. 1) a stack frame was created, in which case
13286 IP contains the old stack pointer, or 2) an ISR routine
13287 corrupted it, or 3) it was saved to align the stack on
13288 iWMMXt. In case 1, restore IP into SP, otherwise just
13290 if (frame_pointer_needed
)
13292 live_regs_mask
&= ~ (1 << IP_REGNUM
);
13293 live_regs_mask
|= (1 << SP_REGNUM
);
13296 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
13299 /* On some ARM architectures it is faster to use LDR rather than
13300 LDM to load a single register. On other architectures, the
13301 cost is the same. In 26 bit mode, or for exception handlers,
13302 we have to use LDM to load the PC so that the CPSR is also
13304 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
13305 if (live_regs_mask
== (1U << reg
))
13308 if (reg
<= LAST_ARM_REGNUM
13309 && (reg
!= LR_REGNUM
13311 || ! IS_INTERRUPT (func_type
)))
13313 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
13314 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
13321 /* Generate the load multiple instruction to restore the
13322 registers. Note we can get here, even if
13323 frame_pointer_needed is true, but only if sp already
13324 points to the base of the saved core registers. */
13325 if (live_regs_mask
& (1 << SP_REGNUM
))
13327 unsigned HOST_WIDE_INT stack_adjust
;
13329 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
13330 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
13332 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
13333 if (TARGET_UNIFIED_ASM
)
13334 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
13336 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
13339 /* If we can't use ldmib (SA110 bug),
13340 then try to pop r3 instead. */
13342 live_regs_mask
|= 1 << 3;
13344 if (TARGET_UNIFIED_ASM
)
13345 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
13347 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
13351 if (TARGET_UNIFIED_ASM
)
13352 sprintf (instr
, "pop%s\t{", conditional
);
13354 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
13356 p
= instr
+ strlen (instr
);
13358 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
13359 if (live_regs_mask
& (1 << reg
))
13361 int l
= strlen (reg_names
[reg
]);
13367 memcpy (p
, ", ", 2);
13371 memcpy (p
, "%|", 2);
13372 memcpy (p
+ 2, reg_names
[reg
], l
);
13376 if (live_regs_mask
& (1 << LR_REGNUM
))
13378 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
13379 /* If returning from an interrupt, restore the CPSR. */
13380 if (IS_INTERRUPT (func_type
))
13387 output_asm_insn (instr
, & operand
);
13389 /* See if we need to generate an extra instruction to
13390 perform the actual function return. */
13392 && func_type
!= ARM_FT_INTERWORKED
13393 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
13395 /* The return has already been handled
13396 by loading the LR into the PC. */
13403 switch ((int) ARM_FUNC_TYPE (func_type
))
13407 /* ??? This is wrong for unified assembly syntax. */
13408 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
13411 case ARM_FT_INTERWORKED
:
13412 sprintf (instr
, "bx%s\t%%|lr", conditional
);
13415 case ARM_FT_EXCEPTION
:
13416 /* ??? This is wrong for unified assembly syntax. */
13417 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
13421 /* Use bx if it's available. */
13422 if (arm_arch5
|| arm_arch4t
)
13423 sprintf (instr
, "bx%s\t%%|lr", conditional
);
13425 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
13429 output_asm_insn (instr
, & operand
);
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
	 .ascii "arm_poke_function_name", 0
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 stmfd	sp!, {fp, ip, lr, pc}

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
arm_poke_function_name (FILE *stream, const char *name)

  unsigned long alignlength;
  unsigned long length;

  length = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
13477 /* Place some comments into the assembler stream
13478 describing the current function. */
13480 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
13482 unsigned long func_type
;
13486 thumb1_output_function_prologue (f
, frame_size
);
13490 /* Sanity check. */
13491 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
13493 func_type
= arm_current_func_type ();
13495 switch ((int) ARM_FUNC_TYPE (func_type
))
13498 case ARM_FT_NORMAL
:
13500 case ARM_FT_INTERWORKED
:
13501 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
13504 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
13507 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
13509 case ARM_FT_EXCEPTION
:
13510 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
13514 if (IS_NAKED (func_type
))
13515 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13517 if (IS_VOLATILE (func_type
))
13518 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
13520 if (IS_NESTED (func_type
))
13521 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
13522 if (IS_STACKALIGN (func_type
))
13523 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13525 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13527 crtl
->args
.pretend_args_size
, frame_size
);
13529 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13530 frame_pointer_needed
,
13531 cfun
->machine
->uses_anonymous_args
);
13533 if (cfun
->machine
->lr_save_eliminated
)
13534 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
13536 if (crtl
->calls_eh_return
)
13537 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
13542 arm_output_epilogue (rtx sibling
)
13545 unsigned long saved_regs_mask
;
13546 unsigned long func_type
;
13547 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13548 frame that is $fp + 4 for a non-variadic function. */
13549 int floats_offset
= 0;
13551 FILE * f
= asm_out_file
;
13552 unsigned int lrm_count
= 0;
13553 int really_return
= (sibling
== NULL
);
13555 arm_stack_offsets
*offsets
;
13557 /* If we have already generated the return instruction
13558 then it is futile to generate anything else. */
13559 if (use_return_insn (FALSE
, sibling
) &&
13560 (cfun
->machine
->return_used_this_function
!= 0))
13563 func_type
= arm_current_func_type ();
13565 if (IS_NAKED (func_type
))
13566 /* Naked functions don't have epilogues. */
13569 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
13573 /* A volatile function should never return. Call abort. */
13574 op
= gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)" : "abort");
13575 assemble_external_libcall (op
);
13576 output_asm_insn ("bl\t%a0", &op
);
13581 /* If we are throwing an exception, then we really must be doing a
13582 return, so we can't tail-call. */
13583 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
13585 offsets
= arm_get_frame_offsets ();
13586 saved_regs_mask
= offsets
->saved_regs_mask
;
13589 lrm_count
= bit_count (saved_regs_mask
);
13591 floats_offset
= offsets
->saved_args
;
13592 /* Compute how far away the floats will be. */
13593 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
13594 if (saved_regs_mask
& (1 << reg
))
13595 floats_offset
+= 4;
13597 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
13599 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13600 int vfp_offset
= offsets
->frame
;
13602 if (TARGET_FPA_EMU2
)
13604 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
13605 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
13607 floats_offset
+= 12;
13608 asm_fprintf (f
, "\tldfe\t%r, [%r, #-%d]\n",
13609 reg
, FP_REGNUM
, floats_offset
- vfp_offset
);
13614 start_reg
= LAST_FPA_REGNUM
;
13616 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
13618 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
13620 floats_offset
+= 12;
13622 /* We can't unstack more than four registers at once. */
13623 if (start_reg
- reg
== 3)
13625 asm_fprintf (f
, "\tlfm\t%r, 4, [%r, #-%d]\n",
13626 reg
, FP_REGNUM
, floats_offset
- vfp_offset
);
13627 start_reg
= reg
- 1;
13632 if (reg
!= start_reg
)
13633 asm_fprintf (f
, "\tlfm\t%r, %d, [%r, #-%d]\n",
13634 reg
+ 1, start_reg
- reg
,
13635 FP_REGNUM
, floats_offset
- vfp_offset
);
13636 start_reg
= reg
- 1;
13640 /* Just in case the last register checked also needs unstacking. */
13641 if (reg
!= start_reg
)
13642 asm_fprintf (f
, "\tlfm\t%r, %d, [%r, #-%d]\n",
13643 reg
+ 1, start_reg
- reg
,
13644 FP_REGNUM
, floats_offset
- vfp_offset
);
13647 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
13651 /* The fldmd insns do not have base+offset addressing
13652 modes, so we use IP to hold the address. */
13653 saved_size
= arm_get_vfp_saved_size ();
13655 if (saved_size
> 0)
13657 floats_offset
+= saved_size
;
13658 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n", IP_REGNUM
,
13659 FP_REGNUM
, floats_offset
- vfp_offset
);
13661 start_reg
= FIRST_VFP_REGNUM
;
13662 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
13664 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
13665 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
13667 if (start_reg
!= reg
)
13668 vfp_output_fldmd (f
, IP_REGNUM
,
13669 (start_reg
- FIRST_VFP_REGNUM
) / 2,
13670 (reg
- start_reg
) / 2);
13671 start_reg
= reg
+ 2;
13674 if (start_reg
!= reg
)
13675 vfp_output_fldmd (f
, IP_REGNUM
,
13676 (start_reg
- FIRST_VFP_REGNUM
) / 2,
13677 (reg
- start_reg
) / 2);
13682 /* The frame pointer is guaranteed to be non-double-word aligned.
13683 This is because it is set to (old_stack_pointer - 4) and the
13684 old_stack_pointer was double word aligned. Thus the offset to
13685 the iWMMXt registers to be loaded must also be non-double-word
13686 sized, so that the resultant address *is* double-word aligned.
13687 We can ignore floats_offset since that was already included in
13688 the live_regs_mask. */
13689 lrm_count
+= (lrm_count
% 2 ? 2 : 1);
13691 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
13692 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
13694 asm_fprintf (f
, "\twldrd\t%r, [%r, #-%d]\n",
13695 reg
, FP_REGNUM
, lrm_count
* 4);
13700 /* saved_regs_mask should contain the IP, which at the time of stack
13701 frame generation actually contains the old stack pointer. So a
13702 quick way to unwind the stack is just pop the IP register directly
13703 into the stack pointer. */
13704 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
13705 saved_regs_mask
&= ~ (1 << IP_REGNUM
);
13706 saved_regs_mask
|= (1 << SP_REGNUM
);
13708 /* There are two registers left in saved_regs_mask - LR and PC. We
13709 only need to restore the LR register (the return address), but to
13710 save time we can load it directly into the PC, unless we need a
13711 special function exit sequence, or we are not really returning. */
13713 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
13714 && !crtl
->calls_eh_return
)
13715 /* Delete the LR from the register mask, so that the LR on
13716 the stack is loaded into the PC in the register mask. */
13717 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
13719 saved_regs_mask
&= ~ (1 << PC_REGNUM
);
13721 /* We must use SP as the base register, because SP is one of the
13722 registers being restored. If an interrupt or page fault
13723 happens in the ldm instruction, the SP might or might not
13724 have been restored. That would be bad, as then SP will no
13725 longer indicate the safe area of stack, and we can get stack
13726 corruption. Using SP as the base register means that it will
13727 be reset correctly to the original value, should an interrupt
13728 occur. If the stack pointer already points at the right
13729 place, then omit the subtraction. */
13730 if (offsets
->outgoing_args
!= (1 + (int) bit_count (saved_regs_mask
))
13731 || cfun
->calls_alloca
)
13732 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n", SP_REGNUM
, FP_REGNUM
,
13733 4 * bit_count (saved_regs_mask
));
13734 print_multi_reg (f
, "ldmfd\t%r, ", SP_REGNUM
, saved_regs_mask
, 0);
13736 if (IS_INTERRUPT (func_type
))
13737 /* Interrupt handlers will have pushed the
13738 IP onto the stack, so restore it now. */
13739 print_multi_reg (f
, "ldmfd\t%r!, ", SP_REGNUM
, 1 << IP_REGNUM
, 0);
      /* This branch is executed for ARM mode (non-apcs frames) and
	 Thumb-2 mode.  Frame layout is essentially the same for those
	 cases, except that in ARM mode the frame pointer points to the
	 first saved register, while in Thumb-2 mode the frame pointer points
	 to the last saved register.

	 It is possible to make the frame pointer point to the last saved
	 register in both cases, and remove some conditionals below.
	 That means that fp setup in the prologue would be just "mov fp, sp"
	 and sp restore in the epilogue would be just "mov sp, fp", whereas
	 now we have to use add/sub in those cases.  However, the value
	 of that would be marginal, as both mov and add/sub are 32-bit
	 in ARM mode, and it would require extra conditionals
	 in arm_expand_prologue to distinguish the ARM-apcs-frame case
	 (where the frame pointer is required to point at the first register)
	 from the ARM-non-apcs-frame case.  Therefore, such a change is
	 postponed until a real need arises.  */
      unsigned HOST_WIDE_INT amount;
13762 /* Restore stack pointer if necessary. */
13763 if (TARGET_ARM
&& frame_pointer_needed
)
13765 operands
[0] = stack_pointer_rtx
;
13766 operands
[1] = hard_frame_pointer_rtx
;
13768 operands
[2] = GEN_INT (offsets
->frame
- offsets
->saved_regs
);
13769 output_add_immediate (operands
);
13773 if (frame_pointer_needed
)
	  /* For Thumb-2 restore sp from the frame pointer.
	     Operand restrictions mean we have to increment FP, then copy
	     FP to SP.  */
	  amount = offsets->locals_base - offsets->saved_regs;
13779 operands
[0] = hard_frame_pointer_rtx
;
13783 unsigned long count
;
13784 operands
[0] = stack_pointer_rtx
;
13785 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
13786 /* pop call clobbered registers if it avoids a
13787 separate stack adjustment. */
13788 count
= offsets
->saved_regs
- offsets
->saved_args
;
13791 && !crtl
->calls_eh_return
13792 && bit_count(saved_regs_mask
) * 4 == count
13793 && !IS_INTERRUPT (func_type
)
13794 && !crtl
->tail_call_emit
)
13796 unsigned long mask
;
13797 mask
= (1 << (arm_size_return_regs() / 4)) - 1;
13799 mask
&= ~saved_regs_mask
;
13801 while (bit_count (mask
) * 4 > amount
)
13803 while ((mask
& (1 << reg
)) == 0)
13805 mask
&= ~(1 << reg
);
13807 if (bit_count (mask
) * 4 == amount
) {
13809 saved_regs_mask
|= mask
;
13816 operands
[1] = operands
[0];
13817 operands
[2] = GEN_INT (amount
);
13818 output_add_immediate (operands
);
13820 if (frame_pointer_needed
)
13821 asm_fprintf (f
, "\tmov\t%r, %r\n",
13822 SP_REGNUM
, HARD_FRAME_POINTER_REGNUM
);
13825 if (TARGET_FPA_EMU2
)
13827 for (reg
= FIRST_FPA_REGNUM
; reg
<= LAST_FPA_REGNUM
; reg
++)
13828 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
13829 asm_fprintf (f
, "\tldfe\t%r, [%r], #12\n",
13834 start_reg
= FIRST_FPA_REGNUM
;
13836 for (reg
= FIRST_FPA_REGNUM
; reg
<= LAST_FPA_REGNUM
; reg
++)
13838 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
13840 if (reg
- start_reg
== 3)
13842 asm_fprintf (f
, "\tlfmfd\t%r, 4, [%r]!\n",
13843 start_reg
, SP_REGNUM
);
13844 start_reg
= reg
+ 1;
13849 if (reg
!= start_reg
)
13850 asm_fprintf (f
, "\tlfmfd\t%r, %d, [%r]!\n",
13851 start_reg
, reg
- start_reg
,
13854 start_reg
= reg
+ 1;
13858 /* Just in case the last register checked also needs unstacking. */
13859 if (reg
!= start_reg
)
13860 asm_fprintf (f
, "\tlfmfd\t%r, %d, [%r]!\n",
13861 start_reg
, reg
- start_reg
, SP_REGNUM
);
13864 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
13866 int end_reg
= LAST_VFP_REGNUM
+ 1;
13868 /* Scan the registers in reverse order. We need to match
13869 any groupings made in the prologue and generate matching
13871 for (reg
= LAST_VFP_REGNUM
- 1; reg
>= FIRST_VFP_REGNUM
; reg
-= 2)
13873 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
13874 && (!df_regs_ever_live_p (reg
+ 1)
13875 || call_used_regs
[reg
+ 1]))
13877 if (end_reg
> reg
+ 2)
13878 vfp_output_fldmd (f
, SP_REGNUM
,
13879 (reg
+ 2 - FIRST_VFP_REGNUM
) / 2,
13880 (end_reg
- (reg
+ 2)) / 2);
13884 if (end_reg
> reg
+ 2)
13885 vfp_output_fldmd (f
, SP_REGNUM
, 0,
13886 (end_reg
- (reg
+ 2)) / 2);
13890 for (reg
= FIRST_IWMMXT_REGNUM
; reg
<= LAST_IWMMXT_REGNUM
; reg
++)
13891 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
13892 asm_fprintf (f
, "\twldrd\t%r, [%r], #8\n", reg
, SP_REGNUM
);
13894 /* If we can, restore the LR into the PC. */
13895 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
13896 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
13897 && !IS_STACKALIGN (func_type
)
13899 && crtl
->args
.pretend_args_size
== 0
13900 && saved_regs_mask
& (1 << LR_REGNUM
)
13901 && !crtl
->calls_eh_return
)
13903 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
13904 saved_regs_mask
|= (1 << PC_REGNUM
);
13905 rfe
= IS_INTERRUPT (func_type
);
  /* Load the registers off the stack.  If we only have one register
     to load, use the LDR instruction - it is faster.  For Thumb-2
     always use pop and the assembler will pick the best instruction.  */
  if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
      && !IS_INTERRUPT (func_type))
    asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
13918 else if (saved_regs_mask
)
13920 if (saved_regs_mask
& (1 << SP_REGNUM
))
13921 /* Note - write back to the stack register is not enabled
13922 (i.e. "ldmfd sp!..."). We know that the stack pointer is
13923 in the list of registers and if we add writeback the
13924 instruction becomes UNPREDICTABLE. */
13925 print_multi_reg (f
, "ldmfd\t%r, ", SP_REGNUM
, saved_regs_mask
,
13927 else if (TARGET_ARM
)
13928 print_multi_reg (f
, "ldmfd\t%r!, ", SP_REGNUM
, saved_regs_mask
,
13931 print_multi_reg (f
, "pop\t", SP_REGNUM
, saved_regs_mask
, 0);
13934 if (crtl
->args
.pretend_args_size
)
13936 /* Unwind the pre-pushed regs. */
13937 operands
[0] = operands
[1] = stack_pointer_rtx
;
13938 operands
[2] = GEN_INT (crtl
->args
.pretend_args_size
);
13939 output_add_immediate (operands
);
13943 /* We may have already restored PC directly from the stack. */
13944 if (!really_return
|| saved_regs_mask
& (1 << PC_REGNUM
))
13947 /* Stack adjustment for exception handler. */
13948 if (crtl
->calls_eh_return
)
13949 asm_fprintf (f
, "\tadd\t%r, %r, %r\n", SP_REGNUM
, SP_REGNUM
,
13950 ARM_EH_STACKADJ_REGNUM
);
13952 /* Generate the return instruction. */
13953 switch ((int) ARM_FUNC_TYPE (func_type
))
13957 asm_fprintf (f
, "\tsubs\t%r, %r, #4\n", PC_REGNUM
, LR_REGNUM
);
13960 case ARM_FT_EXCEPTION
:
13961 asm_fprintf (f
, "\tmovs\t%r, %r\n", PC_REGNUM
, LR_REGNUM
);
13964 case ARM_FT_INTERWORKED
:
13965 asm_fprintf (f
, "\tbx\t%r\n", LR_REGNUM
);
13969 if (IS_STACKALIGN (func_type
))
13971 /* See comment in arm_expand_prologue. */
13972 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, 0);
13974 if (arm_arch5
|| arm_arch4t
)
13975 asm_fprintf (f
, "\tbx\t%r\n", LR_REGNUM
);
13977 asm_fprintf (f
, "\tmov\t%r, %r\n", PC_REGNUM
, LR_REGNUM
);
13985 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
13986 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
13988 arm_stack_offsets
*offsets
;
13994 /* Emit any call-via-reg trampolines that are needed for v4t support
13995 of call_reg and call_value_reg type insns. */
13996 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
13998 rtx label
= cfun
->machine
->call_via
[regno
];
14002 switch_to_section (function_section (current_function_decl
));
14003 targetm
.asm_out
.internal_label (asm_out_file
, "L",
14004 CODE_LABEL_NUMBER (label
));
14005 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
14009 /* ??? Probably not safe to set this here, since it assumes that a
14010 function will be emitted as assembly immediately after we generate
14011 RTL for it. This does not happen for inline functions. */
14012 cfun
->machine
->return_used_this_function
= 0;
14014 else /* TARGET_32BIT */
14016 /* We need to take into account any stack-frame rounding. */
14017 offsets
= arm_get_frame_offsets ();
14019 gcc_assert (!use_return_insn (FALSE
, NULL
)
14020 || (cfun
->machine
->return_used_this_function
!= 0)
14021 || offsets
->saved_regs
== offsets
->outgoing_args
14022 || frame_pointer_needed
);
14024 /* Reset the ARM-specific per-function variables. */
14025 after_arm_reorg
= 0;
14029 /* Generate and emit an insn that we will recognize as a push_multi.
14030 Unfortunately, since this insn does not reflect very well the actual
14031 semantics of the operation, we need to annotate the insn for the benefit
14032 of DWARF2 frame unwind information. */
14034 emit_multi_reg_push (unsigned long mask
)
14037 int num_dwarf_regs
;
14041 int dwarf_par_index
;
14044 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
14045 if (mask
& (1 << i
))
14048 gcc_assert (num_regs
&& num_regs
<= 16);
14050 /* We don't record the PC in the dwarf frame information. */
14051 num_dwarf_regs
= num_regs
;
14052 if (mask
& (1 << PC_REGNUM
))
  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

	 (set (mem:BLK (pre_modify:SI (reg:SI sp)
				      (const_int:SI <num>)))
	      (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

	 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	 (set (mem:SI (reg:SI sp)) (reg:SI r4))
	 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
	 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */
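
  /* As a concrete (illustrative) instance of the note described above:
     pushing {r4, r5, lr} would be annotated roughly as

	 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -12)))
	 (set (mem:SI (reg:SI sp)) (reg:SI r4))
	 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI r5))
	 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI lr))

     i.e. a single stack decrement of 4 * <number of registers> followed
     by one store per saved register at increasing offsets.  */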
14095 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
14096 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
14097 dwarf_par_index
= 1;
14099 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
14101 if (mask
& (1 << i
))
14103 reg
= gen_rtx_REG (SImode
, i
);
14105 XVECEXP (par
, 0, 0)
14106 = gen_rtx_SET (VOIDmode
,
14109 gen_rtx_PRE_MODIFY (Pmode
,
14112 (stack_pointer_rtx
,
14115 gen_rtx_UNSPEC (BLKmode
,
14116 gen_rtvec (1, reg
),
14117 UNSPEC_PUSH_MULT
));
14119 if (i
!= PC_REGNUM
)
14121 tmp
= gen_rtx_SET (VOIDmode
,
14122 gen_frame_mem (SImode
, stack_pointer_rtx
),
14124 RTX_FRAME_RELATED_P (tmp
) = 1;
14125 XVECEXP (dwarf
, 0, dwarf_par_index
) = tmp
;
14133 for (j
= 1, i
++; j
< num_regs
; i
++)
14135 if (mask
& (1 << i
))
14137 reg
= gen_rtx_REG (SImode
, i
);
14139 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
14141 if (i
!= PC_REGNUM
)
14144 = gen_rtx_SET (VOIDmode
,
14147 plus_constant (stack_pointer_rtx
,
14150 RTX_FRAME_RELATED_P (tmp
) = 1;
14151 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
14158 par
= emit_insn (par
);
14160 tmp
= gen_rtx_SET (VOIDmode
,
14162 plus_constant (stack_pointer_rtx
, -4 * num_regs
));
14163 RTX_FRAME_RELATED_P (tmp
) = 1;
14164 XVECEXP (dwarf
, 0, 0) = tmp
;
14166 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
14171 /* Calculate the size of the return value that is passed in registers. */
14173 arm_size_return_regs (void)
14175 enum machine_mode mode
;
14177 if (crtl
->return_rtx
!= 0)
14178 mode
= GET_MODE (crtl
->return_rtx
);
14180 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
14182 return GET_MODE_SIZE (mode
);
14186 emit_sfm (int base_reg
, int count
)
14193 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
14194 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
14196 reg
= gen_rtx_REG (XFmode
, base_reg
++);
14198 XVECEXP (par
, 0, 0)
14199 = gen_rtx_SET (VOIDmode
,
14202 gen_rtx_PRE_MODIFY (Pmode
,
14205 (stack_pointer_rtx
,
14208 gen_rtx_UNSPEC (BLKmode
,
14209 gen_rtvec (1, reg
),
14210 UNSPEC_PUSH_MULT
));
14211 tmp
= gen_rtx_SET (VOIDmode
,
14212 gen_frame_mem (XFmode
, stack_pointer_rtx
), reg
);
14213 RTX_FRAME_RELATED_P (tmp
) = 1;
14214 XVECEXP (dwarf
, 0, 1) = tmp
;
14216 for (i
= 1; i
< count
; i
++)
14218 reg
= gen_rtx_REG (XFmode
, base_reg
++);
14219 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
14221 tmp
= gen_rtx_SET (VOIDmode
,
14222 gen_frame_mem (XFmode
,
14223 plus_constant (stack_pointer_rtx
,
14226 RTX_FRAME_RELATED_P (tmp
) = 1;
14227 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
14230 tmp
= gen_rtx_SET (VOIDmode
,
14232 plus_constant (stack_pointer_rtx
, -12 * count
));
14234 RTX_FRAME_RELATED_P (tmp
) = 1;
14235 XVECEXP (dwarf
, 0, 0) = tmp
;
14237 par
= emit_insn (par
);
14238 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
14244 /* Return true if the current function needs to save/restore LR. */
14247 thumb_force_lr_save (void)
14249 return !cfun
->machine
->lr_save_eliminated
14250 && (!leaf_function_p ()
14251 || thumb_far_jump_used_p ()
14252 || df_regs_ever_live_p (LR_REGNUM
));
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.

   Typical stack layout (the original ASCII diagram is elided; only its
   labels survive): the old stack pointer at the top, then the saved
   arguments for vararg functions, the hard FP & arg pointer, the soft
   frame pointer, the locals base pointer, and finally the current
   stack pointer at the bottom.

   For a given function some or all of these stack components
   may not be needed, giving rise to the possibility of
   eliminating some of the registers.

   The values returned by this function must reflect the behavior
   of arm_expand_prologue () and arm_compute_save_reg_mask ().

   The sign of the number returned reflects the direction of stack
   growth, so the values are positive for all eliminations except
   from the soft frame pointer to the hard frame pointer.

   SFP may point just inside the local variables block to ensure correct
   alignment.  */
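
/* A worked example (illustrative only, not a guaranteed layout): for a
   non-APCS ARM function with no pretend args, saving {r4, r5, fp, lr}
   (16 bytes), no locals and 8 bytes of outgoing arguments, and assuming
   CALLER_INTERWORKING_SLOT_SIZE and the static chain slot are both 0,
   arm_get_frame_offsets () would give roughly saved_args = 0,
   saved_regs = 16, soft_frame = 16, locals_base = 16 and
   outgoing_args = 24.  Eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM then yields outgoing_args - (saved_args + 4) = 20.
   The exact numbers depend on alignment and the options in force; this
   is only meant to show how the offsets relate.  */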
14305 /* Calculate stack offsets. These are used to calculate register elimination
14306 offsets and in prologue/epilogue code. Also calculates which registers
14307 should be saved. */
14309 static arm_stack_offsets
*
14310 arm_get_frame_offsets (void)
14312 struct arm_stack_offsets
*offsets
;
14313 unsigned long func_type
;
14317 HOST_WIDE_INT frame_size
;
14320 offsets
= &cfun
->machine
->stack_offsets
;
  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     We only need to know about leaf functions once reload has completed,
     and the frame size cannot be changed after that time, so we can safely
     use the cached value.  */
14332 if (reload_completed
)
  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());
14339 leaf
= leaf_function_p ();
14341 /* Space for variadic functions. */
14342 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
14344 /* In Thumb mode this is incorrect, but never used. */
14345 offsets
->frame
= offsets
->saved_args
+ (frame_pointer_needed
? 4 : 0) +
14346 arm_compute_static_chain_stack_bytes();
14350 unsigned int regno
;
14352 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
14353 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
14354 saved
= core_saved
;
14356 /* We know that SP will be doubleword aligned on entry, and we must
14357 preserve that condition at any subroutine call. We also require the
14358 soft frame pointer to be doubleword aligned. */
14360 if (TARGET_REALLY_IWMMXT
)
14362 /* Check for the call-saved iWMMXt registers. */
14363 for (regno
= FIRST_IWMMXT_REGNUM
;
14364 regno
<= LAST_IWMMXT_REGNUM
;
14366 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
14370 func_type
= arm_current_func_type ();
14371 if (! IS_VOLATILE (func_type
))
14373 /* Space for saved FPA registers. */
14374 for (regno
= FIRST_FPA_REGNUM
; regno
<= LAST_FPA_REGNUM
; regno
++)
14375 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
14378 /* Space for saved VFP registers. */
14379 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
14380 saved
+= arm_get_vfp_saved_size ();
14383 else /* TARGET_THUMB1 */
14385 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
14386 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
14387 saved
= core_saved
;
14388 if (TARGET_BACKTRACE
)
14392 /* Saved registers include the stack frame. */
14393 offsets
->saved_regs
= offsets
->saved_args
+ saved
+
14394 arm_compute_static_chain_stack_bytes();
14395 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
14396 /* A leaf function does not need any stack alignment if it has nothing
14398 if (leaf
&& frame_size
== 0)
14400 offsets
->outgoing_args
= offsets
->soft_frame
;
14401 offsets
->locals_base
= offsets
->soft_frame
;
14405 /* Ensure SFP has the correct alignment. */
14406 if (ARM_DOUBLEWORD_ALIGN
14407 && (offsets
->soft_frame
& 7))
14409 offsets
->soft_frame
+= 4;
14410 /* Try to align stack by pushing an extra reg. Don't bother doing this
14411 when there is a stack frame as the alignment will be rolled into
14412 the normal stack adjustment. */
14413 if (frame_size
+ crtl
->outgoing_args_size
== 0)
14417 /* If it is safe to use r3, then do so. This sometimes
14418 generates better code on Thumb-2 by avoiding the need to
14419 use 32-bit push/pop instructions. */
14420 if (!crtl
->tail_call_emit
14421 && arm_size_return_regs () <= 12)
14426 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
14428 if ((offsets
->saved_regs_mask
& (1 << i
)) == 0)
14437 offsets
->saved_regs
+= 4;
14438 offsets
->saved_regs_mask
|= (1 << reg
);
14443 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
14444 offsets
->outgoing_args
= (offsets
->locals_base
14445 + crtl
->outgoing_args_size
);
14447 if (ARM_DOUBLEWORD_ALIGN
)
14449 /* Ensure SP remains doubleword aligned. */
14450 if (offsets
->outgoing_args
& 7)
14451 offsets
->outgoing_args
+= 4;
14452 gcc_assert (!(offsets
->outgoing_args
& 7));
14459 /* Calculate the relative offsets for the different stack pointers. Positive
14460 offsets are in the direction of stack growth. */
14463 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
14465 arm_stack_offsets
*offsets
;
14467 offsets
= arm_get_frame_offsets ();
14469 /* OK, now we have enough information to compute the distances.
14470 There must be an entry in these switch tables for each pair
14471 of registers in ELIMINABLE_REGS, even if some of the entries
14472 seem to be redundant or useless. */
14475 case ARG_POINTER_REGNUM
:
14478 case THUMB_HARD_FRAME_POINTER_REGNUM
:
14481 case FRAME_POINTER_REGNUM
:
14482 /* This is the reverse of the soft frame pointer
14483 to hard frame pointer elimination below. */
14484 return offsets
->soft_frame
- offsets
->saved_args
;
14486 case ARM_HARD_FRAME_POINTER_REGNUM
:
14487 /* This is only non-zero in the case where the static chain register
14488 is stored above the frame. */
14489 return offsets
->frame
- offsets
->saved_args
- 4;
14491 case STACK_POINTER_REGNUM
:
14492 /* If nothing has been pushed on the stack at all
14493 then this will return -4. This *is* correct! */
14494 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
14497 gcc_unreachable ();
14499 gcc_unreachable ();
14501 case FRAME_POINTER_REGNUM
:
14504 case THUMB_HARD_FRAME_POINTER_REGNUM
:
14507 case ARM_HARD_FRAME_POINTER_REGNUM
:
14508 /* The hard frame pointer points to the top entry in the
14509 stack frame. The soft frame pointer to the bottom entry
14510 in the stack frame. If there is no stack frame at all,
14511 then they are identical. */
14513 return offsets
->frame
- offsets
->soft_frame
;
14515 case STACK_POINTER_REGNUM
:
14516 return offsets
->outgoing_args
- offsets
->soft_frame
;
14519 gcc_unreachable ();
14521 gcc_unreachable ();
14524 /* You cannot eliminate from the stack pointer.
14525 In theory you could eliminate from the hard frame
14526 pointer to the stack pointer, but this will never
14527 happen, since if a stack frame is not needed the
14528 hard frame pointer will never be used. */
14529 gcc_unreachable ();
14533 /* Given FROM and TO register numbers, say whether this elimination is
14534 allowed. Frame pointer elimination is automatically handled.
14536 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14537 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14538 pointer, we must eliminate FRAME_POINTER_REGNUM into
14539 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14540 ARG_POINTER_REGNUM. */
14543 arm_can_eliminate (const int from
, const int to
)
14545 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
14546 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
14547 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
14548 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
14552 /* Emit RTL to save coprocessor registers on function entry. Returns the
14553 number of bytes pushed. */
14556 arm_save_coproc_regs(void)
14558 int saved_size
= 0;
14560 unsigned start_reg
;
14563 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
14564 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
14566 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
14567 insn
= gen_rtx_MEM (V2SImode
, insn
);
14568 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
14569 RTX_FRAME_RELATED_P (insn
) = 1;
14573 /* Save any floating point call-saved registers used by this
14575 if (TARGET_FPA_EMU2
)
14577 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
14578 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14580 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
14581 insn
= gen_rtx_MEM (XFmode
, insn
);
14582 insn
= emit_set_insn (insn
, gen_rtx_REG (XFmode
, reg
));
14583 RTX_FRAME_RELATED_P (insn
) = 1;
14589 start_reg
= LAST_FPA_REGNUM
;
14591 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
14593 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14595 if (start_reg
- reg
== 3)
14597 insn
= emit_sfm (reg
, 4);
14598 RTX_FRAME_RELATED_P (insn
) = 1;
14600 start_reg
= reg
- 1;
14605 if (start_reg
!= reg
)
14607 insn
= emit_sfm (reg
+ 1, start_reg
- reg
);
14608 RTX_FRAME_RELATED_P (insn
) = 1;
14609 saved_size
+= (start_reg
- reg
) * 12;
14611 start_reg
= reg
- 1;
14615 if (start_reg
!= reg
)
14617 insn
= emit_sfm (reg
+ 1, start_reg
- reg
);
14618 saved_size
+= (start_reg
- reg
) * 12;
14619 RTX_FRAME_RELATED_P (insn
) = 1;
14622 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
14624 start_reg
= FIRST_VFP_REGNUM
;
14626 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
14628 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
14629 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
14631 if (start_reg
!= reg
)
14632 saved_size
+= vfp_emit_fstmd (start_reg
,
14633 (reg
- start_reg
) / 2);
14634 start_reg
= reg
+ 2;
14637 if (start_reg
!= reg
)
14638 saved_size
+= vfp_emit_fstmd (start_reg
,
14639 (reg
- start_reg
) / 2);
14645 /* Set the Thumb frame pointer from the stack pointer. */
14648 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
14650 HOST_WIDE_INT amount
;
14653 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
14655 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
14656 stack_pointer_rtx
, GEN_INT (amount
)));
14659 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
14660 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
14661 expects the first two operands to be the same. */
14664 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
14666 hard_frame_pointer_rtx
));
14670 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
14671 hard_frame_pointer_rtx
,
14672 stack_pointer_rtx
));
14674 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
14675 plus_constant (stack_pointer_rtx
, amount
));
14676 RTX_FRAME_RELATED_P (dwarf
) = 1;
14677 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
14680 RTX_FRAME_RELATED_P (insn
) = 1;
14683 /* Generate the prologue instructions for entry into an ARM or Thumb-2
14686 arm_expand_prologue (void)
14691 unsigned long live_regs_mask
;
14692 unsigned long func_type
;
14694 int saved_pretend_args
= 0;
14695 int saved_regs
= 0;
14696 unsigned HOST_WIDE_INT args_to_push
;
14697 arm_stack_offsets
*offsets
;
14699 func_type
= arm_current_func_type ();
14701 /* Naked functions don't have prologues. */
14702 if (IS_NAKED (func_type
))
14705 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
14706 args_to_push
= crtl
->args
.pretend_args_size
;
14708 /* Compute which register we will have to save onto the stack. */
14709 offsets
= arm_get_frame_offsets ();
14710 live_regs_mask
= offsets
->saved_regs_mask
;
14712 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
14714 if (IS_STACKALIGN (func_type
))
14719 /* Handle a word-aligned stack pointer. We generate the following:
14724 <save and restore r0 in normal prologue/epilogue>
14728 The unwinder doesn't need to know about the stack realignment.
14729 Just tell it we saved SP in r0. */
14730 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
14732 r0
= gen_rtx_REG (SImode
, 0);
14733 r1
= gen_rtx_REG (SImode
, 1);
14734 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
14735 compiler won't choke. */
14736 dwarf
= gen_rtx_UNSPEC (SImode
, rtvec_alloc (0), UNSPEC_STACK_ALIGN
);
14737 dwarf
= gen_rtx_SET (VOIDmode
, r0
, dwarf
);
14738 insn
= gen_movsi (r0
, stack_pointer_rtx
);
14739 RTX_FRAME_RELATED_P (insn
) = 1;
14740 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
14742 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
14743 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
14746 /* For APCS frames, if IP register is clobbered
14747 when creating frame, save that register in a special
14749 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
14751 if (IS_INTERRUPT (func_type
))
14753 /* Interrupt functions must not corrupt any registers.
14754 Creating a frame pointer however, corrupts the IP
14755 register, so we must push it first. */
14756 insn
= emit_multi_reg_push (1 << IP_REGNUM
);
14758 /* Do not set RTX_FRAME_RELATED_P on this insn.
14759 The dwarf stack unwinding code only wants to see one
14760 stack decrement per function, and this is not it. If
14761 this instruction is labeled as being part of the frame
14762 creation sequence then dwarf2out_frame_debug_expr will
14763 die when it encounters the assignment of IP to FP
14764 later on, since the use of SP here establishes SP as
14765 the CFA register and not IP.
14767 Anyway this instruction is not really part of the stack
14768 frame creation although it is part of the prologue. */
14770 else if (IS_NESTED (func_type
))
	  /* The static chain register is the same as the IP register
	     used as a scratch register during stack frame creation.
	     To get around this we need to find somewhere to store IP
	     whilst the frame is being created.  We try the following
	     places in order:

	       1. The last argument register.
	       2. A slot on the stack above the frame.  (This only
		  works if the function is not a varargs function).
	       3. Register r3, after pushing the argument registers
		  onto the stack.

	     Note - we only need to tell the dwarf2 backend about the SP
	     adjustment in the second variant; the static chain register
	     doesn't need to be unwound, as it doesn't contain a value
	     inherited from the caller.  */
14789 if (df_regs_ever_live_p (3) == false)
14790 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
14791 else if (args_to_push
== 0)
14795 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
14798 insn
= gen_rtx_PRE_DEC (SImode
, stack_pointer_rtx
);
14799 insn
= emit_set_insn (gen_frame_mem (SImode
, insn
), ip_rtx
);
14802 /* Just tell the dwarf backend that we adjusted SP. */
14803 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
14804 plus_constant (stack_pointer_rtx
,
14806 RTX_FRAME_RELATED_P (insn
) = 1;
14807 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
14811 /* Store the args on the stack. */
14812 if (cfun
->machine
->uses_anonymous_args
)
14813 insn
= emit_multi_reg_push
14814 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
14817 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
14818 GEN_INT (- args_to_push
)));
14820 RTX_FRAME_RELATED_P (insn
) = 1;
14822 saved_pretend_args
= 1;
14823 fp_offset
= args_to_push
;
14826 /* Now reuse r3 to preserve IP. */
14827 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
14831 insn
= emit_set_insn (ip_rtx
,
14832 plus_constant (stack_pointer_rtx
, fp_offset
));
14833 RTX_FRAME_RELATED_P (insn
) = 1;
14838 /* Push the argument registers, or reserve space for them. */
14839 if (cfun
->machine
->uses_anonymous_args
)
14840 insn
= emit_multi_reg_push
14841 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
14844 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
14845 GEN_INT (- args_to_push
)));
14846 RTX_FRAME_RELATED_P (insn
) = 1;
  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra push of IP
     (needed when a frame is needed and the frame layout is APCS), then
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
14854 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
14855 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
14856 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
14859 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
14861 emit_set_insn (lr
, plus_constant (lr
, -4));
14864 if (live_regs_mask
)
14866 saved_regs
+= bit_count (live_regs_mask
) * 4;
14867 if (optimize_size
&& !frame_pointer_needed
14868 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so it is independent of the epilogue.  */
14877 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
14879 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
14880 if (frame
&& n
* 4 >= frame
)
14883 live_regs_mask
|= (1 << n
) - 1;
14884 saved_regs
+= frame
;
14887 insn
= emit_multi_reg_push (live_regs_mask
);
14888 RTX_FRAME_RELATED_P (insn
) = 1;
14891 if (! IS_VOLATILE (func_type
))
14892 saved_regs
+= arm_save_coproc_regs ();
14894 if (frame_pointer_needed
&& TARGET_ARM
)
14896 /* Create the new frame pointer. */
14897 if (TARGET_APCS_FRAME
)
14899 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
14900 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
14901 RTX_FRAME_RELATED_P (insn
) = 1;
14903 if (IS_NESTED (func_type
))
14905 /* Recover the static chain register. */
14906 if (!df_regs_ever_live_p (3)
14907 || saved_pretend_args
)
14908 insn
= gen_rtx_REG (SImode
, 3);
14909 else /* if (crtl->args.pretend_args_size == 0) */
14911 insn
= plus_constant (hard_frame_pointer_rtx
, 4);
14912 insn
= gen_frame_mem (SImode
, insn
);
14914 emit_set_insn (ip_rtx
, insn
);
14915 /* Add a USE to stop propagate_one_insn() from barfing. */
14916 emit_insn (gen_prologue_use (ip_rtx
));
14921 insn
= GEN_INT (saved_regs
- 4);
14922 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
14923 stack_pointer_rtx
, insn
));
14924 RTX_FRAME_RELATED_P (insn
) = 1;
14928 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
14930 /* This add can produce multiple insns for a large constant, so we
14931 need to get tricky. */
14932 rtx last
= get_last_insn ();
14934 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
14935 - offsets
->outgoing_args
);
14937 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
14941 last
= last
? NEXT_INSN (last
) : get_insns ();
14942 RTX_FRAME_RELATED_P (last
) = 1;
14944 while (last
!= insn
);
14946 /* If the frame pointer is needed, emit a special barrier that
14947 will prevent the scheduler from moving stores to the frame
14948 before the stack adjustment. */
14949 if (frame_pointer_needed
)
14950 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
14951 hard_frame_pointer_rtx
));
14955 if (frame_pointer_needed
&& TARGET_THUMB2
)
14956 thumb_set_frame_pointer (offsets
);
14958 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
14960 unsigned long mask
;
14962 mask
= live_regs_mask
;
14963 mask
&= THUMB2_WORK_REGS
;
14964 if (!IS_NESTED (func_type
))
14965 mask
|= (1 << IP_REGNUM
);
14966 arm_load_pic_register (mask
);
14969 /* If we are profiling, make sure no instructions are scheduled before
14970 the call to mcount. Similarly if the user has requested no
14971 scheduling in the prolog. Similarly if we want non-call exceptions
14972 using the EABI unwinder, to prevent faulting instructions from being
14973 swapped with a stack adjustment. */
14974 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
14975 || (ARM_EABI_UNWIND_TABLES
&& cfun
->can_throw_non_call_exceptions
))
14976 emit_insn (gen_blockage ());
14978 /* If the link register is being kept alive, with the return address in it,
14979 then make sure that it does not get reused by the ce2 pass. */
14980 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
14981 cfun
->machine
->lr_save_eliminated
= 1;
14984 /* Print condition code to STREAM. Helper function for arm_print_operand. */
14986 arm_print_condition (FILE *stream
)
14988 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
14990 /* Branch conversion is not implemented for Thumb-2. */
14993 output_operand_lossage ("predicated Thumb instruction");
14996 if (current_insn_predicate
!= NULL
)
14998 output_operand_lossage
14999 ("predicated instruction in conditional sequence");
15003 fputs (arm_condition_codes
[arm_current_cc
], stream
);
15005 else if (current_insn_predicate
)
15007 enum arm_cond_code code
;
15011 output_operand_lossage ("predicated Thumb instruction");
15015 code
= get_arm_condition_code (current_insn_predicate
);
15016 fputs (arm_condition_codes
[code
], stream
);
/* If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
15035 arm_print_operand (FILE *stream
, rtx x
, int code
)
15040 fputs (ASM_COMMENT_START
, stream
);
15044 fputs (user_label_prefix
, stream
);
15048 fputs (REGISTER_PREFIX
, stream
);
15052 arm_print_condition (stream
);
15056 /* Nothing in unified syntax, otherwise the current condition code. */
15057 if (!TARGET_UNIFIED_ASM
)
15058 arm_print_condition (stream
);
15062 /* The current condition code in unified syntax, otherwise nothing. */
15063 if (TARGET_UNIFIED_ASM
)
15064 arm_print_condition (stream
);
15068 /* The current condition code for a condition code setting instruction.
15069 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15070 if (TARGET_UNIFIED_ASM
)
15072 fputc('s', stream
);
15073 arm_print_condition (stream
);
15077 arm_print_condition (stream
);
15078 fputc('s', stream
);
15083 /* If the instruction is conditionally executed then print
15084 the current condition code, otherwise print 's'. */
15085 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
15086 if (current_insn_predicate
)
15087 arm_print_condition (stream
);
15089 fputc('s', stream
);
      /* %# is a "break" sequence.  It doesn't output anything, but is used to
	 separate e.g. operand numbers from following text, if that text
	 consists of further digits which we don't want to be part of the
	 operand number.  */
15102 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
15103 r
= real_value_negate (&r
);
15104 fprintf (stream
, "%s", fp_const_from_val (&r
));
15108 /* An integer or symbol address without a preceding # sign. */
15110 switch (GET_CODE (x
))
15113 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
15117 output_addr_const (stream
, x
);
15121 gcc_unreachable ();
15126 if (GET_CODE (x
) == CONST_INT
)
15129 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
15130 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
15134 putc ('~', stream
);
15135 output_addr_const (stream
, x
);
15140 /* The low 16 bits of an immediate constant. */
15141 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
15145 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
15148 /* Truncate Cirrus shift counts. */
15150 if (GET_CODE (x
) == CONST_INT
)
15152 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 0x3f);
15155 arm_print_operand (stream
, x
, 0);
15159 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
15167 if (!shift_operator (x
, SImode
))
15169 output_operand_lossage ("invalid shift operand");
15173 shift
= shift_op (x
, &val
);
15177 fprintf (stream
, ", %s ", shift
);
15179 arm_print_operand (stream
, XEXP (x
, 1), 0);
15181 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
    /* An explanation of the 'Q', 'R' and 'H' register operands:

       In a pair of registers containing a DI or DF value the 'Q'
       operand returns the register number of the register containing
       the least significant part of the value.  The 'R' operand returns
       the register number of the register containing the most
       significant part of the value.

       The 'H' operand returns the higher of the two register numbers.
       On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
       same as the 'Q' operand, since the most significant part of the
       value is held in the lower number register.  The reverse is true
       on systems where WORDS_BIG_ENDIAN is false.

       The purpose of these operands is to distinguish between cases
       where the endian-ness of the values is important (for example
       when they are added together), and cases where the endian-ness
       is irrelevant, but the order of register operations is important.
       For example when loading a value from memory into a register
       pair, the endian-ness does not matter.  Provided that the value
       from the lower memory address is put into the lower numbered
       register, and the value from the higher address is put into the
       higher numbered register, the load will work regardless of whether
       the value being loaded is big-wordian or little-wordian.  The
       order of the two register loads can matter however, if the address
       of the memory location is actually held in one of the registers
       being overwritten by the load.  */
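
    /* Illustrative example (little-endian, so WORDS_BIG_ENDIAN is false):
       for a DImode operand held in the register pair r0/r1, "%Q0" prints
       "r0" (the least significant word), "%R0" prints "r1" (the most
       significant word) and "%H0" prints "r1" (the higher-numbered
       register).  With WORDS_BIG_ENDIAN, %Q and %R swap while %H is
       unchanged; this follows directly from the REGNO arithmetic in the
       cases below.  */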
15214 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
15216 output_operand_lossage ("invalid operand for code '%c'", code
);
15220 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
15224 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
15226 output_operand_lossage ("invalid operand for code '%c'", code
);
15230 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
15234 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
15236 output_operand_lossage ("invalid operand for code '%c'", code
);
15240 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
15244 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
15246 output_operand_lossage ("invalid operand for code '%c'", code
);
15250 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
15254 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
15256 output_operand_lossage ("invalid operand for code '%c'", code
);
15260 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
15264 asm_fprintf (stream
, "%r",
15265 GET_CODE (XEXP (x
, 0)) == REG
15266 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
15270 asm_fprintf (stream
, "{%r-%r}",
15272 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
15275 /* Like 'M', but writing doubleword vector registers, for use by Neon
15279 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
15280 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
15282 asm_fprintf (stream
, "{d%d}", regno
);
15284 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
15289 /* CONST_TRUE_RTX means always -- that's the default. */
15290 if (x
== const_true_rtx
)
15293 if (!COMPARISON_P (x
))
15295 output_operand_lossage ("invalid operand for code '%c'", code
);
15299 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
15304 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
15305 want to do that. */
15306 if (x
== const_true_rtx
)
15308 output_operand_lossage ("instruction never executed");
15311 if (!COMPARISON_P (x
))
15313 output_operand_lossage ("invalid operand for code '%c'", code
);
15317 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
15318 (get_arm_condition_code (x
))],
15322 /* Cirrus registers can be accessed in a variety of ways:
15323 single floating point (f)
15324 double floating point (d)
15326 64bit integer (dx). */
15327 case 'W': /* Cirrus register in F mode. */
15328 case 'X': /* Cirrus register in D mode. */
15329 case 'Y': /* Cirrus register in FX mode. */
15330 case 'Z': /* Cirrus register in DX mode. */
15331 gcc_assert (GET_CODE (x
) == REG
15332 && REGNO_REG_CLASS (REGNO (x
)) == CIRRUS_REGS
);
15334 fprintf (stream
, "mv%s%s",
15336 : code
== 'X' ? "d"
15337 : code
== 'Y' ? "fx" : "dx", reg_names
[REGNO (x
)] + 2);
15341 /* Print cirrus register in the mode specified by the register's mode. */
15344 int mode
= GET_MODE (x
);
15346 if (GET_CODE (x
) != REG
|| REGNO_REG_CLASS (REGNO (x
)) != CIRRUS_REGS
)
15348 output_operand_lossage ("invalid operand for code '%c'", code
);
15352 fprintf (stream
, "mv%s%s",
15353 mode
== DFmode
? "d"
15354 : mode
== SImode
? "fx"
15355 : mode
== DImode
? "dx"
15356 : "f", reg_names
[REGNO (x
)] + 2);
15362 if (GET_CODE (x
) != REG
15363 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
15364 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
15365 /* Bad value for wCG register number. */
15367 output_operand_lossage ("invalid operand for code '%c'", code
);
15372 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
15375 /* Print an iWMMXt control register name. */
15377 if (GET_CODE (x
) != CONST_INT
15379 || INTVAL (x
) >= 16)
15380 /* Bad value for wC register number. */
15382 output_operand_lossage ("invalid operand for code '%c'", code
);
15388 static const char * wc_reg_names
[16] =
15390 "wCID", "wCon", "wCSSF", "wCASF",
15391 "wC4", "wC5", "wC6", "wC7",
15392 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15393 "wC12", "wC13", "wC14", "wC15"
15396 fprintf (stream
, wc_reg_names
[INTVAL (x
)]);
15400 /* Print the high single-precision register of a VFP double-precision
15404 int mode
= GET_MODE (x
);
15407 if (GET_MODE_SIZE (mode
) != 8 || GET_CODE (x
) != REG
)
15409 output_operand_lossage ("invalid operand for code '%c'", code
);
15414 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
15416 output_operand_lossage ("invalid operand for code '%c'", code
);
15420 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
15424 /* Print a VFP/Neon double precision or quad precision register name. */
15428 int mode
= GET_MODE (x
);
15429 int is_quad
= (code
== 'q');
15432 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
15434 output_operand_lossage ("invalid operand for code '%c'", code
);
15438 if (GET_CODE (x
) != REG
15439 || !IS_VFP_REGNUM (REGNO (x
)))
15441 output_operand_lossage ("invalid operand for code '%c'", code
);
15446 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
15447 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
15449 output_operand_lossage ("invalid operand for code '%c'", code
);
15453 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
15454 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
15458 /* These two codes print the low/high doubleword register of a Neon quad
15459 register, respectively. For pair-structure types, can also print
15460 low/high quadword registers. */
15464 int mode
= GET_MODE (x
);
15467 if ((GET_MODE_SIZE (mode
) != 16
15468 && GET_MODE_SIZE (mode
) != 32) || GET_CODE (x
) != REG
)
15470 output_operand_lossage ("invalid operand for code '%c'", code
);
15475 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
15477 output_operand_lossage ("invalid operand for code '%c'", code
);
15481 if (GET_MODE_SIZE (mode
) == 16)
15482 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
15483 + (code
== 'f' ? 1 : 0));
15485 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
15486 + (code
== 'f' ? 1 : 0));
15490 /* Print a VFPv3 floating-point constant, represented as an integer
15494 int index
= vfp3_const_double_index (x
);
15495 gcc_assert (index
!= -1);
15496 fprintf (stream
, "%d", index
);
15500 /* Print bits representing opcode features for Neon.
15502 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15503 and polynomials as unsigned.
15505 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15507 Bit 2 is 1 for rounding functions, 0 otherwise. */
15509 /* Identify the type as 's', 'u', 'p' or 'f'. */
15512 HOST_WIDE_INT bits
= INTVAL (x
);
15513 fputc ("uspf"[bits
& 3], stream
);
15517 /* Likewise, but signed and unsigned integers are both 'i'. */
15520 HOST_WIDE_INT bits
= INTVAL (x
);
15521 fputc ("iipf"[bits
& 3], stream
);
15525 /* As for 'T', but emit 'u' instead of 'p'. */
15528 HOST_WIDE_INT bits
= INTVAL (x
);
15529 fputc ("usuf"[bits
& 3], stream
);
15533 /* Bit 2: rounding (vs none). */
15536 HOST_WIDE_INT bits
= INTVAL (x
);
15537 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
15541 /* Memory operand for vld1/vst1 instruction. */
15545 bool postinc
= FALSE
;
15546 gcc_assert (GET_CODE (x
) == MEM
);
15547 addr
= XEXP (x
, 0);
15548 if (GET_CODE (addr
) == POST_INC
)
15551 addr
= XEXP (addr
, 0);
15553 asm_fprintf (stream
, "[%r]", REGNO (addr
));
15555 fputs("!", stream
);
15559 /* Translate an S register number into a D register number and element index. */
15562 int mode
= GET_MODE (x
);
15565 if (GET_MODE_SIZE (mode
) != 4 || GET_CODE (x
) != REG
)
15567 output_operand_lossage ("invalid operand for code '%c'", code
);
15572 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
15574 output_operand_lossage ("invalid operand for code '%c'", code
);
15578 regno
= regno
- FIRST_VFP_REGNUM
;
15579 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
15583 /* Register specifier for vld1.16/vst1.16. Translate the S register
15584 number into a D register number and element index. */
15587 int mode
= GET_MODE (x
);
15590 if (GET_MODE_SIZE (mode
) != 2 || GET_CODE (x
) != REG
)
15592 output_operand_lossage ("invalid operand for code '%c'", code
);
15597 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
15599 output_operand_lossage ("invalid operand for code '%c'", code
);
15603 regno
= regno
- FIRST_VFP_REGNUM
;
15604 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
15611 output_operand_lossage ("missing operand");
15615 switch (GET_CODE (x
))
15618 asm_fprintf (stream
, "%r", REGNO (x
));
15622 output_memory_reference_mode
= GET_MODE (x
);
15623 output_address (XEXP (x
, 0));
15630 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
15631 sizeof (fpstr
), 0, 1);
15632 fprintf (stream
, "#%s", fpstr
);
15635 fprintf (stream
, "#%s", fp_immediate_constant (x
));
15639 gcc_assert (GET_CODE (x
) != NEG
);
15640 fputc ('#', stream
);
15641 if (GET_CODE (x
) == HIGH
)
15643 fputs (":lower16:", stream
);
15647 output_addr_const (stream
, x
);
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  enum machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  if (TARGET_VXWORKS_RTP
	      || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
	    fputs ("(GOT)", asm_out_file);
	  else
	    fputs ("(GOTOFF)", asm_out_file);
	}
      fputc ('\n', asm_out_file);
      return true;
    }
  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_SIZE (GET_MODE_INNER (mode));

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    REAL_VALUE_TYPE rval;

	    REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);

	    assemble_real
	      (rval, GET_MODE_INNER (mode),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}
/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
	  (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
	  (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */
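/* Illustrative example (not from the original source): with this fsm a
   branch over a single instruction, such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
     .L1:

   can instead be emitted as

	cmp	r0, #0
	addne	r1, r1, #1

   because the skipped instruction is made conditional on the inverse of
   the branch condition and the branch itself is deleted.  */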
/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
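/* Illustrative example (not from the original source): a run of four
   conditional Thumb-2 instructions with conditions eq, eq, ne, ne can be
   covered by a single "ittee eq" prefix; the trailing 't'/'e' letters
   mirror the bits collected in arm_condexec_mask.  */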
/* Returns the index of the ARM condition code string in
   `arm_condition_codes'.  COMPARISON should be an rtx like
   `(eq (...) (...))'.  */
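/* For example (illustrative): a comparison rtx of the form
   (geu (reg:CC cc) (const_int 0)) maps to ARM_CS, the "carry set"
   condition used for unsigned greater-than-or-equal.  */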
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));
  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      gcc_assert (comp_code == EQ || comp_code == NE);

      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      return code;
    case CC_NOOVmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: gcc_unreachable ();
	}

    case CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: gcc_unreachable ();
	}

    case CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: gcc_unreachable ();
	}
    case CCFPEmode:
    case CCFPmode:
      /* These encodings assume that AC=1 in the FPA system control
	 byte.  This allows us to handle all cases except UNEQ and
	 LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	  /* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: gcc_unreachable ();
	}
    case CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: gcc_unreachable ();
	}
    case CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	default: gcc_unreachable ();
	}
    case CCmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: gcc_unreachable ();
	}

    default: gcc_unreachable ();
    }
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
static void
thumb2_final_prescan_insn (rtx insn)
{
  rtx first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (GET_CODE (insn) == JUMP_INSN)
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  while (true)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Allow up to 4 conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > 4)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (GET_CODE(insn) == JUMP_INSN)
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx start_insn = insn;
  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }
  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (GET_CODE (start_insn) == BARRIER)
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (GET_CODE (start_insn) == CODE_LABEL
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (GET_CODE (body) == RETURN)
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (GET_CODE (start_insn) == BARRIER)
	    start_insn = next_nonnote_insn (start_insn);
	  if (GET_CODE (start_insn) == CODE_LABEL
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	    }
	  else
	    return;
	}
      else
	return;
    }
  gcc_assert (!arm_ccfsm_state || reverse);
  if (GET_CODE (insn) != JUMP_INSN)
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx this_insn = start_insn, label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
	seeking_return = 1;
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
	{
	  seeking_return = 1;
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();
      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;
	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && GET_CODE (this_insn) == BARRIER)
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;
	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == RETURN
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == RETURN
		       && seeking_return)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */
	      break;
	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;

	      /* A conditional cirrus instruction must be followed by
		 a non Cirrus instruction.  However, since we
		 conditionalize instructions in this function and by
		 the time we get here we can't add instructions
		 (nops), because shorten_branches() has already been
		 called, we will disable conditionalizing Cirrus
		 instructions to be safe.  */
	      if (GET_CODE (scanbody) != USE
		  && GET_CODE (scanbody) != CLOBBER
		  && get_attr_cirrus (this_insn) != CIRRUS_NOT)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}
      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (GET_CODE (this_insn) != BARRIER
				  && GET_CODE (this_insn) != CODE_LABEL));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call.  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
		  arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_HARD_FLOAT && TARGET_VFP
		&& regno == VFPCC_REGNUM));

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
  if (TARGET_HARD_FLOAT && TARGET_MAVERICK
      && IS_CIRRUS_REGNUM (regno))
    /* We have outlawed SI values in Cirrus registers because they
       reside in the lower 32 bits, but SF values reside in the
       upper 32 bits.  This causes gcc all sorts of grief.  We can't
       even split the registers into pairs because Cirrus SI values
       get sign extended to 64bits-- aldyh.  */
    return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
	 putting them there unless we have hardware conversion insns.  */
      if (mode == HFmode)
	return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	  || (VALID_NEON_QREG_MODE (mode)
	      && NEON_REGNO_OK_FOR_QUAD (regno))
	  || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	  || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	  || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	  || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	  || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }
  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }
  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs so that we can
     use ldrd.  Do not allow very large Neon structure opaque modes in
     general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
      && ARM_NUM_REGS (mode) <= 4;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  /* The only registers left are the FPA registers
     which we only allow to hold FP values.  */
  return (TARGET_HARD_FLOAT && TARGET_FPA
	  && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && regno >= FIRST_FPA_REGNUM
	  && regno <= LAST_FPA_REGNUM);
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_CIRRUS_REGNUM (regno))
    return CIRRUS_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return FPA_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (GET_CODE (addr) != REG)
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */
  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   GET_CODE (insn) == INSN
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT)
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
  do									\
    {									\
      if ((MASK) & insn_flags)						\
	add_builtin_function ((NAME), (TYPE), (CODE),			\
			      BUILT_IN_MD, NULL, NULL_TREE);		\
    }									\
  while (0)

struct builtin_description
{
  const unsigned int       mask;
  const enum insn_code     icode;
  const char * const       name;
  const enum arm_builtins  code;
  const enum rtx_code      comparison;
  const unsigned int       flag;
};
static const struct builtin_description bdesc_2arg[] =
{
#define IWMMXT_BUILTIN(code, string, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
  IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
  IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
#define IWMMXT_BUILTIN2(code, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
  IWMMXT_BUILTIN2 (ashlv4hi3_di,    WSLLH)
  IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
  IWMMXT_BUILTIN2 (ashlv2si3_di,    WSLLW)
  IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
  IWMMXT_BUILTIN2 (ashldi3_di,      WSLLD)
  IWMMXT_BUILTIN2 (ashldi3_iwmmxt,  WSLLDI)
  IWMMXT_BUILTIN2 (lshrv4hi3_di,    WSRLH)
  IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
  IWMMXT_BUILTIN2 (lshrv2si3_di,    WSRLW)
  IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
  IWMMXT_BUILTIN2 (lshrdi3_di,      WSRLD)
  IWMMXT_BUILTIN2 (lshrdi3_iwmmxt,  WSRLDI)
  IWMMXT_BUILTIN2 (ashrv4hi3_di,    WSRAH)
  IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
  IWMMXT_BUILTIN2 (ashrv2si3_di,    WSRAW)
  IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
  IWMMXT_BUILTIN2 (ashrdi3_di,      WSRAD)
  IWMMXT_BUILTIN2 (ashrdi3_iwmmxt,  WSRADI)
  IWMMXT_BUILTIN2 (rorv4hi3_di,     WRORH)
  IWMMXT_BUILTIN2 (rorv4hi3,        WRORHI)
  IWMMXT_BUILTIN2 (rorv2si3_di,     WRORW)
  IWMMXT_BUILTIN2 (rorv2si3,        WRORWI)
  IWMMXT_BUILTIN2 (rordi3_di,       WRORD)
  IWMMXT_BUILTIN2 (rordi3,          WRORDI)
  IWMMXT_BUILTIN2 (iwmmxt_wmacuz,   WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz,   WMACSZ)
};
static const struct builtin_description bdesc_1arg[] =
{
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
};
/* Set up all the iWMMXt builtins.  This is
   not called if TARGET_IWMMXT is zero.  */

static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree int_ftype_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, integer_type_node, endlink));
  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
			     tree_cons (NULL_TREE, V8QI_type_node,
			       tree_cons (NULL_TREE, integer_type_node,
					  endlink))));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
			     tree_cons (NULL_TREE, integer_type_node,
					endlink)));
  tree v2si_ftype_v2si_int
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
			     tree_cons (NULL_TREE, integer_type_node,
					endlink)));
  tree v2si_ftype_di_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
			     tree_cons (NULL_TREE, long_long_integer_type_node,
					endlink)));
  tree di_ftype_di_int
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
			     tree_cons (NULL_TREE, integer_type_node,
					endlink)));
  tree di_ftype_di_int_int
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
			     tree_cons (NULL_TREE, integer_type_node,
			       tree_cons (NULL_TREE, integer_type_node,
					  endlink))));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree int_ftype_v4hi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree int_ftype_v2si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree int_ftype_v8qi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
			     tree_cons (NULL_TREE, integer_type_node,
					endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
			     tree_cons (NULL_TREE, integer_type_node,
					endlink)));
  tree int_ftype_v2si_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
			     tree_cons (NULL_TREE, integer_type_node,
					endlink)));
  tree v8qi_ftype_v8qi_int_int
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
			     tree_cons (NULL_TREE, integer_type_node,
			       tree_cons (NULL_TREE, integer_type_node,
					  endlink))));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
			     tree_cons (NULL_TREE, integer_type_node,
			       tree_cons (NULL_TREE, integer_type_node,
					  endlink))));
  tree v2si_ftype_v2si_int_int
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
			     tree_cons (NULL_TREE, integer_type_node,
			       tree_cons (NULL_TREE, integer_type_node,
					  endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
			     tree_cons (NULL_TREE, V4HI_type_node,
					endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
			     tree_cons (NULL_TREE, V2SI_type_node,
					endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
			     tree_cons (NULL_TREE, V4HI_type_node,
					endlink)));
  tree v2si_ftype_v8qi_v8qi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
			     tree_cons (NULL_TREE, V8QI_type_node,
					endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
			     tree_cons (NULL_TREE,
					long_long_integer_type_node,
					endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
			     tree_cons (NULL_TREE,
					long_long_integer_type_node,
					endlink)));
  tree void_ftype_int_int
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, integer_type_node,
			     tree_cons (NULL_TREE, integer_type_node,
					endlink)));
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree di_ftype_v8qi
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree di_ftype_v4hi
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree di_ftype_v2si
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2si_ftype_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree v4hi_ftype_v8qi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));

  tree di_ftype_di_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE,
				      long_long_unsigned_type_node,
			     tree_cons (NULL_TREE, V4HI_type_node,
			       tree_cons (NULL_TREE, V4HI_type_node,
					  endlink))));

  tree di_ftype_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
			     tree_cons (NULL_TREE, V4HI_type_node,
					endlink)));

  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
			     tree_cons (NULL_TREE, V8QI_type_node,
					endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
			     tree_cons (NULL_TREE, V4HI_type_node,
					endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
			     tree_cons (NULL_TREE, V2SI_type_node,
					endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
			     tree_cons (NULL_TREE,
					long_long_unsigned_type_node,
					endlink)));
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_mbuiltin (d->mask, d->name, type, d->code);
    }
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
}
static void
arm_init_tls_builtins (void)
{
  tree ftype, decl;

  ftype = build_function_type (ptr_type_node, void_list_node);
  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
			       NULL, NULL_TREE);
  TREE_NOTHROW (decl) = 1;
  TREE_READONLY (decl) = 1;
}
enum neon_builtin_type_bits {
  T_V8QI  = 0x0001,
  T_V4HI  = 0x0002,
  T_V2SI  = 0x0004,
  T_V2SF  = 0x0008,
  T_DI    = 0x0010,
  T_V16QI = 0x0020,
  T_V8HI  = 0x0040,
  T_V4SI  = 0x0080,
  T_V4SF  = 0x0100,
  T_V2DI  = 0x0200,
  T_TI    = 0x0400,
  T_EI    = 0x0800,
  T_OI    = 0x1000
};

#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI

#define UP(X) X##_UP
  NEON_LOADSTRUCTLANE,
  NEON_STORESTRUCTLANE,

typedef struct {
  const char *name;
  const neon_itype itype;
  const int bits;
  const enum insn_code codes[T_MAX];
  const unsigned int num_vars;
  unsigned int base_fcode;
} neon_builtin_datum;
#define CF(N,X) CODE_FOR_neon_##N##X

#define VAR1(T, N, A) \
  #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
#define VAR2(T, N, A, B) \
  #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
#define VAR3(T, N, A, B, C) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C), \
  { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
#define VAR4(T, N, A, B, C, D) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
#define VAR5(T, N, A, B, C, D, E) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
#define VAR6(T, N, A, B, C, D, E, F) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
#define VAR7(T, N, A, B, C, D, E, F, G) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G) }, 7, 0
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
                | UP (H), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H) }, 8, 0
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
                | UP (H) | UP (I), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I) }, 9, 0
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
                | UP (H) | UP (I) | UP (J), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
/* The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.)  The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.
   WARNING: Variants should be listed in the same increasing order as
   neon_builtin_type_bits.  */
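/* For example (illustrative expansion of the macros above): the entry
   { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) } expands to an initializer of
   the form
     { "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
       { CODE_FOR_neon_vaddlv8qi, CODE_FOR_neon_vaddlv4hi,
	 CODE_FOR_neon_vaddlv2si }, 3, 0 }
   i.e. one builtin name covering three instruction patterns.  */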
static neon_builtin_datum neon_builtin_data[] =
{
  { VAR10 (BINOP, vadd,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
  { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
  { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
  { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
  { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
  { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
  { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
  { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
  { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
  { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
  { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
  { VAR2 (BINOP, vqdmull, v4hi, v2si) },
  { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
  { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR10 (BINOP, vsub,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR2 (BINOP, vcage, v2sf, v4sf) },
  { VAR2 (BINOP, vcagt, v2sf, v4sf) },
  { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
  { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
  { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
  { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
  { VAR2 (BINOP, vrecps, v2sf, v4sf) },
  { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
  { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR2 (UNOP, vcnt, v8qi, v16qi) },
  { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
  { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
  { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  /* FIXME: vget_lane supports more variants than this!  */
  { VAR10 (GETLANE, vget_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (SETLANE, vset_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
  { VAR10 (DUP, vdup_n,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (DUPLANE, vdup_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
  { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
  { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
  { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
  { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
  { VAR10 (BINOP, vext,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
  { VAR2 (UNOP, vrev16, v8qi, v16qi) },
  { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
  { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
  { VAR10 (SELECT, vbsl,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR1 (VTBL, vtbl1, v8qi) },
  { VAR1 (VTBL, vtbl2, v8qi) },
  { VAR1 (VTBL, vtbl3, v8qi) },
  { VAR1 (VTBL, vtbl4, v8qi) },
  { VAR1 (VTBX, vtbx1, v8qi) },
  { VAR1 (VTBX, vtbx2, v8qi) },
  { VAR1 (VTBX, vtbx3, v8qi) },
  { VAR1 (VTBX, vtbx4, v8qi) },
  { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1LANE, vld1_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1_dup,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1, vst1,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1LANE, vst1_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR9 (LOADSTRUCT,
	  vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld2_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst2,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst2_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT,
	  vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld3_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst3,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst3_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT, vld4,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld4_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst4,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst4_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR10 (LOGICBINOP, vand,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorr,
	   v8qi, v4hi, v2si, v2sf,
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
17336 { VAR10 (BINOP
, veor
,
17337 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
17338 { VAR10 (LOGICBINOP
, vbic
,
17339 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
17340 { VAR10 (LOGICBINOP
, vorn
,
17341 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) }
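
/* Note (descriptive comment, summarizing the code below): arm_init_neon_builtins
   walks the neon_builtin_data table above and, for each mode variant that an
   entry's VARn encoding enables (tested via d->bits against the modenames
   table), registers one "__builtin_neon_<name><mode>" function with
   add_builtin_function.  */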
arm_init_neon_builtins (void)
  unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;

  tree neon_intQI_type_node;
  tree neon_intHI_type_node;
  tree neon_polyQI_type_node;
  tree neon_polyHI_type_node;
  tree neon_intSI_type_node;
  tree neon_intDI_type_node;
  tree neon_float_type_node;

  tree intQI_pointer_node;
  tree intHI_pointer_node;
  tree intSI_pointer_node;
  tree intDI_pointer_node;
  tree float_pointer_node;

  tree const_intQI_node;
  tree const_intHI_node;
  tree const_intSI_node;
  tree const_intDI_node;
  tree const_float_node;

  tree const_intQI_pointer_node;
  tree const_intHI_pointer_node;
  tree const_intSI_pointer_node;
  tree const_intDI_pointer_node;
  tree const_float_pointer_node;

  tree V8QI_type_node;
  tree V4HI_type_node;
  tree V2SI_type_node;
  tree V2SF_type_node;
  tree V16QI_type_node;
  tree V8HI_type_node;
  tree V4SI_type_node;
  tree V4SF_type_node;
  tree V2DI_type_node;

  tree intUQI_type_node;
  tree intUHI_type_node;
  tree intUSI_type_node;
  tree intUDI_type_node;

  tree intEI_type_node;
  tree intOI_type_node;
  tree intCI_type_node;
  tree intXI_type_node;

  tree V8QI_pointer_node;
  tree V4HI_pointer_node;
  tree V2SI_pointer_node;
  tree V2SF_pointer_node;
  tree V16QI_pointer_node;
  tree V8HI_pointer_node;
  tree V4SI_pointer_node;
  tree V4SF_pointer_node;
  tree V2DI_pointer_node;

  tree void_ftype_pv8qi_v8qi_v8qi;
  tree void_ftype_pv4hi_v4hi_v4hi;
  tree void_ftype_pv2si_v2si_v2si;
  tree void_ftype_pv2sf_v2sf_v2sf;
  tree void_ftype_pdi_di_di;
  tree void_ftype_pv16qi_v16qi_v16qi;
  tree void_ftype_pv8hi_v8hi_v8hi;
  tree void_ftype_pv4si_v4si_v4si;
  tree void_ftype_pv4sf_v4sf_v4sf;
  tree void_ftype_pv2di_v2di_v2di;

  tree reinterp_ftype_dreg[5][5];
  tree reinterp_ftype_qreg[5][5];
  tree dreg_types[5], qreg_types[5];
  /* Create distinguished type nodes for NEON vector element types,
     and pointers to values of such types, so we can detect them later.  */
  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
  neon_float_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
  layout_type (neon_float_type_node);

  /* Define typedefs which exactly correspond to the modes we are basing vector
     types on.  If you change these names you'll need to change
     the table used by arm_mangle_type too.  */
  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
                                             "__builtin_neon_qi");
  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
                                             "__builtin_neon_hi");
  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
                                             "__builtin_neon_si");
  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
                                             "__builtin_neon_sf");
  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
                                             "__builtin_neon_di");
  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
                                             "__builtin_neon_poly8");
  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
                                             "__builtin_neon_poly16");

  intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
  intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
  intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
  intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
  float_pointer_node = build_pointer_type (neon_float_type_node);

  /* Next create constant-qualified versions of the above types.  */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
  const_float_node = build_qualified_type (neon_float_type_node,

  const_intQI_pointer_node = build_pointer_type (const_intQI_node);
  const_intHI_pointer_node = build_pointer_type (const_intHI_node);
  const_intSI_pointer_node = build_pointer_type (const_intSI_node);
  const_intDI_pointer_node = build_pointer_type (const_intDI_node);
  const_float_pointer_node = build_pointer_type (const_float_node);
  /* Now create vector types based on our NEON element types.  */
  /* 64-bit vectors.  */
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
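
  /* Descriptive note: the 64-bit vector types above live in NEON D registers
     and the 128-bit ones in Q registers; the dreg_types/qreg_types arrays set
     up further down group them accordingly for the vreinterpret builtins.  */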
  /* Unsigned integer types for various mode sizes.  */
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));

  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
                                             "__builtin_neon_uqi");
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
                                             "__builtin_neon_uhi");
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
                                             "__builtin_neon_usi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
                                             "__builtin_neon_udi");

  /* Opaque integer types for structures of vectors.  */
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));

  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
                                             "__builtin_neon_ti");
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
                                             "__builtin_neon_ei");
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
                                             "__builtin_neon_oi");
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
                                             "__builtin_neon_ci");
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
                                             "__builtin_neon_xi");
  /* Pointers to vector types.  */
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);
  /* Operations which return results as pairs.  */
  void_ftype_pv8qi_v8qi_v8qi =
    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
                              V8QI_type_node, NULL);
  void_ftype_pv4hi_v4hi_v4hi =
    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
                              V4HI_type_node, NULL);
  void_ftype_pv2si_v2si_v2si =
    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
                              V2SI_type_node, NULL);
  void_ftype_pv2sf_v2sf_v2sf =
    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
                              V2SF_type_node, NULL);
  void_ftype_pdi_di_di =
    build_function_type_list (void_type_node, intDI_pointer_node,
                              neon_intDI_type_node, neon_intDI_type_node, NULL);
  void_ftype_pv16qi_v16qi_v16qi =
    build_function_type_list (void_type_node, V16QI_pointer_node,
                              V16QI_type_node, V16QI_type_node, NULL);
  void_ftype_pv8hi_v8hi_v8hi =
    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
                              V8HI_type_node, NULL);
  void_ftype_pv4si_v4si_v4si =
    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
                              V4SI_type_node, NULL);
  void_ftype_pv4sf_v4sf_v4sf =
    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
                              V4SF_type_node, NULL);
  void_ftype_pv2di_v2di_v2di =
    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
                              V2DI_type_node, NULL);

  dreg_types[0] = V8QI_type_node;
  dreg_types[1] = V4HI_type_node;
  dreg_types[2] = V2SI_type_node;
  dreg_types[3] = V2SF_type_node;
  dreg_types[4] = neon_intDI_type_node;

  qreg_types[0] = V16QI_type_node;
  qreg_types[1] = V8HI_type_node;
  qreg_types[2] = V4SI_type_node;
  qreg_types[3] = V4SF_type_node;
  qreg_types[4] = V2DI_type_node;
  for (i = 0; i < 5; i++)
      for (j = 0; j < 5; j++)
          reinterp_ftype_dreg[i][j]
            = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
          reinterp_ftype_qreg[i][j]
            = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
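
  /* Descriptive note: reinterp_ftype_dreg[i][j] (and the qreg analogue) is the
     type of a function taking element type j and returning element type i;
     the NEON_REINTERP case below indexes the tables that way.  */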
  for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
      neon_builtin_datum *d = &neon_builtin_data[i];
      unsigned int j, codeidx = 0;

      d->base_fcode = fcode;

      for (j = 0; j < T_MAX; j++)
          const char* const modenames[] = {
            "v8qi", "v4hi", "v2si", "v2sf", "di",
            "v16qi", "v8hi", "v4si", "v4sf", "v2di"
          enum insn_code icode;
          int is_load = 0, is_store = 0;

          if ((d->bits & (1 << j)) == 0)

          icode = d->codes[codeidx++];

            case NEON_LOAD1LANE:
            case NEON_LOADSTRUCT:
            case NEON_LOADSTRUCTLANE:
              /* Fall through.  */
            case NEON_STORE1LANE:
            case NEON_STORESTRUCT:
            case NEON_STORESTRUCTLANE:
              /* Fall through.  */
            case NEON_LOGICBINOP:
            case NEON_SHIFTINSERT:
            case NEON_SHIFTIMM:
            case NEON_SHIFTACC:
            case NEON_LANEMULL:
            case NEON_LANEMULH:
            case NEON_SCALARMUL:
            case NEON_SCALARMULL:
            case NEON_SCALARMULH:
            case NEON_SCALARMAC:
                tree return_type = void_type_node, args = void_list_node;

                /* Build a function type directly from the insn_data for this
                   builtin.  The build_function_type() function takes care of
                   removing duplicates for us.  */
                for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
                    if (is_load && k == 1)
                        /* Neon load patterns always have the memory operand
                           (a SImode pointer) in the operand 1 position.  We
                           want a const pointer to the element type in that
                           position.  */
                        gcc_assert (insn_data[icode].operand[k].mode == SImode);
                            eltype = const_intQI_pointer_node;
                            eltype = const_intHI_pointer_node;
                            eltype = const_intSI_pointer_node;
                            eltype = const_float_pointer_node;
                            eltype = const_intDI_pointer_node;
                          default: gcc_unreachable ();
                    else if (is_store && k == 0)
                        /* Similarly, Neon store patterns use operand 0 as
                           the memory location to store to (a SImode pointer).
                           Use a pointer to the element type of the store in
                           that position.  */
                        gcc_assert (insn_data[icode].operand[k].mode == SImode);
                            eltype = intQI_pointer_node;
                            eltype = intHI_pointer_node;
                            eltype = intSI_pointer_node;
                            eltype = float_pointer_node;
                            eltype = intDI_pointer_node;
                          default: gcc_unreachable ();
                        switch (insn_data[icode].operand[k].mode)
                          case VOIDmode: eltype = void_type_node; break;
                          case QImode: eltype = neon_intQI_type_node; break;
                          case HImode: eltype = neon_intHI_type_node; break;
                          case SImode: eltype = neon_intSI_type_node; break;
                          case SFmode: eltype = neon_float_type_node; break;
                          case DImode: eltype = neon_intDI_type_node; break;
                          case TImode: eltype = intTI_type_node; break;
                          case EImode: eltype = intEI_type_node; break;
                          case OImode: eltype = intOI_type_node; break;
                          case CImode: eltype = intCI_type_node; break;
                          case XImode: eltype = intXI_type_node; break;
                          /* 64-bit vectors.  */
                          case V8QImode: eltype = V8QI_type_node; break;
                          case V4HImode: eltype = V4HI_type_node; break;
                          case V2SImode: eltype = V2SI_type_node; break;
                          case V2SFmode: eltype = V2SF_type_node; break;
                          /* 128-bit vectors.  */
                          case V16QImode: eltype = V16QI_type_node; break;
                          case V8HImode: eltype = V8HI_type_node; break;
                          case V4SImode: eltype = V4SI_type_node; break;
                          case V4SFmode: eltype = V4SF_type_node; break;
                          case V2DImode: eltype = V2DI_type_node; break;
                          default: gcc_unreachable ();

                    if (k == 0 && !is_store)
                      return_type = eltype;
                      args = tree_cons (NULL_TREE, eltype, args);

                ftype = build_function_type (return_type, args);

            case NEON_RESULTPAIR:
                switch (insn_data[icode].operand[1].mode)
                  case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
                  case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
                  case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
                  case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
                  case DImode: ftype = void_ftype_pdi_di_di; break;
                  case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
                  case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
                  case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
                  case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
                  case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
                  default: gcc_unreachable ();

            case NEON_REINTERP:
                /* We iterate over 5 doubleword types, then 5 quadword
                   types.  */
                switch (insn_data[icode].operand[0].mode)
                  case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
                  case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
                  case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
                  case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
                  case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
                  case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
                  case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
                  case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
                  case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
                  case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
                  default: gcc_unreachable ();

              gcc_unreachable ();

          gcc_assert (ftype != NULL);

          sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);

          add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
arm_init_fp16_builtins (void)
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
arm_init_builtins (void)
  arm_init_tls_builtins ();

  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");

/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
/* Implement TARGET_PROMOTED_TYPE.  */

arm_promoted_type (const_tree t)
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions between
   __fp16 to or from double to do an intermediate conversion to float.  */

arm_convert_to_type (tree type, tree expr)
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
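
/* Illustrative example of the semantics above: given

     __fp16 h;
     double d = h;

   the widening is expanded as d = (double) (float) h, and a narrowing from
   double back to __fp16 likewise goes through an intermediate float value.  */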
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

arm_scalar_mode_supported_p (enum machine_mode mode)
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);

  return default_scalar_mode_supported_p (mode);
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

safe_vector_operand (rtx x, enum machine_mode mode)
  if (x != const0_rtx)
  x = gen_reg_rtx (mode);
  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
                               : gen_rtx_SUBREG (DImode, x, 0)));
/* Subroutine of arm_expand_builtin to take care of binop insns.  */

arm_expand_binop_builtin (enum insn_code icode,
                          tree exp, rtx target)
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
/* Subroutine of arm_expand_builtin to take care of unop insns.  */

arm_expand_unop_builtin (enum insn_code icode,
                         tree exp, rtx target, int do_load)
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0);
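
/* Descriptive note: neon_builtin_compare is the bsearch comparator used by
   locate_neon_builtin_icode below.  A is the key (only its base_fcode is
   meaningful) and B a table entry; an entry matches when the sought function
   code falls within its [base_fcode, base_fcode + num_vars) range.  */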
neon_builtin_compare (const void *a, const void *b)
  const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
  const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
  unsigned int soughtcode = key->base_fcode;

  if (soughtcode >= memb->base_fcode
      && soughtcode < memb->base_fcode + memb->num_vars)
  else if (soughtcode < memb->base_fcode)
static enum insn_code
locate_neon_builtin_icode (int fcode, neon_itype *itype)
  neon_builtin_datum key, *found;

  key.base_fcode = fcode;
  found = (neon_builtin_datum *)
    bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
             sizeof (neon_builtin_data[0]), neon_builtin_compare);
  gcc_assert (found);
  idx = fcode - (int) found->base_fcode;
  gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);

    *itype = found->itype;

  return found->codes[idx];
  NEON_ARG_COPY_TO_REG,

#define NEON_MAX_BUILTIN_ARGS 5

/* Expand a Neon builtin.  */
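/* Descriptive note: the trailing arguments of arm_expand_neon_args are a
   variable-length list of builtin_arg codes (NEON_ARG_COPY_TO_REG,
   NEON_ARG_CONSTANT, ...), one per builtin argument, terminated by
   NEON_ARG_STOP; they are read back with va_arg in the loop below.  */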
arm_expand_neon_args (rtx target, int icode, int have_retval,
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];

          || GET_MODE (target) != tmode
          || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, exp);

      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
          arg[argc] = CALL_EXPR_ARG (exp, argc);
          op[argc] = expand_normal (arg[argc]);
          mode[argc] = insn_data[icode].operand[argc + have_retval].mode;

            case NEON_ARG_COPY_TO_REG:
              /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
              if (!(*insn_data[icode].operand[argc + have_retval].predicate)
                  (op[argc], mode[argc]))
                op[argc] = copy_to_mode_reg (mode[argc], op[argc]);

            case NEON_ARG_CONSTANT:
              /* FIXME: This error message is somewhat unhelpful.  */
              if (!(*insn_data[icode].operand[argc + have_retval].predicate)
                  (op[argc], mode[argc]))
                error ("argument must be a constant");

            case NEON_ARG_STOP:
              gcc_unreachable ();

        pat = GEN_FCN (icode) (target, op[0]);
        pat = GEN_FCN (icode) (target, op[0], op[1]);
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
        gcc_unreachable ();

        pat = GEN_FCN (icode) (op[0]);
        pat = GEN_FCN (icode) (op[0], op[1]);
        pat = GEN_FCN (icode) (op[0], op[1], op[2]);
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
        gcc_unreachable ();
/* Expand a Neon builtin.  These are "special" because they don't have symbolic
   constants defined per-instruction or per instruction-variant.  Instead, the
   required info is looked up in the table neon_builtin_data.  */

arm_expand_neon_builtin (int fcode, tree exp, rtx target)
  enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);

      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,

      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,

      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_RESULTPAIR:
      return arm_expand_neon_args (target, icode, 0, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,

    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
        NEON_ARG_CONSTANT, NEON_ARG_STOP);

      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,

    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,

    case NEON_STORESTRUCT:
      return arm_expand_neon_args (target, icode, 0, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, exp,
        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,

  gcc_unreachable ();
/* Emit code to reinterpret one Neon type as another, without altering bits.  */

neon_reinterpret (rtx dest, rtx src)
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */

neon_emit_pair_result_insn (enum machine_mode mode,
                            rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2, op2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
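
/* Descriptive note: the two halves of the pair are written to DESTADDR and to
   the location GET_MODE_SIZE (mode) bytes past it, in that order.  */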
/* Set up operands for a register copy from src to dest, taking care not to
   clobber registers in the process.
   FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
   be called with a large N, so that should be OK.  */

neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
  unsigned int copied = 0, opctr = 0;
  unsigned int done = (1 << count) - 1;

  while (copied != done)
      for (i = 0; i < count; i++)
          for (j = 0; good && j < count; j++)
            if (i != j && (copied & (1 << j)) == 0
                && reg_overlap_mentioned_p (src[j], dest[i]))

              operands[opctr++] = dest[i];
              operands[opctr++] = src[i];

  gcc_assert (opctr == count * 2);
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

arm_expand_builtin (tree exp,
                    rtx subtarget ATTRIBUTE_UNUSED,
                    enum machine_mode mode ATTRIBUTE_UNUSED,
                    int ignore ATTRIBUTE_UNUSED)
  const struct builtin_description * d;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  int fcode = DECL_FUNCTION_CODE (fndecl);
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;

  if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);
    case ARM_BUILTIN_TEXTRMSB:
    case ARM_BUILTIN_TEXTRMUB:
    case ARM_BUILTIN_TEXTRMSH:
    case ARM_BUILTIN_TEXTRMUH:
    case ARM_BUILTIN_TEXTRMSW:
    case ARM_BUILTIN_TEXTRMUW:
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
               : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
               : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
               : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
               : CODE_FOR_iwmmxt_textrmw);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
          /* @@@ better error message */
          error ("selector must be an immediate");
          return gen_reg_rtx (tmode);
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
    case ARM_BUILTIN_TINSRB:
    case ARM_BUILTIN_TINSRH:
    case ARM_BUILTIN_TINSRW:
      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
               : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
               : CODE_FOR_iwmmxt_tinsrw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
          /* @@@ better error message */
          error ("selector must be an immediate");
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
    case ARM_BUILTIN_SETWCX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = force_reg (SImode, expand_normal (arg0));
      op1 = expand_normal (arg1);
      emit_insn (gen_iwmmxt_tmcr (op1, op0));

    case ARM_BUILTIN_GETWCX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      target = gen_reg_rtx (SImode);
      emit_insn (gen_iwmmxt_tmrc (target, op0));

    case ARM_BUILTIN_WSHUFH:
      icode = CODE_FOR_iwmmxt_wshufh;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
          /* @@@ better error message */
          error ("mask must be an immediate");
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
    case ARM_BUILTIN_WSADB:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
    case ARM_BUILTIN_WSADH:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
    case ARM_BUILTIN_WSADBZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
    case ARM_BUILTIN_WSADHZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);

      /* Several three-argument builtins.  */
    case ARM_BUILTIN_WMACS:
    case ARM_BUILTIN_WMACU:
    case ARM_BUILTIN_WALIGN:
    case ARM_BUILTIN_TMIA:
    case ARM_BUILTIN_TMIAPH:
    case ARM_BUILTIN_TMIATT:
    case ARM_BUILTIN_TMIATB:
    case ARM_BUILTIN_TMIABT:
    case ARM_BUILTIN_TMIABB:
      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
               : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
               : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
               : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
               : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
               : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
               : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
               : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
               : CODE_FOR_iwmmxt_walign);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);

    case ARM_BUILTIN_WZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_iwmmxt_clrdi (target));

    case ARM_BUILTIN_THREAD_POINTER:
      return arm_load_tp (target);

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_binop_builtin (d->icode, exp, target);

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_unop_builtin (d->icode, exp, target, 0);

  /* @@@ Should really do something sensible here.  */
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

number_of_first_bit_set (unsigned mask)
         (mask & (1 << bit)) == 0;
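
/* For example, number_of_first_bit_set (0x14) is 2, since bit 2 is the
   lowest bit set in 0x14 (binary 10100).  */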
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to push or pop.  PUSH is
   nonzero if we should push, and zero if we should pop.  For debugging
   output, if pushing, adjust CFA_OFFSET by the amount of space added
   to the stack.  REAL_REGS should have the same number of bits set as
   MASK, and will be used instead (in the same order) to describe which
   registers were saved - this is used to mark the save slots when we
   push high registers after moving them to low registers.  */

thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
               unsigned long real_regs)
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
      /* Special case.  Do not generate a POP PC statement here, do it in
         thumb_exit.  */
      thumb_exit (f, -1);

  if (ARM_EABI_UNWIND_TABLES && push)
      fprintf (f, "\t.save\t{");
      for (regno = 0; regno < 15; regno++)
          if (real_regs & (1 << regno))
              if (real_regs & ((1 << regno) -1))
              asm_fprintf (f, "%r", regno);
      fprintf (f, "}\n");

  fprintf (f, "\t%s\t{", push ? "push" : "pop");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
          asm_fprintf (f, "%r", regno);

          if ((lo_mask & ~1) != 0)

  if (push && (mask & (1 << LR_REGNUM)))
      /* Catch pushing the LR.  */
      asm_fprintf (f, "%r", LR_REGNUM);
  else if (!push && (mask & (1 << PC_REGNUM)))
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
          || crtl->calls_eh_return)
          /* The PC is never popped directly, instead
             it is popped into r3 and then BX is used.  */
          fprintf (f, "}\n");

          thumb_exit (f, -1);

          asm_fprintf (f, "%r", PC_REGNUM);

  fprintf (f, "}\n");

  if (push && pushed_words && dwarf2out_do_frame ())
      char *l = dwarf2out_cfi_label (false);
      int pushed_mask = real_regs;

      *cfa_offset += pushed_words * 4;
      dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);

      pushed_mask = real_regs;
      for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
          if (pushed_mask & 1)
            dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
thumb_exit (FILE *f, int reg_containing_return_addr)
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  unsigned available;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */

  if (reg_containing_return_addr == -1)
      regs_to_pop |= 1 << LR_REGNUM;

  if (TARGET_BACKTRACE)
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);

  /* If there is nothing to pop then just emit the BX instruction and
  if (pops_needed == 0)
      if (crtl->calls_eh_return)
        asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
           && !TARGET_BACKTRACE
           && !is_called_in_ARM_mode (current_function_decl)
           && !crtl->calls_eh_return)
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
      /* If we can deduce the registers used from the function's
         return value.  This is more reliable than examining
         df_regs_ever_live_p () because that will be set if the register is
         ever used in the function, not just if the register is used
         to hold a return value.  */

      if (crtl->return_rtx != 0)
        mode = GET_MODE (crtl->return_rtx);
        mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

          /* In a void function we can use any argument register.
             In a function that returns a structure on the stack
             we can use the second and third argument registers.  */
          if (mode == VOIDmode)
            regs_available_for_popping =
              (1 << ARG_REGISTER (1))
              | (1 << ARG_REGISTER (2))
              | (1 << ARG_REGISTER (3));
            regs_available_for_popping =
              (1 << ARG_REGISTER (2))
              | (1 << ARG_REGISTER (3));
      else if (size <= 4)
        regs_available_for_popping =
          (1 << ARG_REGISTER (2))
          | (1 << ARG_REGISTER (3));
      else if (size <= 8)
        regs_available_for_popping =
          (1 << ARG_REGISTER (3));

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required &= ~(required & - required))

  /* If we have any popping registers left over, remove them.  */
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
      /* If we have not found any free argument registers and
         reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
          && reg_containing_return_addr == LAST_ARG_REGNUM)
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
          reg_containing_return_addr = LR_REGNUM;
      else if (size > 12)
          /* Register a4 is being used to hold part of the return value,
             but we have dire need of a free, low register.  */

          asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
          /* The fourth argument register is available.  */
          regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

  /* Pop as many registers as we can.  */
  thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
                 regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
        number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
         the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
                   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
          /* We popped the stack pointer as well,
             find the register that contains it.  */
          stack_pointer = number_of_first_bit_set (regs_available_for_popping);

          /* Move it into the stack register.  */
          asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

          /* At this point we have popped all necessary registers, so
             do not worry about restoring regs_available_for_popping
             to its correct value:

             assert (pops_needed == 0)
             assert (regs_available_for_popping == (1 << frame_pointer))
             assert (regs_to_pop == (1 << STACK_POINTER))  */
          /* Since we have just moved the popped value into the frame
             pointer, the popping register is available for reuse, and
             we know that we still have the stack pointer left to pop.  */
          regs_available_for_popping |= (1 << frame_pointer);

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
  if (regs_available_for_popping == 0 && pops_needed > 0)
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
                   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
      thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
                     regs_available_for_popping);

      /* We have popped either FP or SP.
         Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
      thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
                     regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);

        assert (regs_to_pop == (1 << STACK_POINTER))
        assert (pops_needed == 1)

  /* If necessary restore the a4 register.  */
      if (reg_containing_return_addr != LR_REGNUM)
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
          reg_containing_return_addr = LR_REGNUM;

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
thumb1_final_prescan_insn (rtx insn)
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
                 INSN_ADDRESSES (INSN_UID (insn)));
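
/* Descriptive note: thumb_shiftable_const below tests whether VAL can be
   represented as an 8-bit constant shifted left by some amount, by checking
   VAL against an 0xff mask shifted by each candidate count.  */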
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
  unsigned HOST_WIDE_INT mask = 0xff;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
thumb_far_jump_used_p (void)
  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
      /* In this case we know that we are being asked about the elimination
         of the arg pointer register.  If that register is not being used,
         then there are no arguments on the stack, and we do not have to
         worry that a far jump might force the prologue to push the link
         register, changing the stack offsets.  In this case we can just
         return false, since the presence of far jumps in the function will
         not affect stack offsets.

         If the arg pointer is live (or if it was live, but has now been
         eliminated and so set to dead) then we do have to test to see if
         the function might contain a far jump.  This test can lead to some
         false negatives, since before reload is completed, the length of
         branch instructions is not known, so gcc defaults to returning their
         longest length, which in turn sets the far jump attribute to true.

         A false negative will not result in bad code being generated, but it
         will result in a needless push and pop of the link register.  We
         hope that this does not occur too often.

         If we need doubleword stack alignment this could affect the other
         elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
        cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
      if (GET_CODE (insn) == JUMP_INSN
          /* Ignore tablejump patterns.  */
          && GET_CODE (PATTERN (insn)) != ADDR_VEC
          && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
          && get_attr_far_jump (insn) == FAR_JUMP_YES
          /* Record the fact that we have decided that
             the function does use far jumps.  */
          cfun->machine->far_jump_used = 1;
/* Return nonzero if FUNC must be entered in ARM mode.  */
is_called_in_ARM_mode (tree func)
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))

  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
19117 /* The bits which aren't usefully expanded as rtl. */
19119 thumb_unexpanded_epilogue (void)
19121 arm_stack_offsets
*offsets
;
19123 unsigned long live_regs_mask
= 0;
19124 int high_regs_pushed
= 0;
19125 int had_to_push_lr
;
19128 if (cfun
->machine
->return_used_this_function
!= 0)
19131 if (IS_NAKED (arm_current_func_type ()))
19134 offsets
= arm_get_frame_offsets ();
19135 live_regs_mask
= offsets
->saved_regs_mask
;
19136 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
19138 /* If we can deduce the registers used from the function's return value.
19139 This is more reliable that examining df_regs_ever_live_p () because that
19140 will be set if the register is ever used in the function, not just if
19141 the register is used to hold a return value. */
19142 size
= arm_size_return_regs ();
19144 /* The prolog may have pushed some high registers to use as
19145 work registers. e.g. the testsuite file:
19146 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
19147 compiles to produce:
19148 push {r4, r5, r6, r7, lr}
19152 as part of the prolog. We have to undo that pushing here. */
19154 if (high_regs_pushed
)
19156 unsigned long mask
= live_regs_mask
& 0xff;
19159 /* The available low registers depend on the size of the value we are
19167 /* Oh dear! We have no low registers into which we can pop
19170 ("no low registers available for popping high registers");
19172 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
19173 if (live_regs_mask
& (1 << next_hi_reg
))
19176 while (high_regs_pushed
)
19178 /* Find lo register(s) into which the high register(s) can
19180 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
19182 if (mask
& (1 << regno
))
19183 high_regs_pushed
--;
19184 if (high_regs_pushed
== 0)
19188 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
19190 /* Pop the values into the low register(s). */
19191 thumb_pushpop (asm_out_file
, mask
, 0, NULL
, mask
);
19193 /* Move the value(s) into the high registers. */
19194 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
19196 if (mask
& (1 << regno
))
19198 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
19201 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
19202 if (live_regs_mask
& (1 << next_hi_reg
))
19207 live_regs_mask
&= ~0x0f00;
19210 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
19211 live_regs_mask
&= 0xff;
19213 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
19215 /* Pop the return address into the PC. */
19216 if (had_to_push_lr
)
19217 live_regs_mask
|= 1 << PC_REGNUM
;
19219 /* Either no argument registers were pushed or a backtrace
19220 structure was created which includes an adjusted stack
19221 pointer, so just pop everything. */
19222 if (live_regs_mask
)
19223 thumb_pushpop (asm_out_file
, live_regs_mask
, FALSE
, NULL
,
      /* We have either just popped the return address into the
         PC or it was kept in LR for the entire function.  */
19228 if (!had_to_push_lr
)
19229 thumb_exit (asm_out_file
, LR_REGNUM
);
19233 /* Pop everything but the return address. */
19234 if (live_regs_mask
)
19235 thumb_pushpop (asm_out_file
, live_regs_mask
, FALSE
, NULL
,
19238 if (had_to_push_lr
)
19242 /* We have no free low regs, so save one. */
19243 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
19247 /* Get the return address into a temporary register. */
19248 thumb_pushpop (asm_out_file
, 1 << LAST_ARG_REGNUM
, 0, NULL
,
19249 1 << LAST_ARG_REGNUM
);
19253 /* Move the return address to lr. */
19254 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
19256 /* Restore the low register. */
19257 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
19262 regno
= LAST_ARG_REGNUM
;
19267 /* Remove the argument registers that were pushed onto the stack. */
19268 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
19269 SP_REGNUM
, SP_REGNUM
,
19270 crtl
->args
.pretend_args_size
);
19272 thumb_exit (asm_out_file
, regno
);
/* Functions to save and restore machine-specific function data.  */

static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}

/* Return an RTX indicating where the return address to the
   calling function can be found.  */

rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}

/* Do anything needed before RTL is emitted for each function.  */

void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->saved_args;

        case FRAME_POINTER_REGNUM:
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->saved_args;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->saved_args;

        default:
          gcc_unreachable ();
        }
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->soft_frame;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }
}
/* Given the stack offsets and register mask in OFFSETS, decide
   how many additional registers to push instead of subtracting
   a constant from SP.  */

static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount = offsets->outgoing_args - offsets->saved_regs;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  unsigned long l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push, or if we
     are not going to do a push at all.  */
  if (high_regs_pushed != 0 || l_mask == 0)
    return 0;

  /* Don't do this if thumb1_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if ((flag_pic && arm_pic_register != INVALID_REGNUM)
      || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0))
    return 0;

  for (n_free = 0; n_free < 8 && !(live_regs_mask & 1); live_regs_mask >>= 1)
    n_free++;

  if (n_free == 0)
    return 0;

  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
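/* Worked example (illustrative, not from the original sources): with
   amount == 512 and four dead low registers (n_free == 4), the test
   "amount >= 512 && (amount - n_free * 4) < 512" holds and the function
   returns (512 - 508) / 4 == 1, so the prologue pushes one extra register
   and only has to subtract 508 from SP, which still fits in a single
   Thumb-1 "sub sp, #imm" instruction.  */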
19418 /* Generate the rest of a function's prologue. */
19420 thumb1_expand_prologue (void)
19424 HOST_WIDE_INT amount
;
19425 arm_stack_offsets
*offsets
;
19426 unsigned long func_type
;
19428 unsigned long live_regs_mask
;
19430 func_type
= arm_current_func_type ();
19432 /* Naked functions don't have prologues. */
19433 if (IS_NAKED (func_type
))
19436 if (IS_INTERRUPT (func_type
))
19438 error ("interrupt Service Routines cannot be coded in Thumb mode");
19442 offsets
= arm_get_frame_offsets ();
19443 live_regs_mask
= offsets
->saved_regs_mask
;
19444 /* Load the pic register before setting the frame pointer,
19445 so we can use r7 as a temporary work register. */
19446 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
19447 arm_load_pic_register (live_regs_mask
);
19449 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19450 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
19451 stack_pointer_rtx
);
19453 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
19454 amount
-= 4 * thumb1_extra_regs_pushed (offsets
);
19459 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
19460 GEN_INT (- amount
)));
19461 RTX_FRAME_RELATED_P (insn
) = 1;
19467 /* The stack decrement is too big for an immediate value in a single
19468 insn. In theory we could issue multiple subtracts, but after
19469 three of them it becomes more space efficient to place the full
19470 value in the constant pool and load into a register. (Also the
19471 ARM debugger really likes to see only one stack decrement per
19472 function). So instead we look for a scratch register into which
19473 we can load the decrement, and then we subtract this from the
19474 stack pointer. Unfortunately on the thumb the only available
19475 scratch registers are the argument registers, and we cannot use
19476 these as they may hold arguments to the function. Instead we
19477 attempt to locate a call preserved register which is used by this
19478 function. If we can find one, then we know that it will have
19479 been pushed at the start of the prologue and so we can corrupt
19481 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
19482 if (live_regs_mask
& (1 << regno
))
19485 gcc_assert(regno
<= LAST_LO_REGNUM
);
19487 reg
= gen_rtx_REG (SImode
, regno
);
19489 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
19491 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
19492 stack_pointer_rtx
, reg
));
19493 RTX_FRAME_RELATED_P (insn
) = 1;
19494 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
19495 plus_constant (stack_pointer_rtx
,
19497 RTX_FRAME_RELATED_P (dwarf
) = 1;
19498 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19502 if (frame_pointer_needed
)
19503 thumb_set_frame_pointer (offsets
);
19505 /* If we are profiling, make sure no instructions are scheduled before
19506 the call to mcount. Similarly if the user has requested no
19507 scheduling in the prolog. Similarly if we want non-call exceptions
19508 using the EABI unwinder, to prevent faulting instructions from being
19509 swapped with a stack adjustment. */
19510 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
19511 || (ARM_EABI_UNWIND_TABLES
&& cfun
->can_throw_non_call_exceptions
))
19512 emit_insn (gen_blockage ());
19514 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
19515 if (live_regs_mask
& 0xff)
19516 cfun
->machine
->lr_save_eliminated
= 0;
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }

  gcc_assert (amount >= 0);
  if (amount)
    {
      if (amount < 512)
        emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (amount)));
      else
        {
          /* r3 is always free in the epilogue.  */
          rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

          emit_insn (gen_movsi (reg, GEN_INT (amount)));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
        }
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_prologue_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
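/* Note on the epilogue above: when the frame adjustment is too large for a
   single add-immediate, the amount is first loaded into r3 (LAST_ARG_REGNUM),
   which the code treats as always free at this point, and then added to SP
   with a register-register add.  */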
19574 thumb1_output_function_prologue (FILE *f
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
19576 arm_stack_offsets
*offsets
;
19577 unsigned long live_regs_mask
= 0;
19578 unsigned long l_mask
;
19579 unsigned high_regs_pushed
= 0;
19580 int cfa_offset
= 0;
19583 if (IS_NAKED (arm_current_func_type ()))
19586 if (is_called_in_ARM_mode (current_function_decl
))
19590 gcc_assert (GET_CODE (DECL_RTL (current_function_decl
)) == MEM
);
19591 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
19593 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
19595 /* Generate code sequence to switch us into Thumb mode. */
19596 /* The .code 32 directive has already been emitted by
19597 ASM_DECLARE_FUNCTION_NAME. */
19598 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
19599 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
19601 /* Generate a label, so that the debugger will notice the
19602 change in instruction sets. This label is also used by
19603 the assembler to bypass the ARM code when this function
19604 is called from a Thumb encoded function elsewhere in the
19605 same file. Hence the definition of STUB_NAME here must
19606 agree with the definition in gas/config/tc-arm.c. */
19608 #define STUB_NAME ".real_start_of"
19610 fprintf (f
, "\t.code\t16\n");
19612 if (arm_dllexport_name_p (name
))
19613 name
= arm_strip_name_encoding (name
);
19615 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
19616 fprintf (f
, "\t.thumb_func\n");
19617 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
19620 if (crtl
->args
.pretend_args_size
)
19622 /* Output unwind directive for the stack adjustment. */
19623 if (ARM_EABI_UNWIND_TABLES
)
19624 fprintf (f
, "\t.pad #%d\n",
19625 crtl
->args
.pretend_args_size
);
19627 if (cfun
->machine
->uses_anonymous_args
)
19631 fprintf (f
, "\tpush\t{");
19633 num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
19635 for (regno
= LAST_ARG_REGNUM
+ 1 - num_pushes
;
19636 regno
<= LAST_ARG_REGNUM
;
19638 asm_fprintf (f
, "%r%s", regno
,
19639 regno
== LAST_ARG_REGNUM
? "" : ", ");
19641 fprintf (f
, "}\n");
19644 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n",
19645 SP_REGNUM
, SP_REGNUM
,
19646 crtl
->args
.pretend_args_size
);
19648 /* We don't need to record the stores for unwinding (would it
19649 help the debugger any if we did?), but record the change in
19650 the stack pointer. */
19651 if (dwarf2out_do_frame ())
19653 char *l
= dwarf2out_cfi_label (false);
19655 cfa_offset
= cfa_offset
+ crtl
->args
.pretend_args_size
;
19656 dwarf2out_def_cfa (l
, SP_REGNUM
, cfa_offset
);
19660 /* Get the registers we are going to push. */
19661 offsets
= arm_get_frame_offsets ();
19662 live_regs_mask
= offsets
->saved_regs_mask
;
19663 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19664 l_mask
= live_regs_mask
& 0x40ff;
19665 /* Then count how many other high registers will need to be pushed. */
19666 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
19668 if (TARGET_BACKTRACE
)
19671 unsigned work_register
;
19673 /* We have been asked to create a stack backtrace structure.
19674 The code looks like this:
19678 0 sub SP, #16 Reserve space for 4 registers.
19679 2 push {R7} Push low registers.
19680 4 add R7, SP, #20 Get the stack pointer before the push.
19681 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
19682 8 mov R7, PC Get hold of the start of this code plus 12.
19683 10 str R7, [SP, #16] Store it.
19684 12 mov R7, FP Get hold of the current frame pointer.
19685 14 str R7, [SP, #4] Store it.
19686 16 mov R7, LR Get hold of the current return address.
19687 18 str R7, [SP, #12] Store it.
19688 20 add R7, SP, #16 Point at the start of the backtrace structure.
19689 22 mov FP, R7 Put this value into the frame pointer. */
19691 work_register
= thumb_find_work_register (live_regs_mask
);
19693 if (ARM_EABI_UNWIND_TABLES
)
19694 asm_fprintf (f
, "\t.pad #16\n");
19697 (f
, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
19698 SP_REGNUM
, SP_REGNUM
);
19700 if (dwarf2out_do_frame ())
19702 char *l
= dwarf2out_cfi_label (false);
19704 cfa_offset
= cfa_offset
+ 16;
19705 dwarf2out_def_cfa (l
, SP_REGNUM
, cfa_offset
);
19710 thumb_pushpop (f
, l_mask
, 1, &cfa_offset
, l_mask
);
19711 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
19716 asm_fprintf (f
, "\tadd\t%r, %r, #%d\n", work_register
, SP_REGNUM
,
19717 offset
+ 16 + crtl
->args
.pretend_args_size
);
19719 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
19722 /* Make sure that the instruction fetching the PC is in the right place
19723 to calculate "start of backtrace creation code + 12". */
19726 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, PC_REGNUM
);
19727 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
19729 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
,
19730 ARM_HARD_FRAME_POINTER_REGNUM
);
19731 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
19736 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
,
19737 ARM_HARD_FRAME_POINTER_REGNUM
);
19738 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
19740 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, PC_REGNUM
);
19741 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
19745 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, LR_REGNUM
);
19746 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
19748 asm_fprintf (f
, "\tadd\t%r, %r, #%d\n", work_register
, SP_REGNUM
,
19750 asm_fprintf (f
, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
19751 ARM_HARD_FRAME_POINTER_REGNUM
, work_register
);
19753 /* Optimization: If we are not pushing any low registers but we are going
19754 to push some high registers then delay our first push. This will just
19755 be a push of LR and we can combine it with the push of the first high
19757 else if ((l_mask
& 0xff) != 0
19758 || (high_regs_pushed
== 0 && l_mask
))
19760 unsigned long mask
= l_mask
;
19761 mask
|= (1 << thumb1_extra_regs_pushed (offsets
)) - 1;
19762 thumb_pushpop (f
, mask
, 1, &cfa_offset
, mask
);
19765 if (high_regs_pushed
)
19767 unsigned pushable_regs
;
19768 unsigned next_hi_reg
;
19770 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
19771 if (live_regs_mask
& (1 << next_hi_reg
))
19774 pushable_regs
= l_mask
& 0xff;
19776 if (pushable_regs
== 0)
19777 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
19779 while (high_regs_pushed
> 0)
19781 unsigned long real_regs_mask
= 0;
19783 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
19785 if (pushable_regs
& (1 << regno
))
19787 asm_fprintf (f
, "\tmov\t%r, %r\n", regno
, next_hi_reg
);
19789 high_regs_pushed
--;
19790 real_regs_mask
|= (1 << next_hi_reg
);
19792 if (high_regs_pushed
)
19794 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
19796 if (live_regs_mask
& (1 << next_hi_reg
))
19801 pushable_regs
&= ~((1 << regno
) - 1);
19807 /* If we had to find a work register and we have not yet
19808 saved the LR then add it to the list of regs to push. */
19809 if (l_mask
== (1 << LR_REGNUM
))
19811 thumb_pushpop (f
, pushable_regs
| (1 << LR_REGNUM
),
19813 real_regs_mask
| (1 << LR_REGNUM
));
19817 thumb_pushpop (f
, pushable_regs
, 1, &cfa_offset
, real_regs_mask
);
19822 /* Handle the case of a double word load into a low register from
19823 a computed memory address. The computed address may involve a
19824 register which is overwritten by the load. */
19826 thumb_load_double_from_address (rtx
*operands
)
19834 gcc_assert (GET_CODE (operands
[0]) == REG
);
19835 gcc_assert (GET_CODE (operands
[1]) == MEM
);
19837 /* Get the memory address. */
19838 addr
= XEXP (operands
[1], 0);
19840 /* Work out how the memory address is computed. */
19841 switch (GET_CODE (addr
))
19844 operands
[2] = adjust_address (operands
[1], SImode
, 4);
19846 if (REGNO (operands
[0]) == REGNO (addr
))
19848 output_asm_insn ("ldr\t%H0, %2", operands
);
19849 output_asm_insn ("ldr\t%0, %1", operands
);
19853 output_asm_insn ("ldr\t%0, %1", operands
);
19854 output_asm_insn ("ldr\t%H0, %2", operands
);
19859 /* Compute <address> + 4 for the high order load. */
19860 operands
[2] = adjust_address (operands
[1], SImode
, 4);
19862 output_asm_insn ("ldr\t%0, %1", operands
);
19863 output_asm_insn ("ldr\t%H0, %2", operands
);
19867 arg1
= XEXP (addr
, 0);
19868 arg2
= XEXP (addr
, 1);
19870 if (CONSTANT_P (arg1
))
19871 base
= arg2
, offset
= arg1
;
19873 base
= arg1
, offset
= arg2
;
19875 gcc_assert (GET_CODE (base
) == REG
);
19877 /* Catch the case of <address> = <reg> + <reg> */
19878 if (GET_CODE (offset
) == REG
)
19880 int reg_offset
= REGNO (offset
);
19881 int reg_base
= REGNO (base
);
19882 int reg_dest
= REGNO (operands
[0]);
19884 /* Add the base and offset registers together into the
19885 higher destination register. */
19886 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
19887 reg_dest
+ 1, reg_base
, reg_offset
);
19889 /* Load the lower destination register from the address in
19890 the higher destination register. */
19891 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
19892 reg_dest
, reg_dest
+ 1);
19894 /* Load the higher destination register from its own address
19896 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
19897 reg_dest
+ 1, reg_dest
+ 1);
19901 /* Compute <address> + 4 for the high order load. */
19902 operands
[2] = adjust_address (operands
[1], SImode
, 4);
19904 /* If the computed address is held in the low order register
19905 then load the high order register first, otherwise always
19906 load the low order register first. */
19907 if (REGNO (operands
[0]) == REGNO (base
))
19909 output_asm_insn ("ldr\t%H0, %2", operands
);
19910 output_asm_insn ("ldr\t%0, %1", operands
);
19914 output_asm_insn ("ldr\t%0, %1", operands
);
19915 output_asm_insn ("ldr\t%H0, %2", operands
);
19921 /* With no registers to worry about we can just load the value
19923 operands
[2] = adjust_address (operands
[1], SImode
, 4);
19925 output_asm_insn ("ldr\t%H0, %2", operands
);
19926 output_asm_insn ("ldr\t%0, %1", operands
);
19930 gcc_unreachable ();
19937 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
19944 if (REGNO (operands
[4]) > REGNO (operands
[5]))
19947 operands
[4] = operands
[5];
19950 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
19951 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
19955 if (REGNO (operands
[4]) > REGNO (operands
[5]))
19958 operands
[4] = operands
[5];
19961 if (REGNO (operands
[5]) > REGNO (operands
[6]))
19964 operands
[5] = operands
[6];
19967 if (REGNO (operands
[4]) > REGNO (operands
[5]))
19970 operands
[4] = operands
[5];
19974 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
19975 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
19979 gcc_unreachable ();
/* Output a call-via instruction for thumb state.  */

const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
        thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
        cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);

  return "";
}
20016 /* Routines for generating rtl. */
20018 thumb_expand_movmemqi (rtx
*operands
)
20020 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
20021 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
20022 HOST_WIDE_INT len
= INTVAL (operands
[2]);
20023 HOST_WIDE_INT offset
= 0;
20027 emit_insn (gen_movmem12b (out
, in
, out
, in
));
20033 emit_insn (gen_movmem8b (out
, in
, out
, in
));
20039 rtx reg
= gen_reg_rtx (SImode
);
20040 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
20041 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
20048 rtx reg
= gen_reg_rtx (HImode
);
20049 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
20050 plus_constant (in
, offset
))));
20051 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (out
, offset
)),
20059 rtx reg
= gen_reg_rtx (QImode
);
20060 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
20061 plus_constant (in
, offset
))));
20062 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (out
, offset
)),
20068 thumb_reload_out_hi (rtx
*operands
)
20070 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
20073 /* Handle reading a half-word from memory during reload. */
20075 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
20077 gcc_unreachable ();
20080 /* Return the length of a function name prefix
20081 that starts with the character 'c'. */
20083 arm_get_strip_length (int c
)
20087 ARM_NAME_ENCODING_LENGTHS
20092 /* Return a pointer to a function's name with any
20093 and all prefix encodings stripped from it. */
20095 arm_strip_name_encoding (const char *name
)
20099 while ((skip
= arm_get_strip_length (* name
)))
20105 /* If there is a '*' anywhere in the name's prefix, then
20106 emit the stripped name verbatim, otherwise prepend an
20107 underscore if leading underscores are being used. */
20109 arm_asm_output_labelref (FILE *stream
, const char *name
)
20114 while ((skip
= arm_get_strip_length (* name
)))
20116 verbatim
|= (*name
== '*');
20121 fputs (name
, stream
);
20123 asm_fprintf (stream
, "%U%s", name
);
20127 arm_file_start (void)
20131 if (TARGET_UNIFIED_ASM
)
20132 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
20136 const char *fpu_name
;
20137 if (arm_selected_arch
)
20138 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
20140 asm_fprintf (asm_out_file
, "\t.cpu %s\n", arm_selected_cpu
->name
);
20142 if (TARGET_SOFT_FLOAT
)
20145 fpu_name
= "softvfp";
20147 fpu_name
= "softfpa";
20151 fpu_name
= arm_fpu_desc
->name
;
20152 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
20154 if (TARGET_HARD_FLOAT
)
20155 asm_fprintf (asm_out_file
, "\t.eabi_attribute 27, 3\n");
20156 if (TARGET_HARD_FLOAT_ABI
)
20157 asm_fprintf (asm_out_file
, "\t.eabi_attribute 28, 1\n");
20160 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
20162 /* Some of these attributes only apply when the corresponding features
20163 are used. However we don't have any easy way of figuring this out.
20164 Conservatively record the setting that would have been used. */
20166 /* Tag_ABI_FP_rounding. */
20167 if (flag_rounding_math
)
20168 asm_fprintf (asm_out_file
, "\t.eabi_attribute 19, 1\n");
20169 if (!flag_unsafe_math_optimizations
)
      /* Tag_ABI_FP_denormal.  */
20172 asm_fprintf (asm_out_file
, "\t.eabi_attribute 20, 1\n");
20173 /* Tag_ABI_FP_exceptions. */
20174 asm_fprintf (asm_out_file
, "\t.eabi_attribute 21, 1\n");
20176 /* Tag_ABI_FP_user_exceptions. */
20177 if (flag_signaling_nans
)
20178 asm_fprintf (asm_out_file
, "\t.eabi_attribute 22, 1\n");
20179 /* Tag_ABI_FP_number_model. */
20180 asm_fprintf (asm_out_file
, "\t.eabi_attribute 23, %d\n",
20181 flag_finite_math_only
? 1 : 3);
20183 /* Tag_ABI_align8_needed. */
20184 asm_fprintf (asm_out_file
, "\t.eabi_attribute 24, 1\n");
20185 /* Tag_ABI_align8_preserved. */
20186 asm_fprintf (asm_out_file
, "\t.eabi_attribute 25, 1\n");
20187 /* Tag_ABI_enum_size. */
20188 asm_fprintf (asm_out_file
, "\t.eabi_attribute 26, %d\n",
20189 flag_short_enums
? 1 : 2);
20191 /* Tag_ABI_optimization_goals. */
20194 else if (optimize
>= 2)
20200 asm_fprintf (asm_out_file
, "\t.eabi_attribute 30, %d\n", val
);
20202 /* Tag_ABI_FP_16bit_format. */
20203 if (arm_fp16_format
)
20204 asm_fprintf (asm_out_file
, "\t.eabi_attribute 38, %d\n",
20205 (int)arm_fp16_format
);
20207 if (arm_lang_output_object_attributes_hook
)
20208 arm_lang_output_object_attributes_hook();
20210 default_file_start();
20214 arm_file_end (void)
20218 if (NEED_INDICATE_EXEC_STACK
)
20219 /* Add .note.GNU-stack. */
20220 file_end_indicate_exec_stack ();
20222 if (! thumb_call_reg_needed
)
20225 switch_to_section (text_section
);
20226 asm_fprintf (asm_out_file
, "\t.code 16\n");
20227 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
20229 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
20231 rtx label
= thumb_call_via_label
[regno
];
20235 targetm
.asm_out
.internal_label (asm_out_file
, "L",
20236 CODE_LABEL_NUMBER (label
));
20237 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
20271 /* Output code to add DELTA to the first argument, and then jump
20272 to FUNCTION. Used for C++ multiple inheritance. */
20274 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
20275 HOST_WIDE_INT delta
,
20276 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
20279 static int thunk_label
= 0;
20282 int mi_delta
= delta
;
20283 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
20285 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
20288 mi_delta
= - mi_delta
;
20292 int labelno
= thunk_label
++;
20293 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
      /* Thunks are entered in ARM mode when available.  */
20295 if (TARGET_THUMB1_ONLY
)
20297 /* push r3 so we can use it as a temporary. */
20298 /* TODO: Omit this save if r3 is not used. */
20299 fputs ("\tpush {r3}\n", file
);
20300 fputs ("\tldr\tr3, ", file
);
20304 fputs ("\tldr\tr12, ", file
);
20306 assemble_name (file
, label
);
20307 fputc ('\n', file
);
20310 /* If we are generating PIC, the ldr instruction below loads
20311 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
20312 the address of the add + 8, so we have:
20314 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
20317 Note that we have "+ 1" because some versions of GNU ld
20318 don't set the low bit of the result for R_ARM_REL32
20319 relocations against thumb function symbols.
20320 On ARMv6M this is +4, not +8. */
20321 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
20322 assemble_name (file
, labelpc
);
20323 fputs (":\n", file
);
20324 if (TARGET_THUMB1_ONLY
)
20326 /* This is 2 insns after the start of the thunk, so we know it
20327 is 4-byte aligned. */
20328 fputs ("\tadd\tr3, pc, r3\n", file
);
20329 fputs ("\tmov r12, r3\n", file
);
20332 fputs ("\tadd\tr12, pc, r12\n", file
);
20334 else if (TARGET_THUMB1_ONLY
)
20335 fputs ("\tmov r12, r3\n", file
);
20337 if (TARGET_THUMB1_ONLY
)
20339 if (mi_delta
> 255)
20341 fputs ("\tldr\tr3, ", file
);
20342 assemble_name (file
, label
);
20343 fputs ("+4\n", file
);
20344 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
20345 mi_op
, this_regno
, this_regno
);
20347 else if (mi_delta
!= 0)
20349 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
20350 mi_op
, this_regno
, this_regno
,
20356 /* TODO: Use movw/movt for large constants when available. */
20357 while (mi_delta
!= 0)
20359 if ((mi_delta
& (3 << shift
)) == 0)
20363 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
20364 mi_op
, this_regno
, this_regno
,
20365 mi_delta
& (0xff << shift
));
20366 mi_delta
&= ~(0xff << shift
);
20373 if (TARGET_THUMB1_ONLY
)
20374 fputs ("\tpop\t{r3}\n", file
);
20376 fprintf (file
, "\tbx\tr12\n");
20377 ASM_OUTPUT_ALIGN (file
, 2);
20378 assemble_name (file
, label
);
20379 fputs (":\n", file
);
20382 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
20383 rtx tem
= XEXP (DECL_RTL (function
), 0);
20384 tem
= gen_rtx_PLUS (GET_MODE (tem
), tem
, GEN_INT (-7));
20385 tem
= gen_rtx_MINUS (GET_MODE (tem
),
20387 gen_rtx_SYMBOL_REF (Pmode
,
20388 ggc_strdup (labelpc
)));
20389 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
20392 /* Output ".word .LTHUNKn". */
20393 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
20395 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
20396 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
20400 fputs ("\tb\t", file
);
20401 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
20402 if (NEED_PLT_RELOC
)
20403 fputs ("(PLT)", file
);
20404 fputc ('\n', file
);
20409 arm_emit_vector_const (FILE *file
, rtx x
)
20412 const char * pattern
;
20414 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
20416 switch (GET_MODE (x
))
20418 case V2SImode
: pattern
= "%08x"; break;
20419 case V4HImode
: pattern
= "%04x"; break;
20420 case V8QImode
: pattern
= "%02x"; break;
20421 default: gcc_unreachable ();
20424 fprintf (file
, "0x");
20425 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
20429 element
= CONST_VECTOR_ELT (x
, i
);
20430 fprintf (file
, pattern
, INTVAL (element
));
20436 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20437 HFmode constant pool entries are actually loaded with ldr. */
20439 arm_emit_fp16_const (rtx c
)
20444 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
20445 bits
= real_to_target (NULL
, &r
, HFmode
);
20446 if (WORDS_BIG_ENDIAN
)
20447 assemble_zeros (2);
20448 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
20449 if (!WORDS_BIG_ENDIAN
)
20450 assemble_zeros (2);
20454 arm_output_load_gr (rtx
*operands
)
20461 if (GET_CODE (operands
[1]) != MEM
20462 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
20463 || GET_CODE (reg
= XEXP (sum
, 0)) != REG
20464 || GET_CODE (offset
= XEXP (sum
, 1)) != CONST_INT
20465 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
20466 return "wldrw%?\t%0, %1";
20468 /* Fix up an out-of-range load of a GR register. */
20469 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
20470 wcgr
= operands
[0];
20472 output_asm_insn ("ldr%?\t%0, %1", operands
);
20474 operands
[0] = wcgr
;
20476 output_asm_insn ("tmcr%?\t%0, %1", operands
);
20477 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
20482 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
20484 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
20485 named arg and all anonymous args onto the stack.
20486 XXX I know the prologue shouldn't be pushing registers, but it is faster
20490 arm_setup_incoming_varargs (CUMULATIVE_ARGS
*pcum
,
20491 enum machine_mode mode
,
20494 int second_time ATTRIBUTE_UNUSED
)
20498 cfun
->machine
->uses_anonymous_args
= 1;
20499 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
20501 nregs
= pcum
->aapcs_ncrn
;
20502 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
20506 nregs
= pcum
->nregs
;
20508 if (nregs
< NUM_ARG_REGS
)
20509 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
/* Return nonzero if the CONSUMER instruction (a store) does not need
   PRODUCER's value to calculate the address.  */

int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx addr = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (addr) == COND_EXEC)
    addr = COND_EXEC_CODE (addr);
  if (GET_CODE (addr) == PARALLEL)
    addr = XVECEXP (addr, 0, 0);
  addr = XEXP (addr, 0);

  return !reg_overlap_mentioned_p (value, addr);
}
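/* Illustrative example (not from the original sources): with a producer
   "mul r0, r1, r2" and a consumer "str r3, [r0, #4]", the store needs r0 to
   form its address, so the hook returns 0; with a consumer "str r0, [r4]"
   the produced value is only the data being stored, so the hook returns
   nonzero and the scheduler may use the shorter bypass latency.  */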
20535 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20536 have an early register shift value or amount dependency on the
20537 result of PRODUCER. */
20540 arm_no_early_alu_shift_dep (rtx producer
, rtx consumer
)
20542 rtx value
= PATTERN (producer
);
20543 rtx op
= PATTERN (consumer
);
20546 if (GET_CODE (value
) == COND_EXEC
)
20547 value
= COND_EXEC_CODE (value
);
20548 if (GET_CODE (value
) == PARALLEL
)
20549 value
= XVECEXP (value
, 0, 0);
20550 value
= XEXP (value
, 0);
20551 if (GET_CODE (op
) == COND_EXEC
)
20552 op
= COND_EXEC_CODE (op
);
20553 if (GET_CODE (op
) == PARALLEL
)
20554 op
= XVECEXP (op
, 0, 0);
20557 early_op
= XEXP (op
, 0);
20558 /* This is either an actual independent shift, or a shift applied to
20559 the first operand of another operation. We want the whole shift
20561 if (GET_CODE (early_op
) == REG
)
20564 return !reg_overlap_mentioned_p (value
, early_op
);
20567 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20568 have an early register shift value dependency on the result of
20572 arm_no_early_alu_shift_value_dep (rtx producer
, rtx consumer
)
20574 rtx value
= PATTERN (producer
);
20575 rtx op
= PATTERN (consumer
);
20578 if (GET_CODE (value
) == COND_EXEC
)
20579 value
= COND_EXEC_CODE (value
);
20580 if (GET_CODE (value
) == PARALLEL
)
20581 value
= XVECEXP (value
, 0, 0);
20582 value
= XEXP (value
, 0);
20583 if (GET_CODE (op
) == COND_EXEC
)
20584 op
= COND_EXEC_CODE (op
);
20585 if (GET_CODE (op
) == PARALLEL
)
20586 op
= XVECEXP (op
, 0, 0);
20589 early_op
= XEXP (op
, 0);
20591 /* This is either an actual independent shift, or a shift applied to
20592 the first operand of another operation. We want the value being
20593 shifted, in either case. */
20594 if (GET_CODE (early_op
) != REG
)
20595 early_op
= XEXP (early_op
, 0);
20597 return !reg_overlap_mentioned_p (value
, early_op
);
20600 /* Return nonzero if the CONSUMER (a mul or mac op) does not
20601 have an early register mult dependency on the result of
20605 arm_no_early_mul_dep (rtx producer
, rtx consumer
)
20607 rtx value
= PATTERN (producer
);
20608 rtx op
= PATTERN (consumer
);
20610 if (GET_CODE (value
) == COND_EXEC
)
20611 value
= COND_EXEC_CODE (value
);
20612 if (GET_CODE (value
) == PARALLEL
)
20613 value
= XVECEXP (value
, 0, 0);
20614 value
= XEXP (value
, 0);
20615 if (GET_CODE (op
) == COND_EXEC
)
20616 op
= COND_EXEC_CODE (op
);
20617 if (GET_CODE (op
) == PARALLEL
)
20618 op
= XVECEXP (op
, 0, 0);
20621 if (GET_CODE (op
) == PLUS
|| GET_CODE (op
) == MINUS
)
20623 if (GET_CODE (XEXP (op
, 0)) == MULT
)
20624 return !reg_overlap_mentioned_p (value
, XEXP (op
, 0));
20626 return !reg_overlap_mentioned_p (value
, XEXP (op
, 1));
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static enum machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                           enum machine_mode mode,
                           int *punsignedp ATTRIBUTE_UNUSED,
                           const_tree fntype ATTRIBUTE_UNUSED,
                           int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}
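/* Illustrative effect of the hook above: a "char" or "short" argument
   (a MODE_INT value narrower than 4 bytes) is widened to SImode and so
   travels in a full 32-bit register; wider and non-integer modes are
   passed through unchanged.  */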
/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}

/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}

/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}
20681 /* Return non-zero if the consumer (a multiply-accumulate instruction)
20682 has an accumulator dependency on the result of the producer (a
20683 multiplication instruction) and no other dependency on that result. */
20685 arm_mac_accumulator_is_mul_result (rtx producer
, rtx consumer
)
20687 rtx mul
= PATTERN (producer
);
20688 rtx mac
= PATTERN (consumer
);
20690 rtx mac_op0
, mac_op1
, mac_acc
;
20692 if (GET_CODE (mul
) == COND_EXEC
)
20693 mul
= COND_EXEC_CODE (mul
);
20694 if (GET_CODE (mac
) == COND_EXEC
)
20695 mac
= COND_EXEC_CODE (mac
);
20697 /* Check that mul is of the form (set (...) (mult ...))
20698 and mla is of the form (set (...) (plus (mult ...) (...))). */
20699 if ((GET_CODE (mul
) != SET
|| GET_CODE (XEXP (mul
, 1)) != MULT
)
20700 || (GET_CODE (mac
) != SET
|| GET_CODE (XEXP (mac
, 1)) != PLUS
20701 || GET_CODE (XEXP (XEXP (mac
, 1), 0)) != MULT
))
20704 mul_result
= XEXP (mul
, 0);
20705 mac_op0
= XEXP (XEXP (XEXP (mac
, 1), 0), 0);
20706 mac_op1
= XEXP (XEXP (XEXP (mac
, 1), 0), 1);
20707 mac_acc
= XEXP (XEXP (mac
, 1), 1);
20709 return (reg_overlap_mentioned_p (mul_result
, mac_acc
)
20710 && !reg_overlap_mentioned_p (mul_result
, mac_op0
)
20711 && !reg_overlap_mentioned_p (mul_result
, mac_op1
));
/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}

/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;

  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}

/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
20803 arm_set_return_address (rtx source
, rtx scratch
)
20805 arm_stack_offsets
*offsets
;
20806 HOST_WIDE_INT delta
;
20808 unsigned long saved_regs
;
20810 offsets
= arm_get_frame_offsets ();
20811 saved_regs
= offsets
->saved_regs_mask
;
20813 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
20814 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
20817 if (frame_pointer_needed
)
20818 addr
= plus_constant(hard_frame_pointer_rtx
, -4);
20821 /* LR will be the first saved register. */
20822 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
20827 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
20828 GEN_INT (delta
& ~4095)));
20833 addr
= stack_pointer_rtx
;
20835 addr
= plus_constant (addr
, delta
);
20837 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
20843 thumb_set_return_address (rtx source
, rtx scratch
)
20845 arm_stack_offsets
*offsets
;
20846 HOST_WIDE_INT delta
;
20847 HOST_WIDE_INT limit
;
20850 unsigned long mask
;
20854 offsets
= arm_get_frame_offsets ();
20855 mask
= offsets
->saved_regs_mask
;
20856 if (mask
& (1 << LR_REGNUM
))
20859 /* Find the saved regs. */
20860 if (frame_pointer_needed
)
20862 delta
= offsets
->soft_frame
- offsets
->saved_args
;
20863 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
20869 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
20872 /* Allow for the stack frame. */
20873 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
20875 /* The link register is always the first saved register. */
20878 /* Construct the address. */
20879 addr
= gen_rtx_REG (SImode
, reg
);
20882 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
20883 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
20887 addr
= plus_constant (addr
, delta
);
20889 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
20892 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
/* Implements target hook vector_mode_supported_p.  */

bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
          || (mode == V4HImode)
          || (mode == V8QImode)))
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */

static bool
arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
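/* For example, an SImode shift by a variable amount may drop an explicit
   "& 255" applied to the shift count, because the hardware already reduces
   the count modulo 256; DImode shifts make no such promise, hence the
   mask of 0.  */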
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
     compatibility.  The EABI defines them as registers 96-103.  */
  if (IS_FPA_REGNUM (regno))
    return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
        return 64 + regno - FIRST_VFP_REGNUM;
      else
        return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
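/* For instance (illustrative): s5 maps to DWARF register 64 + 5 = 69 under
   the legacy single-precision numbering, while a VFPv3-only register such
   as d20, which has no single-precision alias, maps into the 256 + N
   range (here 256 + 20 = 276).  */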
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
20967 arm_dwarf_register_span (rtx rtl
)
20974 regno
= REGNO (rtl
);
20975 if (!IS_VFP_REGNUM (regno
))
20978 /* XXX FIXME: The EABI defines two VFP register ranges:
20979 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
20981 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
20982 corresponding D register. Until GDB supports this, we shall use the
20983 legacy encodings. We also use these encodings for D0-D15 for
20984 compatibility with older debuggers. */
20985 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
20988 nregs
= GET_MODE_SIZE (GET_MODE (rtl
)) / 8;
20989 p
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nregs
));
20990 regno
= (regno
- FIRST_VFP_REGNUM
) / 2;
20991 for (i
= 0; i
< nregs
; i
++)
20992 XVECEXP (p
, 0, i
) = gen_rtx_REG (DImode
, 256 + regno
+ i
);
20997 #ifdef TARGET_UNWIND_INFO
20998 /* Emit unwind directives for a store-multiple instruction or stack pointer
20999 push during alignment.
21000 These should only ever be generated by the function prologue code, so
21001 expect them to have a particular form. */
21004 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
21007 HOST_WIDE_INT offset
;
21008 HOST_WIDE_INT nregs
;
21014 e
= XVECEXP (p
, 0, 0);
21015 if (GET_CODE (e
) != SET
)
21018 /* First insn will adjust the stack pointer. */
21019 if (GET_CODE (e
) != SET
21020 || GET_CODE (XEXP (e
, 0)) != REG
21021 || REGNO (XEXP (e
, 0)) != SP_REGNUM
21022 || GET_CODE (XEXP (e
, 1)) != PLUS
)
21025 offset
= -INTVAL (XEXP (XEXP (e
, 1), 1));
21026 nregs
= XVECLEN (p
, 0) - 1;
21028 reg
= REGNO (XEXP (XVECEXP (p
, 0, 1), 1));
21031 /* The function prologue may also push pc, but not annotate it as it is
21032 never restored. We turn this into a stack pointer adjustment. */
21033 if (nregs
* 4 == offset
- 4)
21035 fprintf (asm_out_file
, "\t.pad #4\n");
21039 fprintf (asm_out_file
, "\t.save {");
21041 else if (IS_VFP_REGNUM (reg
))
21044 fprintf (asm_out_file
, "\t.vsave {");
21046 else if (reg
>= FIRST_FPA_REGNUM
&& reg
<= LAST_FPA_REGNUM
)
21048 /* FPA registers are done differently. */
21049 asm_fprintf (asm_out_file
, "\t.save %r, %wd\n", reg
, nregs
);
21053 /* Unknown register type. */
21056 /* If the stack increment doesn't match the size of the saved registers,
21057 something has gone horribly wrong. */
21058 if (offset
!= nregs
* reg_size
)
21063 /* The remaining insns will describe the stores. */
21064 for (i
= 1; i
<= nregs
; i
++)
21066 /* Expect (set (mem <addr>) (reg)).
21067 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
21068 e
= XVECEXP (p
, 0, i
);
21069 if (GET_CODE (e
) != SET
21070 || GET_CODE (XEXP (e
, 0)) != MEM
21071 || GET_CODE (XEXP (e
, 1)) != REG
)
21074 reg
= REGNO (XEXP (e
, 1));
21079 fprintf (asm_out_file
, ", ");
21080 /* We can't use %r for vfp because we need to use the
21081 double precision register names. */
21082 if (IS_VFP_REGNUM (reg
))
21083 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
21085 asm_fprintf (asm_out_file
, "%r", reg
);
21087 #ifdef ENABLE_CHECKING
21088 /* Check that the addresses are consecutive. */
21089 e
= XEXP (XEXP (e
, 0), 0);
21090 if (GET_CODE (e
) == PLUS
)
21092 offset
+= reg_size
;
21093 if (GET_CODE (XEXP (e
, 0)) != REG
21094 || REGNO (XEXP (e
, 0)) != SP_REGNUM
21095 || GET_CODE (XEXP (e
, 1)) != CONST_INT
21096 || offset
!= INTVAL (XEXP (e
, 1)))
21100 || GET_CODE (e
) != REG
21101 || REGNO (e
) != SP_REGNUM
)
21105 fprintf (asm_out_file
, "}\n");
21108 /* Emit unwind directives for a SET. */
21111 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
21119 switch (GET_CODE (e0
))
21122 /* Pushing a single register. */
21123 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
21124 || GET_CODE (XEXP (XEXP (e0
, 0), 0)) != REG
21125 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
21128 asm_fprintf (asm_out_file
, "\t.save ");
21129 if (IS_VFP_REGNUM (REGNO (e1
)))
21130 asm_fprintf(asm_out_file
, "{d%d}\n",
21131 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
21133 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
21137 if (REGNO (e0
) == SP_REGNUM
)
21139 /* A stack increment. */
21140 if (GET_CODE (e1
) != PLUS
21141 || GET_CODE (XEXP (e1
, 0)) != REG
21142 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
21143 || GET_CODE (XEXP (e1
, 1)) != CONST_INT
)
21146 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
21147 -INTVAL (XEXP (e1
, 1)));
21149 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
21151 HOST_WIDE_INT offset
;
21153 if (GET_CODE (e1
) == PLUS
)
21155 if (GET_CODE (XEXP (e1
, 0)) != REG
21156 || GET_CODE (XEXP (e1
, 1)) != CONST_INT
)
21158 reg
= REGNO (XEXP (e1
, 0));
21159 offset
= INTVAL (XEXP (e1
, 1));
21160 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
21161 HARD_FRAME_POINTER_REGNUM
, reg
,
21164 else if (GET_CODE (e1
) == REG
)
21167 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
21168 HARD_FRAME_POINTER_REGNUM
, reg
);
21173 else if (GET_CODE (e1
) == REG
&& REGNO (e1
) == SP_REGNUM
)
21175 /* Move from sp to reg. */
21176 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
21178 else if (GET_CODE (e1
) == PLUS
21179 && GET_CODE (XEXP (e1
, 0)) == REG
21180 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
21181 && GET_CODE (XEXP (e1
, 1)) == CONST_INT
)
21183 /* Set reg to offset from sp. */
21184 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
21185 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
21187 else if (GET_CODE (e1
) == UNSPEC
&& XINT (e1
, 1) == UNSPEC_STACK_ALIGN
)
21189 /* Stack pointer save before alignment. */
21191 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
21204 /* Emit unwind directives for the given insn. */
21207 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
21211 if (!ARM_EABI_UNWIND_TABLES
)
21214 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
21215 && (TREE_NOTHROW (current_function_decl
)
21216 || crtl
->all_throwers_are_sibcalls
))
21219 if (GET_CODE (insn
) == NOTE
|| !RTX_FRAME_RELATED_P (insn
))
21222 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
21224 pat
= XEXP (pat
, 0);
21226 pat
= PATTERN (insn
);
21228 switch (GET_CODE (pat
))
21231 arm_unwind_emit_set (asm_out_file
, pat
);
21235 /* Store multiple. */
21236 arm_unwind_emit_sequence (asm_out_file
, pat
);
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
#endif /* TARGET_UNWIND_INFO */
21264 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
21265 stack alignment. */
21268 arm_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
21270 rtx unspec
= SET_SRC (pattern
);
21271 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
21275 case UNSPEC_STACK_ALIGN
:
21276 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
21277 put anything on the stack, so hopefully it won't matter.
21278 CFA = SP will be correct after alignment. */
21279 dwarf2out_reg_save_reg (label
, stack_pointer_rtx
,
21280 SET_DEST (pattern
));
21283 gcc_unreachable ();
21288 /* Output unwind directives for the start/end of a function. */
21291 arm_output_fn_unwind (FILE * f
, bool prologue
)
21293 if (!ARM_EABI_UNWIND_TABLES
)
21297 fputs ("\t.fnstart\n", f
);
      /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
         the frame annotations.  */
21303 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
21304 && (TREE_NOTHROW (current_function_decl
)
21305 || crtl
->all_throwers_are_sibcalls
))
21306 fputs("\t.cantunwind\n", f
);
21308 fputs ("\t.fnend\n", f
);
21313 arm_emit_tls_decoration (FILE *fp
, rtx x
)
21315 enum tls_reloc reloc
;
21318 val
= XVECEXP (x
, 0, 0);
21319 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
21321 output_addr_const (fp
, val
);
21326 fputs ("(tlsgd)", fp
);
21329 fputs ("(tlsldm)", fp
);
21332 fputs ("(tlsldo)", fp
);
21335 fputs ("(gottpoff)", fp
);
21338 fputs ("(tpoff)", fp
);
21341 gcc_unreachable ();
21349 fputs (" + (. - ", fp
);
21350 output_addr_const (fp
, XVECEXP (x
, 0, 2));
21352 output_addr_const (fp
, XVECEXP (x
, 0, 3));
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
21374 arm_output_addr_const_extra (FILE *fp
, rtx x
)
21376 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
21377 return arm_emit_tls_decoration (fp
, x
);
21378 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
21381 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
21383 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
21384 assemble_name_raw (fp
, label
);
21388 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
21390 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
21394 output_addr_const (fp
, XVECEXP (x
, 0, 0));
21398 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
21400 output_addr_const (fp
, XVECEXP (x
, 0, 0));
21404 output_addr_const (fp
, XVECEXP (x
, 0, 1));
21408 else if (GET_CODE (x
) == CONST_VECTOR
)
21409 return arm_emit_vector_const (fp
, x
);
21414 /* Output assembly for a shift instruction.
21415 SET_FLAGS determines how the instruction modifies the condition codes.
21416 0 - Do not set condition codes.
21417 1 - Set condition codes.
21418 2 - Use smallest instruction. */
21420 arm_output_shift(rtx
* operands
, int set_flags
)
21423 static const char flag_chars
[3] = {'?', '.', '!'};
21428 c
= flag_chars
[set_flags
];
21429 if (TARGET_UNIFIED_ASM
)
21431 shift
= shift_op(operands
[3], &val
);
21435 operands
[2] = GEN_INT(val
);
21436 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
21439 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
21442 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
21443 output_asm_insn (pattern
, operands
);
21447 /* Output a Thumb-1 casesi dispatch sequence. */
21449 thumb1_output_casesi (rtx
*operands
)
21451 rtx diff_vec
= PATTERN (next_real_insn (operands
[0]));
21453 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
21455 switch (GET_MODE(diff_vec
))
21458 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
21459 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
21461 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
21462 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
21464 return "bl\t%___gnu_thumb1_case_si";
21466 gcc_unreachable ();
21470 /* Output a Thumb-2 casesi instruction. */
21472 thumb2_output_casesi (rtx
*operands
)
21474 rtx diff_vec
= PATTERN (next_real_insn (operands
[2]));
21476 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
21478 output_asm_insn ("cmp\t%0, %1", operands
);
21479 output_asm_insn ("bhi\t%l3", operands
);
21480 switch (GET_MODE(diff_vec
))
21483 return "tbb\t[%|pc, %0]";
21485 return "tbh\t[%|pc, %0, lsl #1]";
21489 output_asm_insn ("adr\t%4, %l2", operands
);
21490 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
21491 output_asm_insn ("add\t%4, %4, %5", operands
);
21496 output_asm_insn ("adr\t%4, %l2", operands
);
21497 return "ldr\t%|pc, [%4, %0, lsl #2]";
21500 gcc_unreachable ();
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */

static int
arm_issue_rate (void)
{
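  /* A minimal sketch of the tuning dispatch, assuming the dual-issue
     cores modelled here are Cortex-R4/R4F and Cortex-A8/A9; any other
     tuning is treated as single issue.  */
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}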
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */

typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;
static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode,  NULL,                    NULL }
};
static const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi && !in_system_header)
	{
	  warned = true;
	  inform (input_location,
		  "the mangling of %<va_list%> has changed in GCC 4.4");
	}
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
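/* As an illustration, a C++ prototype such as "void f (int8x8_t)", whose
   parameter is a V8QImode vector of __builtin_neon_qi elements, is
   expected to mangle under this scheme as "_Z1f15__simd64_int8_t" rather
   than with the compiler's generic vector mangling.  */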
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;

  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Set default optimization options.  */
void
arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.  */
  if (level > 0)
    flag_section_anchors = 2;
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}
/* Thumb-1 is the only variant without conditional execution, so return
   true unless the target is Thumb-1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}

#include "gt-arm.h"